Example #1
 def __init__(self,
              net,
              dataloader=None,
              params=None,
              update_fn=None,
              eval_loader=None,
              test_loader=None,
              has_cuda=True):
     if has_cuda:
         device = torch.device("cuda:0")
     else:
         device = torch.device('cpu')
     self.net = net.to(device)
     self.loader = dataloader
     self.eval_loader = eval_loader
     self.test_loader = test_loader
     self.params = params
     self.device = device
     # self.net.to(device)
     if params is not None:
         n_data, num_classes = params['n_data'], params['num_classes']
         n_eval_data, batch_size = params['n_eval_data'], params[
             'batch_size']
         self.ensemble_pred = torch.zeros((n_data, num_classes),
                                          device=device)
         self.target_pred = torch.zeros((n_data, num_classes),
                                        device=device)
         t_one = torch.ones(())
         self.epoch_pred = t_one.new_empty((n_data, num_classes),
                                           dtype=torch.float32,
                                           device=device)
         self.epoch_mask = t_one.new_empty((n_data),
                                           dtype=torch.float32,
                                           device=device)
         self.train_epoch_loss = \
             t_one.new_empty((n_data // batch_size, 4),
                             dtype=torch.float32, device=device)
         self.train_epoch_acc = \
             t_one.new_empty((n_data // batch_size), dtype=torch.float32,
                             device=device)
         self.eval_epoch_loss = \
             t_one.new_empty((n_eval_data // batch_size, 2),
                             dtype=torch.float32, device=device)
         self.eval_epoch_acc = \
             t_one.new_empty((n_eval_data // batch_size, 2),
                             dtype=torch.float32, device=device)
         self.optimizer = opt.Adam(self.net.parameters())
         self.update_fn = update_fn
         self.ema = EMA(params['polyak_decay'], self.net, has_cuda)
         self.unsup_weight = 0.0
Example #2
def test(model, ema, args, data):
    device = torch.device(
        f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    criterion = nn.CrossEntropyLoss()
    loss = 0
    answers = dict()
    model.eval()

    backup_params = EMA(0)
    for name, param in model.named_parameters():
        if param.requires_grad:
            backup_params.register(name, param.data)
            param.data.copy_(ema.get(name))

    with torch.set_grad_enabled(False):
        for batch in iter(data.dev_iter):
            p1, p2 = model(batch)
            batch_loss = criterion(p1, batch.s_idx) + criterion(
                p2, batch.e_idx)
            loss += batch_loss.item()

            # (batch, c_len, c_len)
            batch_size, c_len = p1.size()
            ls = nn.LogSoftmax(dim=1)
            mask = (torch.ones(c_len, c_len) *
                    float('-inf')).to(device).tril(-1).unsqueeze(0).expand(
                        batch_size, -1, -1)
            score = (ls(p1).unsqueeze(2) + ls(p2).unsqueeze(1)) + mask
            score, s_idx = score.max(dim=1)
            score, e_idx = score.max(dim=1)
            s_idx = torch.gather(s_idx, 1, e_idx.view(-1, 1)).squeeze()

            for i in range(batch_size):
                id = batch.id[i]
                answer = batch.c_word[0][i][s_idx[i]:e_idx[i] + 1]
                answer = ' '.join(
                    [data.WORD.vocab.itos[idx] for idx in answer])
                answers[id] = answer

        for name, param in model.named_parameters():
            if param.requires_grad:
                param.data.copy_(backup_params.get(name))

    with open(args.prediction_file, 'w', encoding='utf-8') as f:
        print(json.dumps(answers), file=f)

    results = evaluate.main(args)
    return loss, results['exact_match'], results['f1']
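The snippet above (and Example #17 further down) assumes an EMA object keyed by parameter name, with register(name, tensor), update(name, tensor) and get(name); EMA(0) doubles as a plain backup store. A minimal sketch of such a class follows; the exact decay handling is an assumption, not the original implementation.

# Hedged sketch of the register/update/get EMA interface assumed above.
# EMA(0) degenerates into a simple backup store, as used for backup_params.
class EMA:
    def __init__(self, mu):
        self.mu = mu        # decay factor in [0, 1)
        self.shadow = {}

    def register(self, name, val):
        # keep a detached copy so later in-place updates do not alias it
        self.shadow[name] = val.clone()

    def update(self, name, val):
        # shadow <- (1 - mu) * val + mu * shadow
        self.shadow[name] = (1.0 - self.mu) * val + self.mu * self.shadow[name]

    def get(self, name):
        return self.shadow[name]

With mu close to 1 (e.g. 0.999) the shadow weights change slowly, which is why they are swapped in only for evaluation and restored afterwards.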
Example #3
 def getema(self, period=14, label="ema"):
     tag = "ema:{}".format(period)
     ema = self.analyzer.getIndicator(tag)
     if not ema:
         ema = EMA(self.csdata, {"period": period, "label": label})
         self.analyzer.saveIndicator(tag, ema)
     return ema
Example #4
def main():

    opts = load_option()
    print(opts)
    train_dataset, test_dataset = load_cifar10(opts, unlabeled_label=opts.unlabeled_label)

    model = ConvLarge(opts.num_classes + 1)
    center = EMA((len(train_dataset), opts.num_classes + 1), opts.alpha)
    if opts.cuda:
        model.to('cuda')
        center.to('cuda')
        
    trainer = Trainer(model, center, train_dataset, test_dataset, opts)

    trainer.validate()
    for epoch in range(opts.max_epoch):
        trainer.train()
        trainer.validate()
Example #5
def MACD(df, t1=12, t2=26):
    if t1 > t2:
        t1, t2 = t2, t1
    sma1 = SMA(df, t1)
    sma2 = SMA(df, t2)
    temp = pd.Series(sma1 - sma2, index=df.index)
    signal = EMA(temp[t2:], 9)
    return temp, signal
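Here SMA and EMA are helpers that take a pandas Series (a price column) and a window length and return a smoothed Series. A minimal sketch under that assumption; the real helpers may differ.

import pandas as pd


# Hedged sketch of the SMA/EMA helpers assumed by the MACD function above.
def SMA(series, period):
    # simple moving average over a fixed window
    return series.rolling(window=period).mean()


def EMA(series, period):
    # exponentially weighted moving average with span = period
    return series.ewm(span=period, adjust=False).mean()

Usage would look like macd_line, signal_line = MACD(close_prices), after which crossover rules can be applied to the two Series.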
Example #6
class RSI(object):

    def __init__(self, period):
        self.value = None
        self.last = None
        self.ema_u = EMA(period)
        self.ema_d = EMA(period)
        self.tbl = None
    
    def setupH5(self, h5file, h5where, h5name):
        if h5file is not None and h5where is not None and h5name is not None:
            self.tbl = h5file.createTable(h5where, h5name, RSIData)

    def update(self, value, date=None):
        if self.last is None:
            self.last = value
        
        U = value - self.last
        D = self.last - value

        self.last = value

        if U > 0:
            D = 0
        elif D > 0:
            U = 0

        self.ema_u.update(U)
        self.ema_d.update(D)

        if self.ema_d.value == 0:
            self.value = 100.0
        else:
            rs = self.ema_u.value / self.ema_d.value
            self.value = 100.0 - (100.0 / (1 + rs))

        if self.tbl is not None and date:
            self.tbl.row["date"] = date.date().toordinal()
            self.tbl.row["value"] = self.value
            self.tbl.row.append()
            self.tbl.flush()

        return self.value
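The RSI class relies on an incremental EMA with an update(value) method and a .value attribute. A minimal sketch is shown below, assuming Wilder-style smoothing with alpha = 1/period; the original class may seed or smooth differently.

# Hedged sketch of the incremental EMA used by RSI above: update(value)
# advances the average and .value holds the current estimate.
class EMA(object):
    def __init__(self, period):
        self.alpha = 1.0 / period   # Wilder-style smoothing (assumption)
        self.value = None

    def update(self, x):
        if self.value is None:
            self.value = float(x)   # seed with the first observation
        else:
            self.value += self.alpha * (float(x) - self.value)
        return self.value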
Example #8
 def getema(self, period=14, label="ema", metric="closed"):
     tag = "ema:{}:{}".format(period, metric)
     ema = self.analyzer.getIndicator(tag)
     if not ema:
         ema = EMA(self.csdata, {
             "period": period,
             "label": label,
             "metric": metric
         })
         self.analyzer.saveIndicator(tag, ema)
     return ema
Example #9
def train(save_pth, use_mixup, mixup_alpha):
    model, criteria = set_model()
    ema = EMA(model, ema_alpha)

    optim, lr_scheduler = set_optimizer(model)

    dltrain = get_train_loader(batch_size=batchsize,
                               num_workers=n_workers,
                               dataset=ds_name,
                               pin_memory=False)

    for e in range(n_epochs):
        tic = time.time()

        loss_avg = train_one_epoch(model, criteria, dltrain, optim, ema,
                                   use_mixup, mixup_alpha)
        lr_scheduler.step()
        acc = evaluate(model, verbose=False)
        ema.apply_shadow()
        acc_ema = evaluate(model, verbose=False)
        ema.restore()

        toc = time.time()
        msg = 'epoch: {}, loss: {:.4f}, lr: {:.4f}, acc: {:.4f}, acc_ema: {:.4f}, time: {:.2f}'.format(
            e, loss_avg,
            list(optim.param_groups)[0]['lr'], acc, acc_ema, toc - tic)
        print(msg)
    save_model(model, save_pth)
    print('done')
    return model
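In this example EMA(model, ema_alpha) keeps a shadow copy of the model weights: update() is called after every optimizer step, and apply_shadow()/restore() swap the averaged weights in and out around evaluation. A minimal sketch of that interface follows; the treatment of buffers and the exact decay bookkeeping are assumptions.

import torch


# Hedged sketch of a model-level EMA with update/apply_shadow/restore,
# matching how it is called in the training loop above.
class EMA:
    def __init__(self, model, alpha):
        self.model = model
        self.alpha = alpha            # decay of the running average
        self.shadow = {k: v.detach().clone()
                       for k, v in model.state_dict().items()}
        self.backup = {}

    @torch.no_grad()
    def update(self):
        for k, v in self.model.state_dict().items():
            if v.dtype.is_floating_point:
                # shadow <- alpha * shadow + (1 - alpha) * v
                self.shadow[k].mul_(self.alpha).add_(v, alpha=1.0 - self.alpha)
            else:
                self.shadow[k].copy_(v)   # e.g. BatchNorm step counters

    def apply_shadow(self):
        self.backup = {k: v.detach().clone()
                       for k, v in self.model.state_dict().items()}
        self.model.load_state_dict(self.shadow)

    def restore(self):
        self.model.load_state_dict(self.backup)
        self.backup = {}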
Example #10
def train():
    model, criteria_x, criteria_u = set_model()

    n_iters_per_epoch = n_imgs_per_epoch // batchsize
    dltrain_x, dltrain_u = get_train_loader(
        batchsize, n_iters_per_epoch, L=250, K=n_guesses
    )
    lb_guessor = LabelGuessor(model, T=temperature)
    mixuper = MixUp(mixup_alpha)

    ema = EMA(model, ema_alpha)
    optim = torch.optim.Adam(model.parameters(), lr=lr)

    lam_u_epoch = float(lam_u) / n_epoches
    lam_u_once = lam_u_epoch / n_iters_per_epoch

    train_args = dict(
        model=model,
        criteria_x=criteria_x,
        criteria_u=criteria_u,
        optim=optim,
        ema=ema,
        wd=1 - weight_decay * lr,
        dltrain_x=dltrain_x,
        dltrain_u=dltrain_u,
        lb_guessor=lb_guessor,
        mixuper=mixuper,
        lambda_u=0,
        lambda_u_once=lam_u_once,
    )
    best_acc = -1
    print('start to train')
    for e in range(n_epoches):
        model.train()
        print('epoch: {}'.format(e))
        train_args['lambda_u'] = e * lam_u_epoch
        train_one_epoch(**train_args)
        torch.cuda.empty_cache()

        acc = evaluate(ema)
        best_acc = acc if best_acc < acc else best_acc
        log_msg = [
            'epoch: {}'.format(e),
            'acc: {:.4f}'.format(acc),
            'best_acc: {:.4f}'.format(best_acc)]
        print(', '.join(log_msg))
Example #11
    def __testEMA(self, strDateTime, strType='O'):
        targetIdx = self.dicDatetime2Idx.get(strDateTime, -1)
        assert targetIdx != -1, "Incorrect input datetime."
        assert targetIdx in self.dicTempResultMA_CPU, \
            "Target datetime not in current temp result."

        strTypeForRaw = self.__convertTypeToRawType(strType)

        lstMA = []
        lstPrice = []
        for idx in xrange(targetIdx - self.timespan + 1, targetIdx + 1):
            lstMA.append(self.dicTempResultMA_CPU[idx][strType])
            lstPrice.append(float(self.dicRawData[idx][strTypeForRaw]))

        from ema import EMA
        ema = EMA(lstMA, lstPrice, self.timespan)
        ema.calculate()
        ema.show()
Example #12
    def validate(self, model: torch.nn.Module, lossfunction: _Loss, iter: Iterator, ema=None, log_results=False) -> \
            Tuple[float, float, float]:
        model.eval()
        if ema is not None:
            backup_params = EMA.ema_backup_and_loadavg(ema, model)

        results = dict()
        ids = []
        lossvalues = []
        spans = []
        gt_spans = []
        span_probs = []
        for i, batch in enumerate(iter):
            ids += batch.id
            logprobs_S, logprobs_E = model(batch)
            loss_s = lossfunction(logprobs_S, batch.a_start)
            loss_e = lossfunction(logprobs_E, batch.a_end)
            loss = loss_s + loss_e
            lossvalues += loss.tolist()

            best_span_probs, candidates = model.decode(logprobs_S, logprobs_E)
            span_probs += best_span_probs.tolist()
            spans += self.get_spans(batch, candidates)
            gt_spans += batch.gt_answer

        # compute the final loss and results
        # we need to filter through multiple possible choices and pick the best one
        lossdict = defaultdict(lambda: math.inf)
        probs = defaultdict(lambda: 0)
        for id, value, span, span_prob in zip(ids, lossvalues, spans, span_probs):
            # record only lowest loss
            if lossdict[id] > value:
                lossdict[id] = value
            results[id] = span
            probs[id] = span_prob

        if log_results:
            self.log_results(results, probs)

        loss = sum(lossdict.values()) / len(lossdict)
        prediction_file = f".data/squad/dev_results_{socket.gethostname()}.json"
        with open(prediction_file, "w") as f:
            json.dump(results, f)

        dataset_file = ".data/squad/dev-v1.1.json"

        expected_version = '1.1'
        with open(dataset_file) as dataset_file:
            dataset_json = json.load(dataset_file)
            if dataset_json['version'] != expected_version:
                logging.info('Evaluation expects v-' + expected_version +
                             ', but got dataset with v-' +
                             dataset_json['version'])
            dataset = dataset_json['data']
        with open(prediction_file) as prediction_file:
            predictions = json.load(prediction_file)
        result = evaluate(dataset, predictions)
        logging.info(json.dumps(result))

        if ema is not None:
            EMA.ema_restore_backed_params(backup_params, model)

        return loss, result["exact_match"], result["f1"]
Example #13
#!/usr/bin/python

import time
from srf02 import SRF02
from ema import EMA

# ===========================================================================
# Example Code
# ===========================================================================

# Initialise the sensor
srf02 = SRF02(0x70, debug=False)
# init EMA filter
rangeEMA = EMA(0.2)

while True:
  rangeCm = srf02.readRangeCentimeters()
  minRange = srf02.readMinRange()

  rangeFiltered = rangeEMA.filter(rangeCm)

#  rangeUs = srf02.readRangeMicroseconds()
#  print("range: %0.3f cm (min %0.3f; echo %0.3f ms)" % (rangeCm, minRange, rangeUs/1000.0))
  print("range: %0.2f cm filtered: %0.2f (min %0.3f)" % (rangeCm, rangeFiltered, minRange))
#  time.sleep(0.05)
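
For the sonar script above, EMA(0.2) is a one-dimensional low-pass filter whose filter(x) call returns the smoothed range. A minimal sketch, assuming the constructor argument is the smoothing factor alpha:

# Hedged sketch of the single-value EMA filter used above; the constructor
# argument is assumed to be the smoothing factor alpha in (0, 1].
class EMA(object):
    def __init__(self, alpha):
        self.alpha = alpha
        self.state = None

    def filter(self, x):
        if self.state is None:
            self.state = float(x)   # seed with the first reading
        else:
            self.state += self.alpha * (float(x) - self.state)
        return self.state

Smaller alpha values smooth more aggressively at the cost of a laggier range estimate.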




Example #14
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('--data', default='mimic3.npz', help='data file')
    parser.add_argument('--seed',
                        type=int,
                        default=None,
                        help='random seed. Randomly set if not specified.')

    # training options
    parser.add_argument('--nz',
                        type=int,
                        default=32,
                        help='dimension of latent variable')
    parser.add_argument('--epoch',
                        type=int,
                        default=200,
                        help='number of training epochs')
    parser.add_argument('--batch-size',
                        type=int,
                        default=64,
                        help='batch size')
    # Use smaller test batch size to accommodate more importance samples
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=32,
                        help='batch size for validation and test set')
    parser.add_argument('--train-k',
                        type=int,
                        default=8,
                        help='number of importance weights for training')
    parser.add_argument('--test-k',
                        type=int,
                        default=50,
                        help='number of importance weights for evaluation')
    parser.add_argument('--flow',
                        type=int,
                        default=2,
                        help='number of IAF layers')
    parser.add_argument('--lr',
                        type=float,
                        default=2e-4,
                        help='global learning rate')
    parser.add_argument('--enc-lr',
                        type=float,
                        default=1e-4,
                        help='encoder learning rate')
    parser.add_argument('--dec-lr',
                        type=float,
                        default=1e-4,
                        help='decoder learning rate')
    parser.add_argument('--min-lr',
                        type=float,
                        default=-1,
                        help='min learning rate for LR scheduler. '
                        '-1 to disable annealing')
    parser.add_argument('--wd', type=float, default=1e-3, help='weight decay')
    parser.add_argument('--overlap',
                        type=float,
                        default=.5,
                        help='kernel overlap')
    parser.add_argument('--cls',
                        type=float,
                        default=200,
                        help='classification weight')
    parser.add_argument('--clsdep',
                        type=int,
                        default=1,
                        help='number of layers for classifier')
    parser.add_argument('--ts',
                        type=float,
                        default=1,
                        help='log-likelihood weight for ELBO')
    parser.add_argument('--kl',
                        type=float,
                        default=.1,
                        help='KL weight for ELBO')
    parser.add_argument('--eval-interval',
                        type=int,
                        default=1,
                        help='AUC evaluation interval. '
                        '0 to disable evaluation.')
    parser.add_argument('--save-interval',
                        type=int,
                        default=0,
                        help='interval to save models. 0 to disable saving.')
    parser.add_argument('--prefix',
                        default='pvae',
                        help='prefix of output directory')
    parser.add_argument('--comp',
                        type=int,
                        default=7,
                        help='continuous convolution kernel size')
    parser.add_argument('--sigma',
                        type=float,
                        default=.2,
                        help='standard deviation for Gaussian likelihood')
    parser.add_argument('--dec-ch',
                        default='8-16-16',
                        help='decoder architecture')
    parser.add_argument('--enc-ch',
                        default='64-32-32-16',
                        help='encoder architecture')
    parser.add_argument('--rescale',
                        dest='rescale',
                        action='store_const',
                        const=True,
                        default=True,
                        help='if set, rescale time to [-1, 1]')
    parser.add_argument('--no-rescale',
                        dest='rescale',
                        action='store_const',
                        const=False)
    parser.add_argument('--cconvnorm',
                        dest='cconv_norm',
                        action='store_const',
                        const=True,
                        default=True,
                        help='if set, normalize continuous convolutional '
                        'layer using mean pooling')
    parser.add_argument('--no-cconvnorm',
                        dest='cconv_norm',
                        action='store_const',
                        const=False)
    parser.add_argument('--cconv-ref',
                        type=int,
                        default=98,
                        help='number of evenly-spaced reference locations '
                        'for continuous convolutional layer')
    parser.add_argument('--dec-ref',
                        type=int,
                        default=128,
                        help='number of evenly-spaced reference locations '
                        'for decoder')
    parser.add_argument('--ema',
                        dest='ema',
                        type=int,
                        default=0,
                        help='start epoch of exponential moving average '
                        '(EMA). -1 to disable EMA')
    parser.add_argument('--ema-decay',
                        type=float,
                        default=.9999,
                        help='EMA decay')

    args = parser.parse_args()

    nz = args.nz

    epochs = args.epoch
    eval_interval = args.eval_interval
    save_interval = args.save_interval

    if args.seed is None:
        rnd = np.random.RandomState(None)
        random_seed = rnd.randint(np.iinfo(np.uint32).max)
    else:
        random_seed = args.seed
    rnd = np.random.RandomState(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)

    max_time = 5
    cconv_ref = args.cconv_ref
    overlap = args.overlap
    train_dataset, val_dataset, test_dataset = time_series.split_data(
        args.data, rnd, max_time, cconv_ref, overlap, device, args.rescale)

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              drop_last=True,
                              collate_fn=train_dataset.collate_fn)
    n_train_batch = len(train_loader)

    val_loader = DataLoader(val_dataset,
                            batch_size=args.test_batch_size,
                            shuffle=False,
                            collate_fn=val_dataset.collate_fn)

    test_loader = DataLoader(test_dataset,
                             batch_size=args.test_batch_size,
                             shuffle=False,
                             collate_fn=test_dataset.collate_fn)

    in_channels, seq_len = train_dataset.data.shape[1:]

    dec_channels = [int(c) for c in args.dec_ch.split('-')] + [in_channels]
    enc_channels = [int(c) for c in args.enc_ch.split('-')]

    out_channels = enc_channels[0]

    squash = torch.sigmoid
    if args.rescale:
        squash = torch.tanh

    dec_ch_up = 2**(len(dec_channels) - 2)
    assert args.dec_ref % dec_ch_up == 0, (
        f'--dec-ref={args.dec_ref} is not divisible by {dec_ch_up}.')
    dec_len0 = args.dec_ref // dec_ch_up
    grid_decoder = GridDecoder(nz, dec_channels, dec_len0, squash)

    decoder = Decoder(grid_decoder, max_time=max_time,
                      dec_ref=args.dec_ref).to(device)

    cconv = ContinuousConv1D(in_channels,
                             out_channels,
                             max_time,
                             cconv_ref,
                             overlap_rate=overlap,
                             kernel_size=args.comp,
                             norm=args.cconv_norm).to(device)
    encoder = Encoder(cconv, nz, enc_channels, args.flow).to(device)

    classifier = Classifier(nz, args.clsdep).to(device)

    pvae = PVAE(encoder, decoder, classifier, args.sigma, args.cls).to(device)

    ema = None
    if args.ema >= 0:
        ema = EMA(pvae, args.ema_decay, args.ema)

    other_params = [
        param for name, param in pvae.named_parameters()
        if not (name.startswith('decoder.grid_decoder')
                or name.startswith('encoder.grid_encoder'))
    ]
    params = [
        {
            'params': decoder.grid_decoder.parameters(),
            'lr': args.dec_lr
        },
        {
            'params': encoder.grid_encoder.parameters(),
            'lr': args.enc_lr
        },
        {
            'params': other_params
        },
    ]

    optimizer = optim.Adam(params, lr=args.lr, weight_decay=args.wd)

    scheduler = make_scheduler(optimizer, args.lr, args.min_lr, epochs)

    path = '{}_{}'.format(args.prefix, datetime.now().strftime('%m%d.%H%M%S'))

    output_dir = Path('results') / 'mimic3-pvae' / path
    print(output_dir)
    log_dir = mkdir(output_dir / 'log')
    model_dir = mkdir(output_dir / 'model')

    start_epoch = 0

    with (log_dir / 'seed.txt').open('w') as f:
        print(random_seed, file=f)
    with (log_dir / 'gpu.txt').open('a') as f:
        print(torch.cuda.device_count(), start_epoch, file=f)
    with (log_dir / 'args.txt').open('w') as f:
        for key, val in sorted(vars(args).items()):
            print(f'{key}: {val}', file=f)
    with (log_dir / 'params.txt').open('w') as f:

        def print_params_count(module, name):
            try:  # sum counts if module is a list
                params_count = sum(count_parameters(m) for m in module)
            except TypeError:
                params_count = count_parameters(module)
            print(f'{name} {params_count}', file=f)

        print_params_count(grid_decoder, 'grid_decoder')
        print_params_count(decoder, 'decoder')
        print_params_count(cconv, 'cconv')
        print_params_count(encoder, 'encoder')
        print_params_count(classifier, 'classifier')
        print_params_count(pvae, 'pvae')
        print_params_count(pvae, 'total')

    tracker = Tracker(log_dir, n_train_batch)
    evaluator = Evaluator(pvae,
                          val_loader,
                          test_loader,
                          log_dir,
                          eval_args={'iw_samples': args.test_k})
    start = time.time()
    epoch_start = start

    for epoch in range(start_epoch, epochs):
        loss_breakdown = defaultdict(float)
        epoch_start = time.time()
        for (val, idx, mask, y, _, cconv_graph) in train_loader:
            optimizer.zero_grad()
            loss, _, _, loss_info = pvae(val, idx, mask, y, cconv_graph,
                                         args.train_k, args.ts, args.kl)
            loss.backward()
            optimizer.step()

            if ema:
                ema.update()

            for loss_name, loss_val in loss_info.items():
                loss_breakdown[loss_name] += loss_val

        if scheduler:
            scheduler.step()

        cur_time = time.time()
        tracker.log(epoch, loss_breakdown, cur_time - epoch_start,
                    cur_time - start)

        if eval_interval > 0 and (epoch + 1) % eval_interval == 0:
            if ema:
                ema.apply()
                evaluator.evaluate(epoch)
                ema.restore()
            else:
                evaluator.evaluate(epoch)

        model_dict = {
            'pvae': pvae.state_dict(),
            'ema': ema.state_dict() if ema else None,
            'epoch': epoch + 1,
            'args': args,
        }
        torch.save(model_dict, str(log_dir / 'model.pth'))
        if save_interval > 0 and (epoch + 1) % save_interval == 0:
            torch.save(model_dict, str(model_dir / f'{epoch:04d}.pth'))

    print(output_dir)
Example #15
class SNTGRunLoop(object):
    def __init__(self,
                 net,
                 dataloader=None,
                 params=None,
                 update_fn=None,
                 eval_loader=None,
                 test_loader=None,
                 has_cuda=True):
        if has_cuda:
            device = torch.device("cuda:0")
        else:
            device = torch.device('cpu')
        self.net = net.to(device)
        self.loader = dataloader
        self.eval_loader = eval_loader
        self.test_loader = test_loader
        self.params = params
        self.device = device
        # self.net.to(device)
        if params is not None:
            n_data, num_classes = params['n_data'], params['num_classes']
            n_eval_data, batch_size = params['n_eval_data'], params[
                'batch_size']
            self.ensemble_pred = torch.zeros((n_data, num_classes),
                                             device=device)
            self.target_pred = torch.zeros((n_data, num_classes),
                                           device=device)
            t_one = torch.ones(())
            self.epoch_pred = t_one.new_empty((n_data, num_classes),
                                              dtype=torch.float32,
                                              device=device)
            self.epoch_mask = t_one.new_empty((n_data),
                                              dtype=torch.float32,
                                              device=device)
            self.train_epoch_loss = \
                t_one.new_empty((n_data // batch_size, 4),
                                dtype=torch.float32, device=device)
            self.train_epoch_acc = \
                t_one.new_empty((n_data // batch_size), dtype=torch.float32,
                                device=device)
            self.eval_epoch_loss = \
                t_one.new_empty((n_eval_data // batch_size, 2),
                                dtype=torch.float32, device=device)
            self.eval_epoch_acc = \
                t_one.new_empty((n_eval_data // batch_size, 2),
                                dtype=torch.float32, device=device)
            self.optimizer = opt.Adam(self.net.parameters())
            self.update_fn = update_fn
            self.ema = EMA(params['polyak_decay'], self.net, has_cuda)
            self.unsup_weight = 0.0
        # self.loss_fn = nn.CrossEntropyLoss()

    def train(self):
        # labeled_loss = nn.CrossEntropyLoss()
        train_losses, train_accs = [], []
        eval_losses, eval_accs = [], []
        ema_eval_losses, ema_eval_accs = [], []
        for epoch in range(self.params['num_epochs']):
            # training phase
            self.net.train()
            train_time = -time.time()
            self.epoch_pred.zero_()
            self.epoch_mask.zero_()
            # self.epoch_loss.zero_()
            self.unsup_weight = self.update_fn(self.optimizer, epoch)

            for i, data_batched in enumerate(self.loader, 0):
                images, is_lens, mask, indices = \
                    data_batched['image'], data_batched['is_lens'], \
                    data_batched['mask'], data_batched['index']

                targets = torch.index_select(self.target_pred, 0, indices)
                # print(f"y value dimension:{is_lens.size()}")

                self.optimizer.zero_grad()

                outputs, h_x = self.net(images)
                # print(f"output dimension: {outputs.size()}")
                predicts = F.softmax(outputs, dim=1)

                # update for ensemble
                for k, j in enumerate(indices):
                    self.epoch_pred[j] = predicts[k]
                    self.epoch_mask[j] = 1.0

                # labeled loss
                labeled_mask = mask.eq(0)
                # loss = self.loss_fn(
                # outputs[labeled_mask], is_lens[labeled_mask])
                # labeled loss with binary entropy with logits, use one_hot
                one_hot = torch.zeros(
                    len(is_lens[labeled_mask]), is_lens[labeled_mask].max()+1,
                    device=self.device) \
                    .scatter_(1, is_lens[labeled_mask].unsqueeze(1), 1.)
                loss = F.binary_cross_entropy_with_logits(
                    outputs[labeled_mask], one_hot)
                # one_hot = torch.zeros(
                #     len(is_lens), is_lens.max() + 1, device=self.device) \
                #     .scatter_(1, is_lens.unsqueeze(1), 1.)
                # loss = F.binary_cross_entropy_with_logits(outputs, one_hot)
                # print(loss.item())

                self.train_epoch_acc[i] = \
                    torch.mean(torch.argmax(
                        outputs[labeled_mask], 1).eq(is_lens[labeled_mask])
                    .float()).item()
                # train_acc = torch.mean(
                #     torch.argmax(outputs, 1).eq(is_lens).float())
                self.train_epoch_loss[i, 0] = loss.item()

                # unlabeled loss
                unlabeled_loss = torch.mean((predicts - targets)**2)
                self.train_epoch_loss[i, 1] = unlabeled_loss.item()
                loss += unlabeled_loss * self.unsup_weight

                # SNTG loss
                if self.params['embed']:
                    half = int(h_x.size()[0] // 2)
                    eucd2 = torch.mean((h_x[:half] - h_x[half:])**2, dim=1)
                    eucd = torch.sqrt(eucd2)
                    target_hard = torch.argmax(targets, dim=1).int()
                    merged_tar = torch.where(mask == 0, target_hard,
                                             is_lens.int())
                    neighbor_bool = torch.eq(merged_tar[:half],
                                             merged_tar[half:])
                    eucd_y = torch.where(eucd < 1.0, (1.0 - eucd)**2,
                                         torch.zeros_like(eucd))
                    embed_losses = torch.where(neighbor_bool, eucd2, eucd_y)
                    embed_loss = torch.mean(embed_losses)
                    self.train_epoch_loss[i, 2] = embed_loss.item()
                    loss += embed_loss * \
                        self.unsup_weight * self.params['embed_coeff']
                self.train_epoch_loss[i, 3] = loss.item()
                loss.backward()
                self.optimizer.step()
                self.ema.update()

            self.ensemble_pred = \
                self.params['pred_decay'] * self.ensemble_pred + \
                (1 - self.params['pred_decay']) * self.epoch_pred
            self.target_pred = self.ensemble_pred / \
                (1.0 - self.params['pred_decay'] ** (epoch + 1))
            loss_mean = torch.mean(self.train_epoch_loss, 0)
            train_losses.append(loss_mean[3].item())
            acc_mean = torch.mean(self.train_epoch_acc)
            train_accs.append(acc_mean.item())
            print(f"epoch {epoch}, time consumed: {time.time() + train_time}, "
                  f"labeled loss: {loss_mean[0].item()}, "
                  f"unlabeled loss: {loss_mean[1].item()}, "
                  f"SNTG loss: {loss_mean[2].item()}, "
                  f"total loss: {loss_mean[3].item()}")
            # print(f"epoch {epoch}, time consumed: {time.time() + train_time}, "
            #       f"labeled loss: {loss_mean[0].item()}")

            # eval phase
            if self.eval_loader is not None:
                # non-EMA evaluation
                self.net.eval()
                for i, data_batched in enumerate(self.eval_loader, 0):
                    images, is_lens = data_batched['image'], \
                        data_batched['is_lens']
                    # currently h_x in evaluation is not used
                    eval_logits, _ = self.ema(images)
                    self.eval_epoch_acc[i, 0] = torch.mean(
                        torch.argmax(eval_logits,
                                     1).eq(is_lens).float()).item()
                    # print(f"ema evaluation accuracy: {ema_eval_acc.item()}")
                    eval_lens = torch.zeros(
                        len(is_lens), is_lens.max()+1,
                        device=self.device) \
                        .scatter_(1, is_lens.unsqueeze(1), 1.)
                    # eval_loss = self.loss_fn(eval_logits, is_lens)
                    self.eval_epoch_loss[i, 0] = \
                        F.binary_cross_entropy_with_logits(
                        eval_logits, eval_lens).item()
                    # break
                    eval_logits, _ = self.net(images)
                    self.eval_epoch_acc[i, 1] = torch.mean(
                        torch.argmax(eval_logits,
                                     1).eq(is_lens).float()).item()
                    # print(f"evaluation accuracy: {eval_acc.item()}")
                    self.eval_epoch_loss[i, 1] = \
                        F.binary_cross_entropy_with_logits(
                        eval_logits, eval_lens).item()
                loss_mean = torch.mean(self.eval_epoch_loss, 0)
                acc_mean = torch.mean(self.eval_epoch_acc, 0)
                ema_eval_accs.append(acc_mean[0].item())
                ema_eval_losses.append(loss_mean[0].item())
                eval_accs.append(acc_mean[1].item())
                eval_losses.append(loss_mean[1].item())
                print(f"ema accuracy: {acc_mean[0].item()}, "
                      f"normal accuracy: {acc_mean[1].item()}")

        return train_losses, train_accs, eval_losses, eval_accs, \
            ema_eval_losses, ema_eval_accs

    def test(self):
        self.net.eval()
        with torch.no_grad():
            for i, data_batched in enumerate(self.test_loader, 0):
                images, is_lens = data_batched['image'], data_batched[
                    'is_lens']
                start = time.time()
                test_logits, _ = self.net(images)
                end = time.time()
                result = torch.argmax(F.softmax(test_logits, dim=1), dim=1)
                accuracy = torch.mean(result.eq(is_lens).float()).item()
                # return roc_curve(is_lens, test_logits)
                return result.tolist(), is_lens.tolist(), end - start, \
                    accuracy

    def test_origin(self):
        self.net.eval()
        with torch.no_grad():
            for i, data_batched in enumerate(self.test_loader, 0):
                images, is_lens = data_batched['image'], data_batched[
                    'is_lens']
                test_logits, _ = self.net(images)
                return test_logits, is_lens
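SNTGRunLoop builds its EMA as EMA(params['polyak_decay'], self.net, has_cuda), calls self.ema.update() after every optimizer step, and evaluates by calling self.ema(images) as if it were the network itself. A minimal sketch of such a callable Polyak-averaged copy is below; only the constructor signature and the call sites are taken from the example, the rest is an assumption.

import copy

import torch


# Hedged sketch: a callable Polyak-averaged copy of a network, matching the
# EMA(decay, net, has_cuda) construction and the self.ema(images) calls above.
class EMA:
    def __init__(self, decay, net, has_cuda=True):
        self.decay = decay
        self.net = net
        device = torch.device("cuda:0" if has_cuda else "cpu")
        self.avg_net = copy.deepcopy(net).to(device)
        for p in self.avg_net.parameters():
            p.requires_grad_(False)

    @torch.no_grad()
    def update(self):
        # avg <- decay * avg + (1 - decay) * current
        for p_avg, p in zip(self.avg_net.parameters(), self.net.parameters()):
            p_avg.mul_(self.decay).add_(p, alpha=1.0 - self.decay)
        for b_avg, b in zip(self.avg_net.buffers(), self.net.buffers()):
            b_avg.copy_(b)

    def __call__(self, *args, **kwargs):
        # forward pass through the averaged copy, e.g. logits, h = ema(images)
        return self.avg_net(*args, **kwargs)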
Example #16
def run(p_seed=0, p_epochs=150, p_kernel_size=5, p_logdir="temp"):
    # random number generator seed ------------------------------------------------#
    SEED = p_seed
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    np.random.seed(SEED)

    # kernel size of model --------------------------------------------------------#
    KERNEL_SIZE = p_kernel_size

    # number of epochs ------------------------------------------------------------#
    NUM_EPOCHS = p_epochs

    # file names ------------------------------------------------------------------#
    if not os.path.exists("../logs/%s" % p_logdir):
        os.makedirs("../logs/%s" % p_logdir)
    OUTPUT_FILE = str("../logs/%s/log%03d.out" % (p_logdir, SEED))
    MODEL_FILE = str("../logs/%s/model%03d.pth" % (p_logdir, SEED))

    # enable GPU usage ------------------------------------------------------------#
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    if not use_cuda:
        print("WARNING: no CUDA device available, aborting (GPU is required for training).")
        exit(0)

    # data augmentation methods ---------------------------------------------------#
    transform = transforms.Compose([
        RandomRotation(20, seed=SEED),
        transforms.RandomAffine(0, translate=(0.2, 0.2)),
    ])

    # data loader -----------------------------------------------------------------#
    train_dataset = MnistDataset(training=True, transform=transform)
    test_dataset = MnistDataset(training=False, transform=None)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=120,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=100,
                                              shuffle=False)

    # model selection -------------------------------------------------------------#
    if KERNEL_SIZE == 3:
        model = ModelM3().to(device)
    elif KERNEL_SIZE == 5:
        model = ModelM5().to(device)
    elif KERNEL_SIZE == 7:
        model = ModelM7().to(device)
    else:
        raise ValueError("unsupported kernel size: %d" % KERNEL_SIZE)

    summary(model, (1, 28, 28))

    # hyperparameter selection ----------------------------------------------------#
    ema = EMA(model, decay=0.999)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer,
                                                          gamma=0.98)

    # delete result file ----------------------------------------------------------#
    f = open(OUTPUT_FILE, 'w')
    f.close()

    # global variables ------------------------------------------------------------#
    g_step = 0
    max_correct = 0

    # training and evaluation loop ------------------------------------------------#
    for epoch in range(NUM_EPOCHS):
        #--------------------------------------------------------------------------#
        # train process                                                            #
        #--------------------------------------------------------------------------#
        model.train()
        train_loss = 0
        train_corr = 0
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            train_pred = output.argmax(dim=1, keepdim=True)
            train_corr += train_pred.eq(
                target.view_as(train_pred)).sum().item()
            train_loss += F.nll_loss(output, target, reduction='sum').item()
            loss.backward()
            optimizer.step()
            g_step += 1
            ema(model, g_step)
            if batch_idx % 100 == 0:
                print('Train Epoch: {} [{:05d}/{} ({:.0f}%)]\tLoss: {:.6f}'.
                      format(epoch, batch_idx * len(data),
                             len(train_loader.dataset),
                             100. * batch_idx / len(train_loader),
                             loss.item()))
        train_loss /= len(train_loader.dataset)
        train_accuracy = 100 * train_corr / len(train_loader.dataset)

        #--------------------------------------------------------------------------#
        # test process                                                             #
        #--------------------------------------------------------------------------#
        model.eval()
        ema.assign(model)
        test_loss = 0
        correct = 0
        total_pred = np.zeros(0)
        total_target = np.zeros(0)
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                test_loss += F.nll_loss(output, target, reduction='sum').item()
                pred = output.argmax(dim=1, keepdim=True)
                total_pred = np.append(total_pred, pred.cpu().numpy())
                total_target = np.append(total_target, target.cpu().numpy())
                correct += pred.eq(target.view_as(pred)).sum().item()
            if (max_correct < correct):
                torch.save(model.state_dict(), MODEL_FILE)
                max_correct = correct
                print("Best accuracy! correct images: %5d" % correct)
        ema.resume(model)

        #--------------------------------------------------------------------------#
        # output                                                                   #
        #--------------------------------------------------------------------------#
        test_loss /= len(test_loader.dataset)
        test_accuracy = 100 * correct / len(test_loader.dataset)
        best_test_accuracy = 100 * max_correct / len(test_loader.dataset)
        print(
            '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%) (best: {:.2f}%)\n'
            .format(test_loss, correct, len(test_loader.dataset),
                    test_accuracy, best_test_accuracy))

        f = open(OUTPUT_FILE, 'a')
        f.write(" %3d %12.6f %9.3f %12.6f %9.3f %9.3f\n" %
                (epoch, train_loss, train_accuracy, test_loss, test_accuracy,
                 best_test_accuracy))
        f.close()

        #--------------------------------------------------------------------------#
        # update learning rate scheduler                                           #
        #--------------------------------------------------------------------------#
        lr_scheduler.step()
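This loop uses yet another calling convention: the EMA object is invoked as ema(model, g_step) after each optimizer step, and assign(model)/resume(model) swap the averaged weights in before testing and back afterwards. The sketch below mirrors the one after Example #9 with that interface; the step-dependent warm-up of the decay is an assumption.

import torch


# Hedged sketch of the ema(model, step) / assign(model) / resume(model)
# interface used in the training loop above.
class EMA:
    def __init__(self, model, decay=0.999):
        self.decay = decay
        self.shadow = {n: p.detach().clone()
                       for n, p in model.named_parameters() if p.requires_grad}
        self.backup = {}

    @torch.no_grad()
    def __call__(self, model, step):
        # warm up the decay early in training (assumption)
        decay = min(self.decay, (1.0 + step) / (10.0 + step))
        for n, p in model.named_parameters():
            if n in self.shadow:
                self.shadow[n].mul_(decay).add_(p, alpha=1.0 - decay)

    @torch.no_grad()
    def assign(self, model):
        # load averaged weights for evaluation, keeping a backup of the live ones
        for n, p in model.named_parameters():
            if n in self.shadow:
                self.backup[n] = p.detach().clone()
                p.copy_(self.shadow[n])

    @torch.no_grad()
    def resume(self, model):
        # restore the live training weights
        for n, p in model.named_parameters():
            if n in self.backup:
                p.copy_(self.backup[n])
        self.backup = {}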
Example #17
def train_val_model(pipeline_cfg, model_cfg, train_cfg):
    data_pipeline = DataPipeline(**pipeline_cfg)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if model_cfg['cxt_emb_pretrained'] is not None:
        model_cfg['cxt_emb_pretrained'] = torch.load(
            model_cfg['cxt_emb_pretrained'])
    bidaf = BiDAF(word_emb=data_pipeline.word_type.vocab.vectors, **model_cfg)
    ema = EMA(train_cfg['exp_decay_rate'])
    for name, param in bidaf.named_parameters():
        if param.requires_grad:
            ema.register(name, param.data)
    parameters = filter(lambda p: p.requires_grad, bidaf.parameters())
    optimizer = optim.Adadelta(parameters, lr=train_cfg['lr'])
    criterion = nn.CrossEntropyLoss()

    result = {'best_f1': 0.0, 'best_model': None}

    num_epochs = train_cfg['num_epochs']
    for epoch in range(1, num_epochs + 1):
        print('Epoch {}/{}'.format(epoch, num_epochs))
        print('-' * 10)
        for phase in ['train', 'val']:
            val_answers = dict()
            val_f1 = 0
            val_em = 0
            val_cnt = 0
            val_r = 0

            if phase == 'train':
                bidaf.train()
            else:
                bidaf.eval()
                backup_params = EMA(0)
                for name, param in bidaf.named_parameters():
                    if param.requires_grad:
                        backup_params.register(name, param.data)
                        param.data.copy_(ema.get(name))

            with torch.set_grad_enabled(phase == 'train'):
                for batch_num, batch in enumerate(
                        data_pipeline.data_iterators[phase]):
                    optimizer.zero_grad()
                    p1, p2 = bidaf(batch)
                    loss = criterion(p1, batch.s_idx) + criterion(
                        p2, batch.e_idx)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                        for name, param in bidaf.named_parameters():
                            if param.requires_grad:
                                ema.update(name, param.data)
                        if batch_num % train_cfg['batch_per_disp'] == 0:
                            batch_loss = loss.item()
                            print('batch %d: loss %.3f' %
                                  (batch_num, batch_loss))

                    if phase == 'val':
                        batch_size, c_len = p1.size()
                        val_cnt += batch_size
                        ls = nn.LogSoftmax(dim=1)
                        mask = (torch.ones(c_len, c_len) * float('-inf')).to(device).tril(-1). \
                            unsqueeze(0).expand(batch_size, -1, -1)
                        score = (ls(p1).unsqueeze(2) +
                                 ls(p2).unsqueeze(1)) + mask
                        score, s_idx = score.max(dim=1)
                        score, e_idx = score.max(dim=1)
                        s_idx = torch.gather(s_idx, 1,
                                             e_idx.view(-1, 1)).squeeze()

                        for i in range(batch_size):
                            answer = (s_idx[i], e_idx[i])
                            gt = (batch.s_idx[i], batch.e_idx[i])
                            val_f1 += f1_score(answer, gt)
                            val_em += exact_match_score(answer, gt)
                            val_r += r_score(answer, gt)

            if phase == 'val':
                val_f1 = val_f1 * 100 / val_cnt
                val_em = val_em * 100 / val_cnt
                val_r = val_r * 100 / val_cnt
                print('Epoch %d: %s f1 %.3f | %s em %.3f |  %s rouge %.3f' %
                      (epoch, phase, val_f1, phase, val_em, phase, val_r))
                if val_f1 > result['best_f1']:
                    result['best_f1'] = val_f1
                    result['best_em'] = val_em
                    result['best_model'] = copy.deepcopy(bidaf.state_dict())
                    torch.save(result, train_cfg['ckpoint_file'])
                    # with open(train_cfg['val_answers'], 'w', encoding='utf-8') as f:
                    #     print(json.dumps(val_answers), file=f)
                for name, param in bidaf.named_parameters():
                    if param.requires_grad:
                        param.data.copy_(backup_params.get(name))
Example #18
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('--data', default='mimic3.npz', help='data file')
    parser.add_argument('--seed',
                        type=int,
                        default=None,
                        help='random seed. Randomly set if not specified.')

    # training options
    parser.add_argument('--nz',
                        type=int,
                        default=32,
                        help='dimension of latent variable')
    parser.add_argument('--epoch',
                        type=int,
                        default=500,
                        help='number of training epochs')
    parser.add_argument('--batch-size',
                        type=int,
                        default=64,
                        help='batch size')
    # Use smaller test batch size to accommodate more importance samples
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=32,
                        help='batch size for validation and test set')
    parser.add_argument('--lr',
                        type=float,
                        default=2e-4,
                        help='encoder/decoder learning rate')
    parser.add_argument('--dis-lr',
                        type=float,
                        default=3e-4,
                        help='discriminator learning rate')
    parser.add_argument('--min-lr',
                        type=float,
                        default=1e-4,
                        help='min encoder/decoder learning rate for LR '
                        'scheduler. -1 to disable annealing')
    parser.add_argument('--min-dis-lr',
                        type=float,
                        default=1.5e-4,
                        help='min discriminator learning rate for LR '
                        'scheduler. -1 to disable annealing')
    parser.add_argument('--wd', type=float, default=1e-4, help='weight decay')
    parser.add_argument('--overlap',
                        type=float,
                        default=.5,
                        help='kernel overlap')
    parser.add_argument('--cls',
                        type=float,
                        default=1,
                        help='classification weight')
    parser.add_argument('--clsdep',
                        type=int,
                        default=1,
                        help='number of layers for classifier')
    parser.add_argument('--eval-interval',
                        type=int,
                        default=1,
                        help='AUC evaluation interval. '
                        '0 to disable evaluation.')
    parser.add_argument('--save-interval',
                        type=int,
                        default=0,
                        help='interval to save models. 0 to disable saving.')
    parser.add_argument('--prefix',
                        default='pbigan',
                        help='prefix of output directory')
    parser.add_argument('--comp',
                        type=int,
                        default=7,
                        help='continuous convolution kernel size')
    parser.add_argument('--ae',
                        type=float,
                        default=1,
                        help='autoencoding regularization strength')
    parser.add_argument('--aeloss',
                        default='mse',
                        help='autoencoding loss. (options: mse, smooth_l1)')
    parser.add_argument('--dec-ch',
                        default='8-16-16',
                        help='decoder architecture')
    parser.add_argument('--enc-ch',
                        default='64-32-32-16',
                        help='encoder architecture')
    parser.add_argument('--dis-ch',
                        default=None,
                        help='discriminator architecture. Use encoder '
                        'architecture if unspecified.')
    parser.add_argument('--rescale',
                        dest='rescale',
                        action='store_const',
                        const=True,
                        default=True,
                        help='if set, rescale time to [-1, 1]')
    parser.add_argument('--no-rescale',
                        dest='rescale',
                        action='store_const',
                        const=False)
    parser.add_argument('--cconvnorm',
                        dest='cconv_norm',
                        action='store_const',
                        const=True,
                        default=True,
                        help='if set, normalize continuous convolutional '
                        'layer using mean pooling')
    parser.add_argument('--no-cconvnorm',
                        dest='cconv_norm',
                        action='store_const',
                        const=False)
    parser.add_argument('--cconv-ref',
                        type=int,
                        default=98,
                        help='number of evenly-spaced reference locations '
                        'for continuous convolutional layer')
    parser.add_argument('--dec-ref',
                        type=int,
                        default=128,
                        help='number of evenly-spaced reference locations '
                        'for decoder')
    parser.add_argument('--trans',
                        type=int,
                        default=2,
                        help='number of encoder layers')
    parser.add_argument('--ema',
                        dest='ema',
                        type=int,
                        default=0,
                        help='start epoch of exponential moving average '
                        '(EMA). -1 to disable EMA')
    parser.add_argument('--ema-decay',
                        type=float,
                        default=.9999,
                        help='EMA decay')
    parser.add_argument('--mmd',
                        type=float,
                        default=1,
                        help='MMD strength for latent variable')

    args = parser.parse_args()

    nz = args.nz

    epochs = args.epoch
    eval_interval = args.eval_interval
    save_interval = args.save_interval

    if args.seed is None:
        rnd = np.random.RandomState(None)
        random_seed = rnd.randint(np.iinfo(np.uint32).max)
    else:
        random_seed = args.seed
    rnd = np.random.RandomState(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)

    max_time = 5
    cconv_ref = args.cconv_ref
    overlap = args.overlap
    train_dataset, val_dataset, test_dataset = time_series.split_data(
        args.data, rnd, max_time, cconv_ref, overlap, device, args.rescale)

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              drop_last=True,
                              collate_fn=train_dataset.collate_fn)
    n_train_batch = len(train_loader)

    time_loader = DataLoader(train_dataset,
                             batch_size=args.batch_size,
                             shuffle=True,
                             drop_last=True,
                             collate_fn=train_dataset.collate_fn)

    val_loader = DataLoader(val_dataset,
                            batch_size=args.test_batch_size,
                            shuffle=False,
                            collate_fn=val_dataset.collate_fn)

    test_loader = DataLoader(test_dataset,
                             batch_size=args.test_batch_size,
                             shuffle=False,
                             collate_fn=test_dataset.collate_fn)

    in_channels, seq_len = train_dataset.data.shape[1:]

    if args.dis_ch is None:
        args.dis_ch = args.enc_ch

    dec_channels = [int(c) for c in args.dec_ch.split('-')] + [in_channels]
    enc_channels = [int(c) for c in args.enc_ch.split('-')]
    dis_channels = [int(c) for c in args.dis_ch.split('-')]

    out_channels = enc_channels[0]

    squash = torch.sigmoid
    if args.rescale:
        squash = torch.tanh

    dec_ch_up = 2**(len(dec_channels) - 2)
    assert args.dec_ref % dec_ch_up == 0, (
        f'--dec-ref={args.dec_ref} is not divisible by {dec_ch_up}.')
    dec_len0 = args.dec_ref // dec_ch_up
    grid_decoder = GridDecoder(nz, dec_channels, dec_len0, squash)

    decoder = Decoder(grid_decoder, max_time=max_time,
                      dec_ref=args.dec_ref).to(device)
    cconv = ContinuousConv1D(in_channels,
                             out_channels,
                             max_time,
                             cconv_ref,
                             overlap_rate=overlap,
                             kernel_size=args.comp,
                             norm=args.cconv_norm).to(device)
    encoder = Encoder(cconv, nz, enc_channels, args.trans).to(device)

    classifier = Classifier(nz, args.clsdep).to(device)

    pbigan = PBiGAN(encoder, decoder, classifier,
                    ae_loss=args.aeloss).to(device)

    ema = None
    if args.ema >= 0:
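        # Maintain an exponential moving average of pbigan's weights with decay
        # args.ema_decay, starting at epoch args.ema (see the --ema option above).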
        ema = EMA(pbigan, args.ema_decay, args.ema)

    critic_cconv = ContinuousConv1D(in_channels,
                                    out_channels,
                                    max_time,
                                    cconv_ref,
                                    overlap_rate=overlap,
                                    kernel_size=args.comp,
                                    norm=args.cconv_norm).to(device)
    critic_embed = 32
    critic = ConvCritic(critic_cconv, nz, dis_channels,
                        critic_embed).to(device)

    optimizer = optim.Adam(pbigan.parameters(),
                           lr=args.lr,
                           betas=(0, .999),
                           weight_decay=args.wd)
    critic_optimizer = optim.Adam(critic.parameters(),
                                  lr=args.dis_lr,
                                  betas=(0, .999),
                                  weight_decay=args.wd)

    scheduler = make_scheduler(optimizer, args.lr, args.min_lr, epochs)
    dis_scheduler = make_scheduler(critic_optimizer, args.dis_lr,
                                   args.min_dis_lr, epochs)

    path = '{}_{}'.format(args.prefix, datetime.now().strftime('%m%d.%H%M%S'))

    output_dir = Path('results') / 'mimic3-pbigan' / path
    print(output_dir)
    log_dir = mkdir(output_dir / 'log')
    model_dir = mkdir(output_dir / 'model')

    start_epoch = 0

    with (log_dir / 'seed.txt').open('w') as f:
        print(random_seed, file=f)
    with (log_dir / 'gpu.txt').open('a') as f:
        print(torch.cuda.device_count(), start_epoch, file=f)
    with (log_dir / 'args.txt').open('w') as f:
        for key, val in sorted(vars(args).items()):
            print(f'{key}: {val}', file=f)
    with (log_dir / 'params.txt').open('w') as f:

        def print_params_count(module, name):
            try:  # sum counts if module is a list
                params_count = sum(count_parameters(m) for m in module)
            except TypeError:
                params_count = count_parameters(module)
            print(f'{name} {params_count}', file=f)

        print_params_count(grid_decoder, 'grid_decoder')
        print_params_count(decoder, 'decoder')
        print_params_count(cconv, 'cconv')
        print_params_count(encoder, 'encoder')
        print_params_count(classifier, 'classifier')
        print_params_count(pbigan, 'pbigan')
        print_params_count(critic, 'critic')
        print_params_count([pbigan, critic], 'total')

    tracker = Tracker(log_dir, n_train_batch)
    evaluator = Evaluator(pbigan, val_loader, test_loader, log_dir)
    start = time.time()
    epoch_start = start

    batch_size = args.batch_size

    for epoch in range(start_epoch, epochs):
        loss_breakdown = defaultdict(float)
        epoch_start = time.time()

        if epoch >= 40:
            args.cls = 200

        for ((val, idx, mask, y, _, cconv_graph),
             (_, idx_t, mask_t, _, index, _)) in zip(train_loader,
                                                     time_loader):

            z_enc, x_recon, z_gen, x_gen, ae_loss, cls_loss = pbigan(
                val, idx, mask, y, cconv_graph, idx_t, mask_t)

            cconv_graph_gen = train_dataset.make_graph(x_gen, idx_t, mask_t,
                                                       index)

            # Don't need pbigan.requires_grad_(False);
            # critic takes as input only the detached tensors.
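            # (detaching z_enc and the generated graph below means gradients from
            # the critic update never reach the encoder/decoder parameters)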
            real = critic(cconv_graph, batch_size, z_enc.detach())
            detached_graph = [[cat_y.detach() for cat_y in x] if i == 2 else x
                              for i, x in enumerate(cconv_graph_gen)]
            fake = critic(detached_graph, batch_size, z_gen.detach())

            D_loss = gan_loss(real, fake, 1, 0)

            critic_optimizer.zero_grad()
            D_loss.backward()
            critic_optimizer.step()

            for p in critic.parameters():
                p.requires_grad_(False)
            real = critic(cconv_graph, batch_size, z_enc)
            fake = critic(cconv_graph_gen, batch_size, z_gen)

            G_loss = gan_loss(real, fake, 0, 1)

            mmd_loss = mmd(z_enc, z_gen)

            loss = (G_loss + ae_loss * args.ae + cls_loss * args.cls +
                    mmd_loss * args.mmd)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            for p in critic.parameters():
                p.requires_grad_(True)

            if ema:
                ema.update()

            loss_breakdown['D'] += D_loss.item()
            loss_breakdown['G'] += G_loss.item()
            loss_breakdown['AE'] += ae_loss.item()
            loss_breakdown['MMD'] += mmd_loss.item()
            loss_breakdown['CLS'] += cls_loss.item()
            loss_breakdown['total'] += loss.item()

        if scheduler:
            scheduler.step()
        if dis_scheduler:
            dis_scheduler.step()

        cur_time = time.time()
        tracker.log(epoch, loss_breakdown, cur_time - epoch_start,
                    cur_time - start)

        if eval_interval > 0 and (epoch + 1) % eval_interval == 0:
            if ema:
                ema.apply()
                evaluator.evaluate(epoch)
                ema.restore()
            else:
                evaluator.evaluate(epoch)

        model_dict = {
            'pbigan': pbigan.state_dict(),
            'critic': critic.state_dict(),
            'ema': ema.state_dict() if ema else None,
            'epoch': epoch + 1,
            'args': args,
        }
        torch.save(model_dict, str(log_dir / 'model.pth'))
        if save_interval > 0 and (epoch + 1) % save_interval == 0:
            torch.save(model_dict, str(model_dir / f'{epoch:04d}.pth'))

    print(output_dir)
Ejemplo n.º 19
0
def train():
    n_iters_per_epoch = args.n_imgs_per_epoch // args.batchsize
    n_iters_all = n_iters_per_epoch * args.n_epochs #/ args.mu_c
    epsilon = 0.000001

    model, criteria_x, criteria_u = set_model()
    lb_guessor = LabelGuessor(thresh=args.thr)
    ema = EMA(model, args.ema_alpha)

    wd_params, non_wd_params = [], []
    for param in model.parameters():
        if len(param.size()) == 1:
            non_wd_params.append(param)
        else:
            wd_params.append(param)
    param_list = [{'params': wd_params}, {'params': non_wd_params, 'weight_decay': 0}]
    optim = torch.optim.SGD(param_list, lr=args.lr, weight_decay=args.weight_decay, momentum=args.momentum, nesterov=True)
    lr_schdlr = WarmupCosineLrScheduler(optim, max_iter=n_iters_all, warmup_iter=0)

    dltrain_x, dltrain_u, dltrain_all = get_train_loader(args.batchsize, args.mu, args.mu_c, n_iters_per_epoch, 
                                                         L=args.n_labeled, seed=args.seed)
    train_args = dict(
        model=model,
        criteria_x=criteria_x,
        criteria_u=criteria_u,
        optim=optim,
        lr_schdlr=lr_schdlr,
        ema=ema,
        dltrain_x=dltrain_x,
        dltrain_u=dltrain_u,
        dltrain_all=dltrain_all,
        lb_guessor=lb_guessor,
    )
    n_labeled = int(args.n_labeled / args.n_classes)
    best_acc, top1 = -1, -1
    results = {'top 1 acc': [], 'best_acc': []}
    
    b_schedule = [args.n_epochs/2, 3*args.n_epochs/4]
    if args.boot_schedule == 1:
        step = int(args.n_epochs/3)
        b_schedule = [step, 2*step]
    elif args.boot_schedule == 2:
        step = int(args.n_epochs/4)
        b_schedule = [step, 2*step, 3*step]
        
    for e in range(args.n_epochs):
        if args.bootstrap > 1 and (e in b_schedule):
            seed = 99
            n_labeled *= args.bootstrap
            name = sort_unlabeled(ema, n_labeled)
            print("Bootstrap at epoch ", e," Name = ",name)
            dltrain_x, dltrain_u, dltrain_all = get_train_loader(args.batchsize, args.mu, args.mu_c, n_iters_per_epoch, 
                                                                 L=10*n_labeled, seed=seed, name=name)
            train_args = dict(
                model=model,
                criteria_x=criteria_x,
                criteria_u=criteria_u,
                optim=optim,
                lr_schdlr=lr_schdlr,
                ema=ema,
                dltrain_x=dltrain_x,
                dltrain_u=dltrain_u,
                dltrain_all=dltrain_all,
                lb_guessor=lb_guessor,
            )

        model.train()
        train_one_epoch(**train_args)
        torch.cuda.empty_cache()

        if args.test == 0 or args.lam_clr < epsilon:
            top1 = evaluate(ema) * 100
        elif args.test == 1:
            memory_data = utils.CIFAR10Pair(root='dataset', train=True, transform=utils.test_transform, download=False)
            memory_data_loader = DataLoader(memory_data, batch_size=args.batchsize, shuffle=False, num_workers=16, pin_memory=True)
            test_data = utils.CIFAR10Pair(root='dataset', train=False, transform=utils.test_transform, download=False)
            test_data_loader = DataLoader(test_data, batch_size=args.batchsize, shuffle=False, num_workers=16, pin_memory=True)
            c = len(memory_data.classes) #10
            top1 = test(model, memory_data_loader, test_data_loader, c, e)
            
        best_acc = top1 if best_acc < top1 else best_acc

        results['top 1 acc'].append('{:.4f}'.format(top1))
        results['best_acc'].append('{:.4f}'.format(best_acc))
        data_frame = pd.DataFrame(data=results)
        data_frame.to_csv(result_dir + '/' + save_name_pre + '.accuracy.csv', index_label='epoch')

        log_msg = [
            'epoch: {}'.format(e + 1),
            'top 1 acc: {:.4f}'.format(top1),
            'best_acc: {:.4f}'.format(best_acc)]
        print(', '.join(log_msg))
Ejemplo n.º 20
0
def main():
    parser = argparse.ArgumentParser()

    default_dataset = 'toy-data.npz'
    parser.add_argument('--data', default=default_dataset, help='data file')
    parser.add_argument('--seed',
                        type=int,
                        default=None,
                        help='random seed. Randomly set if not specified.')

    # training options
    parser.add_argument('--nz',
                        type=int,
                        default=32,
                        help='dimension of latent variable')
    parser.add_argument('--epoch',
                        type=int,
                        default=1000,
                        help='number of training epochs')
    parser.add_argument('--batch-size',
                        type=int,
                        default=128,
                        help='batch size')
    parser.add_argument('--lr',
                        type=float,
                        default=8e-5,
                        help='encoder/decoder learning rate')
    parser.add_argument('--dis-lr',
                        type=float,
                        default=1e-4,
                        help='discriminator learning rate')
    parser.add_argument('--min-lr',
                        type=float,
                        default=5e-5,
                        help='min encoder/decoder learning rate for LR '
                        'scheduler. -1 to disable annealing')
    parser.add_argument('--min-dis-lr',
                        type=float,
                        default=7e-5,
                        help='min discriminator learning rate for LR '
                        'scheduler. -1 to disable annealing')
    parser.add_argument('--wd', type=float, default=0, help='weight decay')
    parser.add_argument('--overlap',
                        type=float,
                        default=.5,
                        help='kernel overlap')
    parser.add_argument('--no-norm-trans',
                        action='store_true',
                        help='if set, use Gaussian posterior without '
                        'transformation')
    parser.add_argument('--plot-interval',
                        type=int,
                        default=1,
                        help='plot interval. 0 to disable plotting.')
    parser.add_argument('--save-interval',
                        type=int,
                        default=0,
                        help='interval to save models. 0 to disable saving.')
    parser.add_argument('--prefix',
                        default='pbigan',
                        help='prefix of output directory')
    parser.add_argument('--comp',
                        type=int,
                        default=7,
                        help='continuous convolution kernel size')
    parser.add_argument('--ae',
                        type=float,
                        default=.2,
                        help='autoencoding regularization strength')
    parser.add_argument('--aeloss',
                        default='smooth_l1',
                        help='autoencoding loss. (options: mse, smooth_l1)')
    parser.add_argument('--ema',
                        dest='ema',
                        type=int,
                        default=-1,
                        help='start epoch of exponential moving average '
                        '(EMA). -1 to disable EMA')
    parser.add_argument('--ema-decay',
                        type=float,
                        default=.9999,
                        help='EMA decay')
    parser.add_argument('--mmd',
                        type=float,
                        default=1,
                        help='MMD strength for latent variable')

    # squash is off when rescale is off
    parser.add_argument('--squash',
                        dest='squash',
                        action='store_const',
                        const=True,
                        default=True,
                        help='bound the generated time series value '
                        'using tanh')
    parser.add_argument('--no-squash',
                        dest='squash',
                        action='store_const',
                        const=False)

    # rescale to [-1, 1]
    parser.add_argument('--rescale',
                        dest='rescale',
                        action='store_const',
                        const=True,
                        default=True,
                        help='if set, rescale time to [-1, 1]')
    parser.add_argument('--no-rescale',
                        dest='rescale',
                        action='store_const',
                        const=False)

    args = parser.parse_args()

    batch_size = args.batch_size
    nz = args.nz

    epochs = args.epoch
    plot_interval = args.plot_interval
    save_interval = args.save_interval

    try:
        npz = np.load(args.data)
        train_data = npz['data']
        train_time = npz['time']
        train_mask = npz['mask']
    except FileNotFoundError:
        if args.data != default_dataset:
            raise
        # Generate the default toy dataset from scratch
        train_data, train_time, train_mask, _, _ = gen_data(
            n_samples=10000,
            seq_len=200,
            max_time=1,
            poisson_rate=50,
            obs_span_rate=.25,
            save_file=default_dataset)

    _, in_channels, seq_len = train_data.shape
    train_time *= train_mask

    if args.seed is None:
        rnd = np.random.RandomState(None)
        random_seed = rnd.randint(np.iinfo(np.uint32).max)
    else:
        random_seed = args.seed
    rnd = np.random.RandomState(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)

    # Scale time
    max_time = 5
    train_time *= max_time

    squash = None
    rescaler = None
    if args.rescale:
        rescaler = Rescaler(train_data)
        train_data = rescaler.rescale(train_data)
        if args.squash:
            squash = torch.tanh

    out_channels = 64
    cconv_ref = 98

    train_dataset = TimeSeries(train_data,
                               train_time,
                               train_mask,
                               label=None,
                               max_time=max_time,
                               cconv_ref=cconv_ref,
                               overlap_rate=args.overlap,
                               device=device)

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              drop_last=True,
                              collate_fn=train_dataset.collate_fn)
    n_train_batch = len(train_loader)

    time_loader = DataLoader(train_dataset,
                             batch_size=batch_size,
                             shuffle=True,
                             drop_last=True,
                             collate_fn=train_dataset.collate_fn)

    test_loader = DataLoader(train_dataset,
                             batch_size=batch_size,
                             collate_fn=train_dataset.collate_fn)

    grid_decoder = SeqGeneratorDiscrete(in_channels, nz, squash)
    decoder = Decoder(grid_decoder, max_time=max_time).to(device)

    cconv = ContinuousConv1D(in_channels,
                             out_channels,
                             max_time,
                             cconv_ref,
                             overlap_rate=args.overlap,
                             kernel_size=args.comp,
                             norm=True).to(device)
    encoder = Encoder(cconv, nz, not args.no_norm_trans).to(device)

    pbigan = PBiGAN(encoder, decoder, args.aeloss).to(device)

    critic_cconv = ContinuousConv1D(in_channels,
                                    out_channels,
                                    max_time,
                                    cconv_ref,
                                    overlap_rate=args.overlap,
                                    kernel_size=args.comp,
                                    norm=True).to(device)
    critic = ConvCritic(critic_cconv, nz).to(device)

    ema = None
    if args.ema >= 0:
        ema = EMA(pbigan, args.ema_decay, args.ema)

    optimizer = optim.Adam(pbigan.parameters(),
                           lr=args.lr,
                           weight_decay=args.wd)
    critic_optimizer = optim.Adam(critic.parameters(),
                                  lr=args.dis_lr,
                                  weight_decay=args.wd)

    scheduler = make_scheduler(optimizer, args.lr, args.min_lr, epochs)
    dis_scheduler = make_scheduler(critic_optimizer, args.dis_lr,
                                   args.min_dis_lr, epochs)

    path = '{}_{}'.format(args.prefix, datetime.now().strftime('%m%d.%H%M%S'))

    output_dir = Path('results') / 'toy-pbigan' / path
    print(output_dir)
    log_dir = mkdir(output_dir / 'log')
    model_dir = mkdir(output_dir / 'model')

    start_epoch = 0

    with (log_dir / 'seed.txt').open('w') as f:
        print(random_seed, file=f)
    with (log_dir / 'gpu.txt').open('a') as f:
        print(torch.cuda.device_count(), start_epoch, file=f)
    with (log_dir / 'args.txt').open('w') as f:
        for key, val in sorted(vars(args).items()):
            print(f'{key}: {val}', file=f)

    tracker = Tracker(log_dir, n_train_batch)
    visualizer = Visualizer(encoder, decoder, batch_size, max_time,
                            test_loader, rescaler, output_dir, device)
    start = time.time()
    epoch_start = start

    for epoch in range(start_epoch, epochs):
        loss_breakdown = defaultdict(float)

        for ((val, idx, mask, _, cconv_graph),
             (_, idx_t, mask_t, index, _)) in zip(train_loader, time_loader):

            z_enc, x_recon, z_gen, x_gen, ae_loss = pbigan(
                val, idx, mask, cconv_graph, idx_t, mask_t)

            cconv_graph_gen = train_dataset.make_graph(x_gen, idx_t, mask_t,
                                                       index)

            real = critic(cconv_graph, batch_size, z_enc)
            fake = critic(cconv_graph_gen, batch_size, z_gen)

            D_loss = gan_loss(real, fake, 1, 0)

            critic_optimizer.zero_grad()
            D_loss.backward(retain_graph=True)
            critic_optimizer.step()

            G_loss = gan_loss(real, fake, 0, 1)

            mmd_loss = mmd(z_enc, z_gen)

            loss = G_loss + ae_loss * args.ae + mmd_loss * args.mmd

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if ema:
                ema.update()

            loss_breakdown['D'] += D_loss.item()
            loss_breakdown['G'] += G_loss.item()
            loss_breakdown['AE'] += ae_loss.item()
            loss_breakdown['MMD'] += mmd_loss.item()
            loss_breakdown['total'] += loss.item()

        if scheduler:
            scheduler.step()
        if dis_scheduler:
            dis_scheduler.step()

        cur_time = time.time()
        tracker.log(epoch, loss_breakdown, cur_time - epoch_start,
                    cur_time - start)

        if plot_interval > 0 and (epoch + 1) % plot_interval == 0:
            if ema:
                ema.apply()
                visualizer.plot(epoch)
                ema.restore()
            else:
                visualizer.plot(epoch)

        model_dict = {
            'pbigan': pbigan.state_dict(),
            'critic': critic.state_dict(),
            'ema': ema.state_dict() if ema else None,
            'epoch': epoch + 1,
            'args': args,
        }
        torch.save(model_dict, str(log_dir / 'model.pth'))
        if save_interval > 0 and (epoch + 1) % save_interval == 0:
            torch.save(model_dict, str(model_dir / f'{epoch:04d}.pth'))

    print(output_dir)
Ejemplo n.º 21
0
    filter(lambda p: p.requires_grad, model.parameters()))
# optimizer = torch.optim.Adamax(filter(lambda p: p.requires_grad, model.parameters()))

if os.path.isfile(args.resume):
    print("=> loading checkpoint '{}'".format(args.resume))
    checkpoint = torch.load(args.resume)
    args.start_epoch = checkpoint['epoch']
    # best_prec1 = checkpoint['best_prec1']
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])  # TODO ?
    print("=> loaded checkpoint '{}' (epoch {})".format(
        args.resume, checkpoint['epoch']))
else:
    print("=> no checkpoint found at '{}'".format(args.resume))

ema = EMA(0.999)
for name, param in model.named_parameters():
    if param.requires_grad:
        ema.register(name, param.data)

print(model)
print('parameters-----')
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name, param.data.size())

if args.test == 1:
    print('Test mode')
    test(model, test_data)
else:
    print('Train mode')
Ejemplo n.º 22
0
 def __init__(self, period):
     self.value = None
     self.last = None
     self.ema_u = EMA(period)
     self.ema_d = EMA(period)
     self.tbl = None
Ejemplo n.º 23
0
    def fit(self, config, device):
        logging.info(json.dumps(config, indent=4, sort_keys=True))

        if config["char_embeddings"]:
            fields = SquadDataset.prepare_fields_char()
        else:
            fields = SquadDataset.prepare_fields()

        train, val = SquadDataset.splits(fields)
        fields = dict(fields)

        fields["question"].build_vocab(train, val, vectors=GloVe(name='6B', dim=config["embedding_size"]))

        if not type(fields["question_char"]) == torchtext.data.field.RawField:
            fields["question_char"].build_vocab(train, val, max_size=config["char_maxsize_vocab"])

        # Make it shuffle
        train_iter = BucketIterator(train, sort_key=lambda x: -(len(x.question) + len(x.document)),
                                    shuffle=True, sort=False, sort_within_batch=True,
                                    batch_size=config["batch_size"], train=True,
                                    repeat=False,
                                    device=device)

        val_iter = BucketIterator(val, sort_key=lambda x: -(len(x.question) + len(x.document)), sort=True,
                                  batch_size=config["batch_size"],
                                  repeat=False,
                                  device=device)
        #
        # model = torch.load(
        #     "saved/65F1_checkpoint_<class 'trainer.ModelFramework'>_L_2.1954014434733815_2019-06-28_10:06_pcknot2.pt").to(
        #     device)
        if config["modelname"] == "baseline":
            model = Baseline(config, fields["question"].vocab).to(device)
        elif config["modelname"] == "bidaf_simplified":
            model = BidafSimplified(config, fields["question"].vocab).to(device)
        elif config["modelname"] == "bidaf":
            model = BidAF(config, fields['question'].vocab, fields["question_char"].vocab).to(device)
        # glorot_param_init(model)
        logging.info(f"Models has {count_parameters(model)} parameters")
        param_sizes, param_shapes = report_parameters(model)
        param_sizes = "\n'".join(str(param_sizes).split(", '"))
        param_shapes = "\n'".join(str(param_shapes).split(", '"))
        logging.debug(f"Model structure:\n{param_sizes}\n{param_shapes}\n")

        if config["optimizer"] == "adam":
            optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()),
                             lr=config["learning_rate"])
        else:
            raise NotImplementedError(f"Option {config['optimizer']} for \"optimizer\" setting is undefined.")

        start_time = time.time()
        try:
            best_val_loss = math.inf
            best_val_f1 = 0
            best_em = 0
            ema_active = False
            for it in range(config["max_iterations"]):
                logging.info(f"Iteration {it}")
                if "ema" in config and config["ema"]:
                    ema = EMA.ema_register(config, model)
                    ema_active = True

                self.train_epoch(model, CrossEntropyLoss(), optimizer, train_iter)

                if ema_active:
                    EMA.ema_update(ema, model)

                validation_loss, em, f1 = self.validate(model, CrossEntropyLoss(reduction='none'), val_iter,
                                                        ema=ema if "ema" in config and config[
                                                            "ema"] and ema_active else None)
                if validation_loss < best_val_loss: best_val_loss = validation_loss
                if f1 > best_val_f1: best_val_f1 = f1
                if em > best_em: best_em = em
                logging.info(f"BEST L/F1/EM = {best_val_loss:.2f}/{best_val_f1:.2f}/{best_em:.2f}")
                if em > 65:
                    # Do all of this on the CPU; it is memory intensive!
                    model.to(torch.device("cpu"))

                    if ema_active:
                        # backup current params and load ema params
                        backup_params = EMA.ema_backup_and_loadavg(ema, model)

                        torch.save(model,
                                   f"saved/checkpoint"
                                   f"_{str(self.__class__)}"
                                   f"_EM_{em:.2f}_F1_{f1:.2f}_L_{validation_loss:.2f}_{get_timestamp()}"
                                   f"_{socket.gethostname()}.pt")

                        # load back backed up params
                        EMA.ema_restore_backed_params(backup_params, model)

                    else:
                        torch.save(model,
                                   f"saved/checkpoint"
                                   f"_{str(self.__class__)}"
                                   f"_EM_{em:.2}_F1_{f1:.2}_L_{validation_loss:.2}_{get_timestamp()}"
                                   f"_{socket.gethostname()}.pt")

                    model.to(device)
                logging.info(f"Validation loss: {validation_loss}")

        except KeyboardInterrupt:
            logging.info('-' * 120)
            logging.info('Exit from training early.')
        finally:
            logging.info(f'Finished after {(time.time() - start_time) / 60} minutes.')
Ejemplo n.º 24
0
def train_bidaf(args, data):
    device = torch.device(
        f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    model = BiDAF(args, data.WORD.vocab.vectors).to(device)

    ema = EMA(args.exp_decay_rate)
    for name, param in model.named_parameters():
        if param.requires_grad:
            ema.register(name, param.data)
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adadelta(parameters, lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    writer = SummaryWriter(logdir='runs/' + args.model_time)

    model.train()
    loss, last_epoch = 0, -1
    max_dev_exact, max_dev_f1 = -1, -1
    best_model = copy.deepcopy(model)  # ensure best_model is defined even if dev F1 never improves

    iterator = data.train_iter
    for i, batch in tqdm(enumerate(iterator)):
        present_epoch = int(iterator.epoch)
        if present_epoch == args.epoch:
            break
        if present_epoch > last_epoch:
            print('epoch:', present_epoch + 1)
        last_epoch = present_epoch

        p1, p2 = model(batch)

        optimizer.zero_grad()
        # print(p1, batch.s_idx)
        # print(p2, batch.e_idx)
        batch_loss = criterion(p1, batch.s_idx) + criterion(p2, batch.e_idx)
        # print('p1', p1.shape, p1)
        # print('batch.s_idx', batch.s_idx.shape, batch.s_idx.shape)
        # print(loss, batch_loss.item())
        loss += batch_loss.item()
        # print(loss)
        # print(batch_loss.item())
        batch_loss.backward()
        optimizer.step()

        for name, param in model.named_parameters():
            if param.requires_grad:
                ema.update(name, param.data)

        if (i + 1) % args.print_freq == 0:
            dev_loss, dev_exact, dev_f1 = test(model, ema, args, data)
            c = (i + 1) // args.print_freq

            writer.add_scalar('loss/train', loss, c)
            writer.add_scalar('loss/dev', dev_loss, c)
            writer.add_scalar('exact_match/dev', dev_exact, c)
            writer.add_scalar('f1/dev', dev_f1, c)
            print(f'train loss: {loss:.3f} / dev loss: {dev_loss:.3f}'
                  f' / dev EM: {dev_exact:.3f} / dev F1: {dev_f1:.3f}')

            if dev_f1 > max_dev_f1:
                max_dev_f1 = dev_f1
                max_dev_exact = dev_exact
                best_model = copy.deepcopy(model)

            loss = 0
            model.train()

    writer.close()
    print(f'max dev EM: {max_dev_exact:.3f} / max dev F1: {max_dev_f1:.3f}')

    return best_model
Ejemplo n.º 25
0
    summ = np.vstack((summ, s))

rsi_returns = pd.DataFrame(
    summ, columns=['Security', 'Return', 'Holding Period', 'Purchase Date'])
rsi_returns.to_csv(
    r'Z:\School\Sem 4\Research Methodology\Paper\RSI_Returns.csv', index=False)

# EMA (suited for 13-34 day periods; for other periods, change the period_1 and
# period_2 attributes when creating the EMA object).
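# For example (hypothetical period values), a 20/50-day variant would only change
# the constructor call used in the loop below:
#     ema_obj = EMA(period_1=20, period_2=50)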

for s in samples:
    df = pd.read_csv(r'Z:\School\Sem 4\Research Methodology\Paper\Sample\\' +
                     s)
    df['Date'] = pd.to_datetime(df['Date'])
    df = df[df['Date'] >= start_date]
    df = df[df['Date'] <= end_date]
    ema_obj = EMA(period_1=13, period_2=34)
    ema_obj.generate(df, calc_returns=True)
    summ = ema_obj.get_summary()
    summaries.append(
        np.hstack(([[s[:-7]] for _ in range(summ.shape[0])], summ)))

summ = summaries[0]
for i, s in enumerate(summaries):
    if i == 0:
        continue
    summ = np.vstack((summ, s))

ema_returns = pd.DataFrame(
    summ, columns=['Security', 'Return', 'Holding Period', 'Purchase Date'])
ema_returns.to_csv(
    r'Z:\School\Sem 4\Research Methodology\Paper\EMA_13_34_Returns.csv',
    index=False)
Ejemplo n.º 26
0
 def __init__(self, period):
     self.value = None
     self.last = None
     self.ema_u = EMA(period)
     self.ema_d = EMA(period)
     self.tbl = None
Ejemplo n.º 27
0
def train():
    n_iters_per_epoch = args.n_imgs_per_epoch // args.batchsize
    n_iters_all = n_iters_per_epoch * args.n_epochs

    model, criteria_x, criteria_u = set_model()

    dltrain_x, dltrain_u = get_train_loader(args.batchsize,
                                            args.mu,
                                            n_iters_per_epoch,
                                            L=args.n_labeled,
                                            seed=args.seed)
    lb_guessor = LabelGuessor(thresh=args.thr)

    ema = EMA(model, args.ema_alpha)

    wd_params, non_wd_params = [], []
    for param in model.parameters():
        if len(param.size()) == 1:
            non_wd_params.append(param)
        else:
            wd_params.append(param)
    param_list = [{
        'params': wd_params
    }, {
        'params': non_wd_params,
        'weight_decay': 0
    }]
    optim = torch.optim.SGD(param_list,
                            lr=args.lr,
                            weight_decay=args.weight_decay,
                            momentum=args.momentum,
                            nesterov=True)
    lr_schdlr = WarmupCosineLrScheduler(optim,
                                        max_iter=n_iters_all,
                                        warmup_iter=0)

    train_args = dict(
        model=model,
        criteria_x=criteria_x,
        criteria_u=criteria_u,
        optim=optim,
        lr_schdlr=lr_schdlr,
        ema=ema,
        dltrain_x=dltrain_x,
        dltrain_u=dltrain_u,
        lb_guessor=lb_guessor,
        lambda_u=args.lam_u,
        lambda_c=args.lam_c,
        n_iters=n_iters_per_epoch,
    )
    best_acc = -1
    print('start to train')
    for e in range(args.n_epochs):
        model.train()
        print('epoch: {}'.format(e + 1))
        train_one_epoch(**train_args)
        torch.cuda.empty_cache()

        acc = evaluate(ema)
        best_acc = acc if best_acc < acc else best_acc
        log_msg = [
            'epoch: {}'.format(e), 'acc: {:.4f}'.format(acc),
            'best_acc: {:.4f}'.format(best_acc)
        ]
        print(', '.join(log_msg))

    sort_unlabeled(ema)
images_grid = unnormalize(images_grid)
save_grid(images_grid)
writer.add_image('input_images', images_grid, 0)


############################# Model ####################################
'''
The classification model is initialized here, along with the exponential
moving average (EMA) module:
    - the model is pushed to the GPU if one is available.
'''
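'''
The EMA class used below is imported elsewhere and its definition is not part
of this snippet. As a rough, hypothetical sketch, a model-wrapping EMA matching
the EMA(net, decay=0.9999) call could keep a shadow copy of each trainable
parameter and blend the live weights into it after every optimizer step:

    class EMA:
        def __init__(self, model, decay):
            self.model = model
            self.decay = decay
            # shadow copy of every trainable parameter
            self.shadow = {name: p.data.clone()
                           for name, p in model.named_parameters()
                           if p.requires_grad}

        def update(self):
            # shadow = decay * shadow + (1 - decay) * current weights
            for name, p in self.model.named_parameters():
                if p.requires_grad:
                    self.shadow[name].mul_(self.decay).add_(
                        p.data, alpha=1 - self.decay)
'''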

net = Wide_ResNet(28, 2, 0.3, 10)  # VanillaNet()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(device)
ema = EMA(net, decay=0.9999)


############################## Utils ###################################
'''
Training utils are initialized here, including:
    - CrossEntropyLoss - supervised loss.
    - KLDivLoss - unsupervised consistency loss
    - SGD optimizer
    - CosineAnnealingLR scheduler
    - Evaluation function
'''

criterion_sup = torch.nn.CrossEntropyLoss()
criterion_unsup = torch.nn.KLDivLoss(reduction='none')
optimizer = torch.optim.SGD(