Example #1
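A hyperparameter-search objective: it builds the Gowalla dataset, a NeuMF model, and a BCE trainer from fixed configs, trains with the given learning rate and L2 regularization, and returns whatever metric trainer.train() reports.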
def fitness(lr, l2_reg):
    set_seed(2021)
    device = torch.device('cuda')
    dataset_config = {
        'name': 'ProcessedDataset',
        'path': 'data/Gowalla/time',
        'device': device,
        'neg_ratio': 4
    }
    model_config = {
        'name': 'NeuMF',
        'embedding_size': 64,
        'device': device,
        'layer_sizes': [64, 64, 64]
    }
    trainer_config = {
        'name': 'BCETrainer',
        'optimizer': 'Adam',
        'lr': lr,
        'l2_reg': l2_reg,
        'device': device,
        'n_epochs': 1000,
        'batch_size': 2048,
        'dataloader_num_workers': 6,
        'test_batch_size': 64,
        'topks': [20],
        'mf_pretrain_epochs': 100,
        'mlp_pretrain_epochs': 100,
        'max_patience': 100
    }
    dataset = get_dataset(dataset_config)
    model = get_model(model_config, dataset)
    trainer = get_trainer(trainer_config, dataset, model)
    return trainer.train(verbose=True)
Example #2
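Builds a model on the reduced '0_dropui' split of the Gowalla dataset, then points the config back at the corresponding full split, resizes the model's interaction and item-similarity matrices to match, and runs inductive evaluation over the users and items that were held out.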
def main():
    log_path = __file__[:-3]
    init_run(log_path, 2021)

    device = torch.device('cuda')
    config = get_gowalla_config(device)
    dataset_config, model_config, trainer_config = config[3]
    dataset_config['path'] = dataset_config['path'][:-4] + '0_dropui'

    dataset = get_dataset(dataset_config)
    model = get_model(model_config, dataset)

    dataset_config['path'] = dataset_config['path'][:-7]
    new_dataset = get_dataset(dataset_config)
    model.config['dataset'] = new_dataset
    model.n_users, model.n_items = new_dataset.n_users, new_dataset.n_items
    data_mat = sp.coo_matrix((np.ones((len(
        new_dataset.train_array), )), np.array(new_dataset.train_array).T),
                             shape=(new_dataset.n_users, new_dataset.n_items),
                             dtype=np.float32).tocsr()
    model.data_mat = data_mat
    sim_mat = model.sim_mat.tocoo()
    sim_mat = sp.coo_matrix((sim_mat.data, (sim_mat.row, sim_mat.col)),
                            shape=(new_dataset.n_items, new_dataset.n_items))
    model.sim_mat = sim_mat.tocsr()
    trainer = get_trainer(trainer_config, new_dataset, model)
    trainer.inductive_eval(dataset.n_users, dataset.n_items)
Example #3
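A Tkinter callback, apparently from a bike-trainer control GUI: it scans for an ANT dongle and a physical trainer device (or a simulated one), updates the status labels, and enables the start button once the hardware is found. Note that get_trainer here returns a hardware handle, not an ML trainer.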
  def ScanForHW(self):
    global dev_trainer, dev_ant, simulatetrainer
    # get ANT stick
    if debug:
      print("get ant stick")
    if not dev_ant:
      dev_ant, msg = ant.get_ant(debug)
      if not dev_ant:
        self.ANTVariable.set(u"no ANT dongle found")
        return
    self.ANTVariable.set(u"ANT dongle found")

    self.PowerFactorVariable.set(powerfactor)
    if debug:
      print("get trainer")
    # find trainer model for Windows and Linux
    if not dev_trainer:
      if simulatetrainer:
        self.trainerVariable.set(u"Simulated Trainer")
      else:
        dev_trainer = trainer.get_trainer()
        if not dev_trainer:
          self.trainerVariable.set("Trainer not detected")
          return
        self.trainerVariable.set("Trainer detected")
        trainer.initialise_trainer(dev_trainer)  # initialise the trainer

    self.StartAPPbutton.config(state="normal")
Example #4
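A helper that closes over warmstart_set, pool_set, and args from an enclosing scope: it trains on the warm-start data plus the first num_use_pool pooled examples and returns the F1 score on the evaluation set.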
def server_train(order, num_use_pool):
    train_set = warmstart_set + [pool_set[o] for o in order[:num_use_pool]]
    trainer = get_trainer(args.model, args.model_seed, args.domain,
                          f'cuda:{args.gpu_idx}')
    model, _ = trainer.train(train_set, model_sel_set, args.batchsize,
                             args.max_epoch, args.patience, False)
    metric = trainer.evaluate_f1(model, eval_set)
    return metric
Example #5
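An active-learning loop: after training on the warm-start set, it repeatedly acquires batchsize examples from the pool according to the given criterion, retrains from scratch, and records the F1 score after each round.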
def main(criterion,
         evaluation_set,
         model_seed=0,
         data_seed=0,
         batchsize=25,
         max_epoch=100,
         patience=20,
         tot_acq=250,
         gpu_idx=0):
    data = pickle.load(open('data/restaurant.pkl', 'rb'))['seeds'][data_seed]
    train_set = copy(data['warmstart'])
    model_sel_set = copy(data['train_valid'])
    pool_dict = {i: p for i, p in enumerate(data['pool'])}
    eval_set = data[evaluation_set]
    eval_sents, eval_tags = zip(*eval_set)
    curve = []
    data_order = []
    trainer = get_trainer(model_seed, device=f'cuda:{gpu_idx}')
    trainer.train(train_set,
                  model_sel_set,
                  batchsize,
                  max_epoch,
                  patience,
                  verbose=False)
    f1 = trainer.evaluate_f1(trainer.best_model, eval_sents, eval_tags)
    curve.append(f1)
    for _ in trange(int(tot_acq / batchsize)):
        acquire_idxs = acquire(trainer.best_model, pool_dict, criterion,
                               batchsize)
        data_order.extend(acquire_idxs)
        for idx in acquire_idxs:
            train_set.append(pool_dict[idx])
            del pool_dict[idx]
        trainer = get_trainer(model_seed, device=f'cuda:{gpu_idx}')
        trainer.train(train_set,
                      model_sel_set,
                      batchsize,
                      max_epoch,
                      patience,
                      verbose=False)
        f1 = trainer.evaluate_f1(trainer.best_model, eval_sents, eval_tags)
        curve.append(f1)
    print(curve)
    print(np.mean(curve))
    store_baseline(curve, data_order, criterion, evaluation_set, model_seed,
                   data_seed, batchsize, max_epoch, patience, tot_acq)
Example #6
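A multi-mode entry point: depending on args.mode it trains, validates, or tests a single model, or, in 'ensemble' mode, averages the test logits of several backbone/checkpoint pairs and reports both per-model and ensemble accuracy.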
def main(args):

    if args.mode != 'ensemble':

        # Dataloaders
        dls = get_dataloaders(args)

        # Model
        model = get_model(args)

        # Trainer
        trainer = get_trainer(args, dls)

    # Mode
    if args.mode in ['train', 'training']:
        trainer.fit(model, dls['train_aug'], dls['validation'])
        trainer.validate(model=None, val_dataloaders=dls['validation'])
        trainer.test(model=None, test_dataloaders=dls['test'])

    elif args.mode in ['validate', 'validation', 'validating']:
        trainer.validate(model, val_dataloaders=dls['validation'])

    elif args.mode in ['test', 'testing']:
        trainer.test(model, test_dataloaders=dls['test'])

    elif args.mode == 'ensemble':
        predictions = []
        labels = []
        backbones = args.backbone
        model_checkpoints = args.model_checkpoint
        for backbone, model_checkpoint in zip(backbones, model_checkpoints):
            args.backbone = backbone
            args.model_checkpoint = model_checkpoint

            dls = get_dataloaders(args)
            model = get_model(args)

            # Predict
            model.eval().cuda()
            preds = []
            lbls = []
            for x, y in tqdm(dls['test']):
                logits = model(x.cuda()).detach().cpu()
                preds += [logits]
                lbls += [y]
            preds = torch.cat(preds, 0)
            predictions += [preds]
            lbls = torch.cat(lbls, 0)
            labels += [lbls]
            acc_single = accuracy(preds, lbls)
            print(f'{backbone} accuracy: {acc_single}')
        predictions = torch.stack(predictions, 1).mean(1)
        labels = labels[0]
        acc = accuracy(predictions, labels)
        print(f'Ensemble Accuracy: {acc}')

    else:
        raise Exception(f'Error. Mode "{args.mode}" not supported.')
Example #7

def idmr(data, criterion, evaluation_set, lens_proportions, model, model_seed, domain, data_seed,
         batchsize, max_epoch, patience, tot_acq, gpu_idx):
    '''
    lens_proportions is a dictionary mapping (len_low, len_high) ranges, both inclusive, to proportions.
    The acquisition order tries to preserve these proportions in the warmstart + actively acquired set.
    '''
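    # Expand each (low, high) range so every individual length maps to the
    # index of its length group.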
    len_groups = list(lens_proportions.keys())
    proportions = list(lens_proportions.values())
    len_groups_dict = {v: i for i, vs in enumerate(len_groups) for v in range(vs[0], vs[1]+1)}

    train_set = copy(data['warmstart'])
    pool_set = copy(data['pool'])
    pool_dict = {i: p for i, p in enumerate(pool_set)}
    model_sel_set = data['train_valid']
    eval_set = data[evaluation_set]
    f1s = []

    mcdropout = (criterion == 'bald')
    trainer = get_trainer(model, model_seed, domain, f'cuda:{gpu_idx}', mcdropout)
    trainer.train(train_set, model_sel_set, batchsize, max_epoch, patience, verbose=False)
    f1 = trainer.evaluate_f1(trainer.best_model, eval_set)
    f1s.append(f1)
    data_order = []
    for _ in trange(int(tot_acq / batchsize)):
        pool_idxs = list(pool_dict.keys())
        pool_sents, _ = zip(*pool_dict.values())
        if criterion == 'bald':
            pool_scores = score_bald(trainer.best_model, pool_sents)
        elif criterion == 'max-entropy':
            pool_scores = score_maxent(trainer.best_model, pool_sents)
        sorted_pool_scores, sorted_pool_idxs = zip(*sorted(zip(pool_scores, pool_idxs)))
        pool_lens = [len(nltk.word_tokenize(pool_dict[p][0])) for p in sorted_pool_idxs]
        train_lens = [len(nltk.word_tokenize(t[0])) for t in train_set]
        use_idxs = select_batch(batchsize, sorted_pool_idxs, pool_lens, train_lens, proportions, len_groups_dict, len_groups)
        train_set = train_set + [pool_dict[i] for i in use_idxs]
        data_order = data_order + use_idxs
        for idx in use_idxs:
            del pool_dict[idx]
        trainer = get_trainer(model, model_seed, domain, f'cuda:{gpu_idx}', mcdropout)
        trainer.train(train_set, model_sel_set, batchsize, max_epoch, patience, verbose=False)
        f1 = trainer.evaluate_f1(trainer.best_model, eval_set)
        f1s.append(f1)
    return f1s, data_order
Example #8
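A variant of the helper from Example #4 for data stored as three parallel lists (use_t, use_m, use_l) that are passed to the trainer as a single tuple.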
def server_train(order, num_use):
    use_t = list(warmstart_t) + [pool_t[o] for o in order[:num_use]]
    use_m = list(warmstart_m) + [pool_m[o] for o in order[:num_use]]
    use_l = list(warmstart_l) + [pool_l[o] for o in order[:num_use]]
    train_set = (use_t, use_m, use_l)
    trainer = get_trainer(args.model, args.model_seed, args.domain,
                          f'cuda:{args.gpu_idx}')
    model, _ = trainer.train(train_set, model_sel_set, args.batchsize,
                             args.max_epoch, args.patience, False)
    f1 = trainer.evaluate_f1(model, eval_set, args.batchsize)
    return f1
Example #9

def main(args):
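    # Load (x, fx) pairs, split them into train/validation sets, build the
    # loaders and the model, then hand everything to the trainer.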
    x, fx = get_data(args)

    device = torch.device("cuda" if args.cuda else "cpu")
    train_data, val_data = split_data(args, x, fx)

    if args.save_splits:
        save_splits(train_data, val_data)

    train_loader, val_loader = get_loaders(train_data, val_data)

    model = get_model(args)

    trainer = get_trainer(model, train_loader, val_loader, device, args)
    trainer.train()
Example #10

def recover_pack():
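    # Bundle the model, loaders, trainer, and criterion into a dotdict; the
    # optimizer and LR scheduler start as None (adjust_learning_rate below
    # may set them up for the current epoch).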
    train_loader, test_loader = get_loader()

    pack = dotdict({
        'net': get_model(),
        'train_loader': train_loader,
        'test_loader': test_loader,
        'trainer': get_trainer(),
        'criterion': get_criterion(),
        'optimizer': None,
        'lr_scheduler': None
    })

    adjust_learning_rate(cfg.base.epoch, pack)
    return pack
Example #11
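Like Example #2, but the model is first trained on the '0_dropui' split with TensorBoard logging before the full split is swapped in and the L2-normalized data matrix is rebuilt for inductive evaluation.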
def main():
    log_path = __file__[:-3]
    init_run(log_path, 2021)

    device = torch.device('cuda')
    config = get_gowalla_config(device)
    dataset_config, model_config, trainer_config = config[5]
    dataset_config['path'] = dataset_config['path'][:-4] + '0_dropui'

    writer = SummaryWriter(log_path)
    dataset = get_dataset(dataset_config)
    model = get_model(model_config, dataset)
    trainer = get_trainer(trainer_config, dataset, model)
    trainer.train(verbose=True, writer=writer)
    writer.close()

    dataset_config['path'] = dataset_config['path'][:-7]
    new_dataset = get_dataset(dataset_config)
    model.config['dataset'] = new_dataset
    model.n_users, model.n_items = new_dataset.n_users, new_dataset.n_items
    data_mat = model.get_data_mat(new_dataset)[:, :dataset.n_items]
    model.normalized_data_mat = normalize(data_mat, axis=1, norm='l2')
    trainer = get_trainer(trainer_config, new_dataset, model)
    trainer.inductive_eval(dataset.n_users, dataset.n_items)
Example #12

def server_train(order, num_use_pool):
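    # Closes over the module-level dataset and args: trains a classifier on
    # the warm-start data plus the first num_use_pool pooled examples and
    # returns its accuracy on the evaluation set.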
    effective_pool = [dataset['pool'][o] for o in order[:num_use_pool]]
    train_set = dataset['warmstart'] + effective_pool
    train_X, train_y = map(np.array, zip(*train_set))
    train_X = torch.tensor(train_X).float().to(f'cuda:{args.gpu_idx}')
    train_y = torch.tensor(train_y).long().to(f'cuda:{args.gpu_idx}')
    trainer = get_trainer(args.model_seed,
                          f'cuda:{args.gpu_idx}',
                          mcdropout=False)
    trainer.train((train_X, train_y), (model_sel_X, model_sel_y),
                  args.batchsize,
                  args.max_epoch,
                  args.patience,
                  test_steps=None,
                  verbose=False)
    metric = trainer.evaluate_acc(trainer.best_model, eval_X, eval_y)
    return metric