def fitness(lr, l2_reg):
    """Train a NeuMF model on Gowalla with the given learning rate and L2
    regularization, and return the trainer's final score.

    Used as the objective for hyper-parameter search: only ``lr`` and
    ``l2_reg`` vary between calls; everything else is fixed.
    """
    set_seed(2021)  # fixed seed so candidates are comparable
    dev = torch.device('cuda')
    data_cfg = {
        'name': 'ProcessedDataset',
        'path': 'data/Gowalla/time',
        'device': dev,
        'neg_ratio': 4,
    }
    model_cfg = {
        'name': 'NeuMF',
        'embedding_size': 64,
        'device': dev,
        'layer_sizes': [64, 64, 64],
    }
    train_cfg = {
        'name': 'BCETrainer',
        'optimizer': 'Adam',
        'lr': lr,
        'l2_reg': l2_reg,
        'device': dev,
        'n_epochs': 1000,
        'batch_size': 2048,
        'dataloader_num_workers': 6,
        'test_batch_size': 64,
        'topks': [20],
        'mf_pretrain_epochs': 100,
        'mlp_pretrain_epochs': 100,
        'max_patience': 100,
    }
    ds = get_dataset(data_cfg)
    net = get_model(model_cfg, ds)
    return get_trainer(train_cfg, ds, net).train(verbose=True)
def main():
    # Log directory = this script's path with the trailing ".py" stripped.
    log_path = __file__[:-3]
    init_run(log_path, 2021)
    device = torch.device('cuda')
    config = get_gowalla_config(device)
    # Use the 4th (dataset, model, trainer) configuration triple.
    dataset_config, model_config, trainer_config = config[3]
    # Replace the last 4 chars of the path with "0_dropui" to train on the
    # reduced dataset variant — presumably "dropped users/items"; TODO confirm
    # the directory naming scheme against the data layout.
    dataset_config['path'] = dataset_config['path'][:-4] + '0_dropui'
    dataset = get_dataset(dataset_config)
    model = get_model(model_config, dataset)
    # Drop the 7-char "_dropui" suffix to load the full dataset for evaluation.
    dataset_config['path'] = dataset_config['path'][:-7]
    new_dataset = get_dataset(dataset_config)
    # Re-point the already-built model at the full user/item space.
    model.config['dataset'] = new_dataset
    model.n_users, model.n_items = new_dataset.n_users, new_dataset.n_items
    # Rebuild the user-item interaction matrix: a 1.0 at every observed
    # (user, item) pair from the full training array, stored as CSR.
    data_mat = sp.coo_matrix((np.ones((len(new_dataset.train_array), )),
                              np.array(new_dataset.train_array).T),
                             shape=(new_dataset.n_users, new_dataset.n_items),
                             dtype=np.float32).tocsr()
    model.data_mat = data_mat
    # Re-shape the model's item-item similarity matrix to the full item count;
    # entries beyond the original item range stay empty (zero similarity).
    sim_mat = model.sim_mat.tocoo()
    sim_mat = sp.coo_matrix((sim_mat.data, (sim_mat.row, sim_mat.col)),
                            shape=(new_dataset.n_items, new_dataset.n_items))
    model.sim_mat = sim_mat.tocsr()
    trainer = get_trainer(trainer_config, new_dataset, model)
    # Evaluate inductively: old counts mark the boundary between seen and
    # newly-introduced users/items.
    trainer.inductive_eval(dataset.n_users, dataset.n_items)
def ScanForHW(self): global dev_trainer, dev_ant, simulatetrainer #get ant stick if debug:print "get ant stick" if not dev_ant: dev_ant, msg = ant.get_ant(debug) if not dev_ant: self.ANTVariable.set(u"no ANT dongle found") return self.ANTVariable.set(u"ANT dongle found") self.PowerFactorVariable.set(powerfactor) if debug:print "get trainer" #find trainer model for Windows and Linux if not dev_trainer: #find trainer if simulatetrainer: self.trainerVariable.set(u"Simulated Trainer") else: dev_trainer = trainer.get_trainer() if not dev_trainer: self.trainerVariable.set("Trainer not detected") return else: self.trainerVariable.set("Trainer detected") trainer.initialise_trainer(dev_trainer)#initialise trainer self.StartAPPbutton.config(state="normal")
def server_train(order, num_use_pool):
    """Train on the warm-start set plus the first ``num_use_pool`` pool
    examples (in acquisition order) and return F1 on the eval set.

    Reads the module-level globals ``warmstart_set``, ``pool_set``,
    ``model_sel_set``, ``eval_set`` and ``args``.
    """
    acquired = [pool_set[idx] for idx in order[:num_use_pool]]
    combined = warmstart_set + acquired
    t = get_trainer(args.model, args.model_seed, args.domain,
                    f'cuda:{args.gpu_idx}')
    fitted, _ = t.train(combined, model_sel_set, args.batchsize,
                        args.max_epoch, args.patience, False)
    return t.evaluate_f1(fitted, eval_set)
def main(criterion, evaluation_set, model_seed=0, data_seed=0, batchsize=25,
         max_epoch=100, patience=20, tot_acq=250, gpu_idx=0):
    """Run a pool-based active-learning baseline on the restaurant dataset.

    Starting from the warm-start set, repeatedly acquire ``batchsize``
    examples from the pool with ``criterion``, retrain from scratch, and
    record F1 on ``data[evaluation_set]`` after every round. Results are
    printed and persisted via ``store_baseline``.
    """
    # Fix: the original used pickle.load(open(...)) and leaked the file
    # handle; a context manager closes it deterministically.
    with open('data/restaurant.pkl', 'rb') as f:
        data = pickle.load(f)['seeds'][data_seed]
    train_set = copy(data['warmstart'])
    model_sel_set = copy(data['train_valid'])
    pool_dict = {i: p for i, p in enumerate(data['pool'])}
    eval_set = data[evaluation_set]
    eval_sents, eval_tags = zip(*eval_set)
    curve = []       # F1 after the warm-start model and after each round
    data_order = []  # pool indices in acquisition order
    trainer = get_trainer(model_seed, device=f'cuda:{gpu_idx}')
    trainer.train(train_set, model_sel_set, batchsize, max_epoch, patience,
                  verbose=False)
    f1 = trainer.evaluate_f1(trainer.best_model, eval_sents, eval_tags)
    curve.append(f1)
    for _ in trange(int(tot_acq / batchsize)):
        acquire_idxs = acquire(trainer.best_model, pool_dict, criterion,
                               batchsize)
        data_order.extend(acquire_idxs)
        # Move the acquired examples from the pool into the training set.
        for idx in acquire_idxs:
            train_set.append(pool_dict[idx])
            del pool_dict[idx]
        # Retrain from a fresh model each round (no warm-starting).
        trainer = get_trainer(model_seed, device=f'cuda:{gpu_idx}')
        trainer.train(train_set, model_sel_set, batchsize, max_epoch,
                      patience, verbose=False)
        f1 = trainer.evaluate_f1(trainer.best_model, eval_sents, eval_tags)
        curve.append(f1)
    print(curve)
    print(np.mean(curve))
    store_baseline(curve, data_order, criterion, evaluation_set, model_seed,
                   data_seed, batchsize, max_epoch, patience, tot_acq)
def main(args):
    """Dispatch on ``args.mode``: train / validate / test a single model, or
    average the test-set logits of several checkpoints ("ensemble").

    In ensemble mode ``args.backbone`` / ``args.model_checkpoint`` are
    expected to be parallel lists; raises for any unrecognized mode.
    """
    if args.mode != 'ensemble':
        # Dataloaders
        dls = get_dataloaders(args)
        # Model
        model = get_model(args)
        # Trainer
        trainer = get_trainer(args, dls)
    # Mode
    if args.mode in ['train', 'training']:
        trainer.fit(model, dls['train_aug'], dls['validation'])
        # model=None — presumably evaluates the checkpoint selected during
        # fit rather than the in-memory model; TODO confirm trainer semantics.
        trainer.validate(model=None, val_dataloaders=dls['validation'])
        trainer.test(model=None, test_dataloaders=dls['test'])
    elif args.mode in ['validate', 'validation', 'validating']:
        trainer.validate(model, val_dataloaders=dls['validation'])
    elif args.mode in ['test', 'testing']:
        trainer.test(model, test_dataloaders=dls['test'])
    elif args.mode == 'ensemble':
        predictions = []
        labels = []
        backbones = args.backbone
        model_checkpoints = args.model_checkpoint
        for backbone, model_checkpoint in zip(backbones, model_checkpoints):
            # Temporarily rebind args so the factories build this member.
            args.backbone = backbone
            args.model_checkpoint = model_checkpoint
            dls = get_dataloaders(args)
            model = get_model(args)
            # Predict
            model.eval().cuda()
            preds = []
            lbls = []
            for x, y in tqdm(dls['test']):
                logits = model(x.cuda()).detach().cpu()
                preds += [logits]
                lbls += [y]
            preds = torch.cat(preds, 0)
            predictions += [preds]
            lbls = torch.cat(lbls, 0)
            labels += [lbls]
            acc_single = accuracy(preds, lbls)
            print(f'{backbone} accuracy: {acc_single}')
        # Average member logits; labels are identical across members.
        predictions = torch.stack(predictions, 1).mean(1)
        labels = labels[0]
        acc = accuracy(predictions, labels)
        # Fix: user-facing typo ("Ensamble" -> "Ensemble").
        print(f'Ensemble Accuracy: {acc}')
    else:
        # Fix: the message interpolates args.mode, so say "Mode", not "Model".
        raise Exception(f'Error. Mode "{args.mode}" not supported.')
def idmr(data, criterion, evaluation_set, lens_proportions, model, model_seed,
         domain, data_seed, batchsize, max_epoch, patience, tot_acq, gpu_idx):
    '''
    Active learning with sentence-length distribution matching.

    lens_proportions is a dictionary from (len_low, len_high), both inclusive,
    to proportions; each acquisition batch is selected so that the warm-start
    + actively-acquired set preserves these length-group proportions.

    Returns:
        f1s: eval-set F1 after the warm-start model and after each round.
        data_order: pool indices in acquisition order.

    Raises:
        ValueError: if ``criterion`` is not 'bald' or 'max-entropy'.
    '''
    len_groups = list(lens_proportions.keys())
    proportions = list(lens_proportions.values())
    # Map every individual sentence length to the index of its length group.
    len_groups_dict = {v: i for i, vs in enumerate(len_groups)
                       for v in range(vs[0], vs[1] + 1)}
    train_set = copy(data['warmstart'])
    pool_set = copy(data['pool'])
    pool_dict = {i: p for i, p in enumerate(pool_set)}
    model_sel_set = data['train_valid']
    eval_set = data[evaluation_set]
    f1s = []
    mcdropout = (criterion == 'bald')  # BALD needs MC-dropout at scoring time
    trainer = get_trainer(model, model_seed, domain, f'cuda:{gpu_idx}',
                          mcdropout)
    trainer.train(train_set, model_sel_set, batchsize, max_epoch, patience,
                  verbose=False)
    f1 = trainer.evaluate_f1(trainer.best_model, eval_set)
    f1s.append(f1)
    data_order = []
    for _ in trange(int(tot_acq / batchsize)):
        pool_idxs = list(pool_dict.keys())
        pool_sents, _ = zip(*pool_dict.values())
        if criterion == 'bald':
            pool_scores = score_bald(trainer.best_model, pool_sents)
        elif criterion == 'max-entropy':
            pool_scores = score_maxent(trainer.best_model, pool_sents)
        else:
            # Fix: the original left pool_scores unbound here and crashed
            # later with a NameError; fail fast with a clear message.
            raise ValueError(f'unknown acquisition criterion: {criterion}')
        sorted_pool_scores, sorted_pool_idxs = zip(
            *sorted(zip(pool_scores, pool_idxs)))
        pool_lens = [len(nltk.word_tokenize(pool_dict[p][0]))
                     for p in sorted_pool_idxs]
        train_lens = [len(nltk.word_tokenize(t[0])) for t in train_set]
        use_idxs = select_batch(batchsize, sorted_pool_idxs, pool_lens,
                                train_lens, proportions, len_groups_dict,
                                len_groups)
        train_set = train_set + [pool_dict[i] for i in use_idxs]
        data_order = data_order + use_idxs
        for idx in use_idxs:
            del pool_dict[idx]
        # Retrain from scratch on the enlarged training set.
        trainer = get_trainer(model, model_seed, domain, f'cuda:{gpu_idx}',
                              mcdropout)
        trainer.train(train_set, model_sel_set, batchsize, max_epoch,
                      patience, verbose=False)
        f1 = trainer.evaluate_f1(trainer.best_model, eval_set)
        f1s.append(f1)
    return f1s, data_order
def server_train(order, num_use):
    """Assemble (tokens, masks, labels) from the warm-start pools plus the
    first ``num_use`` acquired examples, retrain, and return eval-set F1.

    Reads the module-level globals ``warmstart_t/m/l``, ``pool_t/m/l``,
    ``model_sel_set``, ``eval_set`` and ``args``.
    """
    picked = order[:num_use]
    tokens = list(warmstart_t) + [pool_t[i] for i in picked]
    masks = list(warmstart_m) + [pool_m[i] for i in picked]
    labels = list(warmstart_l) + [pool_l[i] for i in picked]
    t = get_trainer(args.model, args.model_seed, args.domain,
                    f'cuda:{args.gpu_idx}')
    fitted, _ = t.train((tokens, masks, labels), model_sel_set,
                        args.batchsize, args.max_epoch, args.patience, False)
    return t.evaluate_f1(fitted, eval_set, args.batchsize)
def main(args):
    """End-to-end pipeline: load (x, f(x)) pairs, split them, optionally
    persist the splits, build loaders and a model, then train."""
    inputs, targets = get_data(args)
    device = torch.device("cuda" if args.cuda else "cpu")
    train_data, val_data = split_data(args, inputs, targets)
    if args.save_splits:
        save_splits(train_data, val_data)
    train_loader, val_loader = get_loaders(train_data, val_data)
    net = get_model(args)
    get_trainer(net, train_loader, val_loader, device, args).train()
def recover_pack():
    """Build the run 'pack': model, loaders, trainer and criterion bundled in
    a dotdict, with optimizer/scheduler left unset, then apply the learning
    rate for the current epoch from ``cfg``."""
    train_loader, test_loader = get_loader()
    pack = dotdict(dict(
        net=get_model(),
        train_loader=train_loader,
        test_loader=test_loader,
        trainer=get_trainer(),
        criterion=get_criterion(),
        optimizer=None,       # filled in later by the caller
        lr_scheduler=None,    # filled in later by the caller
    ))
    adjust_learning_rate(cfg.base.epoch, pack)
    return pack
def main():
    # Log directory = this script's path with the trailing ".py" stripped.
    log_path = __file__[:-3]
    init_run(log_path, 2021)
    device = torch.device('cuda')
    config = get_gowalla_config(device)
    # Use the 6th (dataset, model, trainer) configuration triple.
    dataset_config, model_config, trainer_config = config[5]
    # Replace the last 4 chars of the path with "0_dropui" to train on the
    # reduced dataset variant — presumably "dropped users/items"; TODO confirm
    # the directory naming scheme against the data layout.
    dataset_config['path'] = dataset_config['path'][:-4] + '0_dropui'
    writer = SummaryWriter(log_path)
    dataset = get_dataset(dataset_config)
    model = get_model(model_config, dataset)
    trainer = get_trainer(trainer_config, dataset, model)
    trainer.train(verbose=True, writer=writer)
    writer.close()
    # Drop the 7-char "_dropui" suffix to load the full dataset for evaluation.
    dataset_config['path'] = dataset_config['path'][:-7]
    new_dataset = get_dataset(dataset_config)
    # Re-point the trained model at the full user/item space.
    model.config['dataset'] = new_dataset
    model.n_users, model.n_items = new_dataset.n_users, new_dataset.n_items
    # Keep only the columns of items that existed at training time —
    # presumably item ids are aligned between the two datasets; TODO confirm.
    data_mat = model.get_data_mat(new_dataset)[:, :dataset.n_items]
    # Row-normalize each user's interaction vector to unit L2 norm.
    model.normalized_data_mat = normalize(data_mat, axis=1, norm='l2')
    trainer = get_trainer(trainer_config, new_dataset, model)
    # Evaluate inductively: old counts mark the boundary between seen and
    # newly-introduced users/items.
    trainer.inductive_eval(dataset.n_users, dataset.n_items)
def server_train(order, num_use_pool):
    """Train on the warm-start set plus the first ``num_use_pool`` pool
    examples (in acquisition order) and return accuracy on the eval set.

    Reads the module-level globals ``dataset``, ``model_sel_X/y``,
    ``eval_X/y`` and ``args``.
    """
    # Fix: dropped the redundant list() wrapper around the comprehension.
    effective_pool = [dataset['pool'][o] for o in order[:num_use_pool]]
    train_set = dataset['warmstart'] + effective_pool
    train_X, train_y = map(np.array, zip(*train_set))
    train_X = torch.tensor(train_X).float().to(f'cuda:{args.gpu_idx}')
    train_y = torch.tensor(train_y).long().to(f'cuda:{args.gpu_idx}')
    trainer = get_trainer(args.model_seed, f'cuda:{args.gpu_idx}',
                          mcdropout=False)
    trainer.train((train_X, train_y), (model_sel_X, model_sel_y),
                  args.batchsize, args.max_epoch, args.patience,
                  test_steps=None, verbose=False)
    return trainer.evaluate_acc(trainer.best_model, eval_X, eval_y)