def test(test_data, model, experiment: Experiment):
    global TEST_MINI_BATCH
    global EPOCH
    with experiment.test():
        with th.no_grad():
            model.eval()
            model.cuda()
            batches = len(test_data)
            total_loss = 0
            loss_func = nn.L1Loss()
            for x, in tqdm(test_data):
                x = x.cuda()
                prediction, _ = model(x)
                loss = loss_func(prediction, x)
                # loss = MyLoss(x, prediction)
                experiment.log_metric(
                    "mini-batch loss", loss.item(), step=TEST_MINI_BATCH)
                TEST_MINI_BATCH += 1
                total_loss += loss.item()
            average_loss = total_loss / batches
            experiment.log_metric("batch loss", average_loss, step=EPOCH)
            EPOCH += 1
            return average_loss
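# --- Hypothetical call site for test() above (a sketch, not from the original
# source). The module-level step counters the function mutates are assumed to
# be initialized once at import time; `test_loader` and `model` are
# placeholders for a torch DataLoader yielding 1-tuples and a trained network.
TEST_MINI_BATCH = 0
EPOCH = 0
# experiment = Experiment(api_key="...", project_name="my-project")
# for _ in range(num_epochs):
#     average_loss = test(test_loader, model, experiment)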
def main():
    # capture the config path from the run arguments
    # then process the json configuration file
    try:
        args = get_args()
        config = process_config(args.config)
    except:
        print("missing or invalid arguments")
        exit(0)

    print('Create the data generator.')
    data_loader = RobertaDataLoader(config)

    print('Create the model.')
    model = RobertaModel(config)

    print('Creating the Experiment')
    experiment = Experiment(api_key=config.exp.comet_api_key,
                            project_name=config.exp.name,
                            auto_output_logging="simple")

    print('Create the trainer')
    trainer = RobertaTrainer(model.model, experiment, config,
                             data_loader.get_train_data())

    with experiment.train():
        print('Start training the model.')
        trainer.train()
        model.save()

    with experiment.test():
        print('Predicting the testing data')
        trainer.predict(data_loader.get_test_data(),
                        data_loader.get_tokenizer())
def train(hyper_params):
    mnist = get_data()

    # Get graph definition, tensors and ops
    train_step, cross_entropy, accuracy, x, y, y_ = build_model_graph(
        hyper_params)

    experiment = Experiment(project_name="tf")
    experiment.log_parameters(hyper_params)
    experiment.log_dataset_hash(mnist)

    with tf.Session() as sess:
        with experiment.train():
            sess.run(tf.global_variables_initializer())
            experiment.set_model_graph(sess.graph)

            for i in range(hyper_params["steps"]):
                batch = mnist.train.next_batch(hyper_params["batch_size"])
                experiment.set_step(i)
                # Compute train accuracy every 10 steps
                if i % 10 == 0:
                    train_accuracy = accuracy.eval(feed_dict={
                        x: batch[0],
                        y_: batch[1]
                    })
                    print('step %d, training accuracy %g' % (i, train_accuracy))
                    experiment.log_metric("accuracy", train_accuracy, step=i)

                # Update weights (back propagation)
                _, loss_val = sess.run([train_step, cross_entropy],
                                       feed_dict={
                                           x: batch[0],
                                           y_: batch[1]
                                       })
                experiment.log_metric("loss", loss_val, step=i)

        ### Finished Training ###

        with experiment.test():
            # Compute test accuracy
            acc = accuracy.eval(feed_dict={
                x: mnist.test.images,
                y_: mnist.test.labels
            })
            experiment.log_metric("accuracy", acc)
            print('test accuracy %g' % acc)
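# --- A minimal, hypothetical invocation of train() above (illustrative only).
# The loop reads hyper_params["steps"] and hyper_params["batch_size"] directly;
# any other key, such as a learning rate consumed by build_model_graph(), is
# an assumption.
# hyper_params = {"steps": 1000, "batch_size": 64, "learning_rate": 0.5}
# train(hyper_params)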
def test(test_data, model, experiment: Experiment):
    global TEST_MINI_BATCH
    model.eval()
    model.cuda()
    batches = len(test_data)
    total_loss = 0
    predictions = []
    ground_truth = []
    with experiment.test():
        for x, y in tqdm(test_data):
            x = x.cuda()
            y = y.cuda().float()
            prediction = model(x)
            loss = F.binary_cross_entropy(prediction, y)
            total_loss += loss.item()
            prediction = prediction >= 0.5
            predictions.append(prediction.detach().cpu().numpy())
            ground_truth.append(y.detach().cpu().numpy())
            experiment.log_metric("Mini batch loss",
                                  loss.item(),
                                  step=TEST_MINI_BATCH)
            TEST_MINI_BATCH += 1
        average_loss = total_loss / batches
        predictions = np.concatenate(predictions)
        ground_truth = np.concatenate(ground_truth)
        accuracy = accuracy_score(ground_truth, predictions)
        f1score = f1_score(ground_truth, predictions)
        precision = precision_score(ground_truth, predictions)
        recall = recall_score(ground_truth, predictions)
    return average_loss, accuracy, f1score, precision, recall
def train_and_evaluate(self, train_gen, val_gen, epochs):
    """Train the CNN and evaluate it on the validation generator,
    logging metrics and confusion matrices to Comet."""
    experiment = Experiment(
        api_key="VNQSdbR1pw33EkuHbUsGUSZWr",
        project_name="piratesofthecaribbean",
        workspace="florpi",
    )
    model = self.build()
    with experiment.train():
        model_path = os.path.join(self.directory,
                                  "cnn_{epoch:02d}-{val_loss:.2f}.hdf5")
        callbacks = [
            ModelCheckpoint(model_path, monitor="val_loss", mode="min"),
            # EarlyStopping(
            #     monitor="val_loss",
            #     mode="min",
            #     min_delta=0.1,
            #     patience=1,
            #     restore_best_weights=True,
            # ),
        ]
        model.fit(
            train_gen,
            epochs=epochs,
            validation_data=val_gen,
            callbacks=callbacks,
            class_weight=CLASS_WEIGHTS,
        )
        model.save(os.path.join(self.directory, "cnn_final.h5"))

    # Run validation
    with experiment.test():
        probabilities = []
        y_val_all = []
        # reset generator
        val_gen.reset()
        for idx, (X_val, y_val) in tqdm(enumerate(val_gen),
                                        desc="valset",
                                        total=val_gen._num_examples):
            y_val_all += y_val.tolist()
            probs = model.predict(X_val)
            probabilities += probs.tolist()
            if idx > val_gen._num_examples:
                break

        y_true = np.argmax(y_val_all, axis=-1)
        y_pred = np.argmax(probabilities, axis=-1)
        visualize.plot_confusion_matrix(y_true,
                                        y_pred,
                                        classes=LABELS,
                                        normalize=True,
                                        experiment=experiment)
        visualize.plot_confusion_matrix(y_true,
                                        y_pred,
                                        classes=LABELS,
                                        normalize=False,
                                        experiment=experiment)
        experiment.log_confusion_matrix(y_true=y_true,
                                        y_predicted=y_pred,
                                        labels=LABELS)
    return model
def train(self, model, pair_generator, fold, output_file, use_nprf=False):
    '''Driver function for training

    Args:
      model: a keras Model
      pair_generator: an instantiated pair generator
      fold: which fold to run. partitions will be automatically rotated.
      output_file: temporary file for validation
      use_nprf: whether to use nprf

    Returns:
    '''
    # set tensorflow not to use the full GPU memory
    # session = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True)))
    initial_lrate = self.config.learning_rate
    experiment = Experiment(api_key="PhzBYNpSC304fMjGUoU42dX9b",
                            project_name="nprf-drmm",
                            workspace="neural-ir",
                            auto_param_logging=False)
    experiment_params = {
        "batch_size": self.config.batch_size,
        "optimizer": self.config.optimizer,
        "epochs": self.config.max_iteration,
        "initial_learning_rate": initial_lrate
    }
    experiment.log_multiple_params(experiment_params)

    # qid list config
    qid_list = deque(self.config.qid_list)
    rotate = fold - 1
    qid_list.rotate(rotate)  # rotate() mutates the deque in place
    train_qid_list, valid_qid_list, test_qid_list = \
        qid_list[0] + qid_list[1] + qid_list[2], qid_list[3], qid_list[4]
    print(train_qid_list, valid_qid_list, test_qid_list)
    relevance_dict = load_pickle(self.config.relevance_dict_path)
    # pair_generator = DDMPairGenerator(**self.config.generator_params)
    nb_pair_train = pair_generator.count_pairs_balanced(
        train_qid_list, self.config.pair_sample_size)

    valid_params = self.eval_by_qid_list_helper(valid_qid_list, pair_generator)
    test_params = self.eval_by_qid_list_helper(test_qid_list, pair_generator)
    print(valid_params[-1], test_params[-1])

    batch_logger = NBatchLogger(50)
    batch_losses = []
    met = [[], [], [], [], [], []]
    iteration = -1
    best_valid_map = 0.0
    new_lrate = initial_lrate

    for i in range(self.config.nb_epoch):
        print("Epoch " + str(i))
        nb_batch = nb_pair_train // self.config.batch_size  # integer division
        train_generator = pair_generator.generate_pair_batch(
            train_qid_list, self.config.pair_sample_size)
        for j in range(nb_batch // 100):
            iteration += 1
            new_lrate = self._step_decay(iteration, initial_lrate)
            K.set_value(model.optimizer.lr, new_lrate)

            history = model.fit_generator(
                generator=train_generator,
                steps_per_epoch=100,  # nb_pair_train / self.config.batch_size,
                epochs=1,
                shuffle=False,
                verbose=0,
                callbacks=[batch_logger],
            )
            batch_losses.append(batch_logger.losses)
            print("[Iter {0}]\tLoss: {1}\tlr: {2}".format(
                iteration, history.history['loss'][0], new_lrate))
            experiment.log_parameter("curr_epoch", iteration + 1,
                                     step=(iteration + 1))
            experiment.log_parameter("curr_lr", new_lrate,
                                     step=(iteration + 1))
            experiment.log_metric("curr_loss", history.history['loss'][0],
                                  step=(iteration + 1))

            kwargs = {
                'model': model,
                'relevance_dict': relevance_dict,
                'rerank_topk': self.config.rerank_topk,
                'qrels_file': self.config.qrels_file,
                'docnolist_file': self.config.docnolist_file,
                'runid': self.config.runid,
                'output_file': output_file
            }
            if use_nprf:
                kwargs.update({
                    'nb_supervised_doc': self.config.nb_supervised_doc,
                    'doc_topk_term': self.config.doc_topk_term,
                })

            valid_met = self.eval_by_qid_list(*valid_params, **kwargs)
            print("[Valid]\t\tMAP\tP20\tNDCG20")
            print("\t\t{0}\t{1}\t{2}".format(valid_met[0], valid_met[1],
                                             valid_met[2]))
            met[0].append(valid_met[0])
            met[1].append(valid_met[1])
            met[2].append(valid_met[2])
            with experiment.validate():
                experiment.log_metric("map", valid_met[0], step=(iteration + 1))
                experiment.log_metric("p@20", valid_met[1], step=(iteration + 1))
                experiment.log_metric("ndcg@20", valid_met[2], step=(iteration + 1))

            if valid_met[0] > best_valid_map:
                model.save_weights(
                    os.path.join(self.config.save_path,
                                 "fold{0}.h5".format(fold)))
                best_valid_map = valid_met[0]

            kwargs['output_file'] = os.path.join(
                self.config.result_path,
                "fold{0}.iter{1}.res".format(fold, iteration))
            # test_met = eval_partial(qid_list=test_qid_list)
            test_met = self.eval_by_qid_list(*test_params, **kwargs)
            print("[Test]\t\tMAP\tP20\tNDCG20")
            print("\t\t{0}\t{1}\t{2}".format(test_met[0], test_met[1],
                                             test_met[2]))
            met[3].append(test_met[0])
            met[4].append(test_met[1])
            met[5].append(test_met[2])
            with experiment.test():
                experiment.log_metric("map", test_met[0], step=(iteration + 1))
                experiment.log_metric("p@20", test_met[1], step=(iteration + 1))
                experiment.log_metric("ndcg@20", test_met[2], step=(iteration + 1))

            print("[Attention]\t\tCurrent best iteration {0}\n".format(
                met[0].index(max(met[0]))))
            if iteration > self.config.max_iteration:
                break
        # model.save_weights(os.path.join(self.config.save_path, "fold{0}.epoch{1}.h5".format(fold, i)))

    best_iter, eval_met = self._extract_max_metric(met)
    retain_file(self.config.result_path, "fold{0}".format(fold),
                "fold{0}.iter{1}.res".format(fold, best_iter))
    # np.save('loss.npy', batch_losses)
    # np.save('met.npy', met)
    return eval_met
def train(args, use_comet: bool = True):
    data_cls = funcs[args['dataset']]
    model_cls = funcs[args['model']]
    network = funcs[args['network']]

    print('[INFO] Getting dataset...')
    data = data_cls()
    (x_train, y_train), (x_test, y_test) = data.load_data()
    classes = data.mapping

    # #Used for testing only
    # x_train = x_train[:100, :, :]
    # y_train = y_train[:100, :]
    # x_test = x_test[:100, :, :]
    # y_test = y_test[:100, :]
    # print ('[INFO] Training shape: ', x_train.shape, y_train.shape)
    # print ('[INFO] Test shape: ', x_test.shape, y_test.shape)
    # #delete these lines

    y_test_labels = [
        np.where(y_test[idx] == 1)[0][0] for idx in range(len(y_test))
    ]

    # distribute 90% test 10% val dataset with equal class distribution
    (x_test, x_valid, y_test, y_valid) = train_test_split(x_test,
                                                          y_test,
                                                          test_size=0.1,
                                                          stratify=y_test_labels,
                                                          random_state=42)

    print('[INFO] Training shape: ', x_train.shape, y_train.shape)
    print('[INFO] Validation shape: ', x_valid.shape, y_valid.shape)
    print('[INFO] Test shape: ', x_test.shape, y_test.shape)

    print('[INFO] Setting up the model..')
    model = model_cls(network, data_cls)
    print(model)

    dataset = dict({
        'x_train': x_train,
        'y_train': y_train,
        'x_valid': x_valid,
        'y_valid': y_valid,
        'x_test': x_test,
        'y_test': y_test
    })

    if use_comet and args['find_lr'] == False:
        # create an experiment with your api key
        experiment = Experiment(api_key='INSERT API KEY',
                                project_name='emnist',
                                auto_param_logging=False)

        print('[INFO] Starting Training...')
        # will log metrics with the prefix 'train_'
        with experiment.train():
            _ = train_model(model,
                            dataset,
                            batch_size=args['batch_size'],
                            epochs=args['epochs'],
                            name=args['network'])

        print('[INFO] Starting Testing...')
        # will log metrics with the prefix 'test_'
        with experiment.test():
            loss, score = model.evaluate(dataset, args['batch_size'])
            print(f'[INFO] Test evaluation: {score*100}')
            metrics = {'loss': loss, 'accuracy': score}
            experiment.log_metrics(metrics)

        experiment.log_parameters(args)
        experiment.log_dataset_hash(x_train)  # creates and logs a hash of your data
        experiment.end()

    elif use_comet and args['find_lr'] == True:
        _ = train_model(model,
                        dataset,
                        batch_size=args['batch_size'],
                        epochs=args['epochs'],
                        FIND_LR=args['find_lr'],
                        name=args['network'])
    else:
        print('[INFO] Starting Training...')
        train_model(model,
                    dataset,
                    batch_size=args['batch_size'],
                    epochs=args['epochs'],
                    name=args['network'])
        print('[INFO] Starting Testing...')
        loss, score = model.evaluate(dataset, args['batch_size'])
        print(f'[INFO] Test evaluation: {score*100}')

    if args['weights']:
        model.save_weights()

    if args['save_model']:
        model.save_model()
def fit_test(exp_params, data_path, k, write_path, others=None, custom_tag=''):
    """Fit model and compute metrics on both train and test sets.

    Also log plot and embeddings to comet.

    Args:
        exp_params(dict): Parameter dict. Should at least have keys model_name,
        dataset_name & random_state. Other keys are assumed to be model parameters.
        k(int): Fold identifier.
        data_path(str): Data directory.
        write_path(str): Where temp files can be written.
        others(dict): Other things to log to Comet experiment.
        custom_tag(str): Custom tag for Comet experiment.

    """
    # Increment fold to avoid reusing validation seeds
    k += 10

    # Comet experiment
    exp = Experiment(parse_args=False)
    exp.disable_mp()
    custom_tag += '_test'
    exp.add_tag(custom_tag)
    exp.log_parameters(exp_params)

    if others is not None:
        exp.log_others(others)

    # Parse experiment parameters
    model_name, dataset_name, random_state, model_params = parse_params(exp_params)

    # Fetch and split dataset.
    data_train_full = getattr(grae.data, dataset_name)(split='train',
                                                       random_state=random_state,
                                                       data_path=data_path)
    data_test = getattr(grae.data, dataset_name)(split='test',
                                                 random_state=random_state,
                                                 data_path=data_path)

    if model_name == 'PCA':
        # No validation split on PCA
        data_train, data_val = data_train_full, None
    else:
        data_train, data_val = data_train_full.validation_split(random_state=FOLD_SEEDS[k])

    # Model
    m = getattr(grae.models, model_name)(random_state=FOLD_SEEDS[k], **model_params)
    m.comet_exp = exp  # Used by DL models to log metrics between epochs
    m.write_path = write_path
    m.data_val = data_val  # For early stopping

    # Benchmark fit time
    fit_start = time.time()
    m.fit(data_train)
    fit_stop = time.time()
    fit_time = fit_stop - fit_start

    # Log plots
    m.plot(data_train, data_test, title=f'{model_name}_{dataset_name}')

    if dataset_name in ['Faces', 'RotatedDigits', 'UMIST', 'Tracking', 'COIL100', 'Teapot']:
        m.view_img_rec(data_train, choice='random', title=f'{model_name}_{dataset_name}_train_rec')
        m.view_img_rec(data_test, choice='best', title=f'{model_name}_{dataset_name}_test_rec_best')
        m.view_img_rec(data_test, choice='worst', title=f'{model_name}_{dataset_name}_test_rec_worst')
    elif dataset_name in ['ToroidalHelices', 'Mammoth'] or 'SwissRoll' in dataset_name:
        m.view_surface_rec(data_train, title=f'{model_name}_{dataset_name}_train_rec', dataset_name=dataset_name)
        m.view_surface_rec(data_test, title=f'{model_name}_{dataset_name}_test_rec', dataset_name=dataset_name)

    # Score models
    prober = EmbeddingProber()
    prober.fit(model=m, dataset=data_train_full)

    with exp.train():
        train_z, train_metrics = prober.score(data_train_full)
        _, train_y = data_train_full.numpy()

        # Log train metrics
        exp.log_metric(name='fit_time', value=fit_time)
        exp.log_metrics(train_metrics)

    with exp.test():
        test_z, test_metrics = prober.score(data_test)
        _, test_y = data_test.numpy()

        # Log test metrics
        exp.log_metrics(test_metrics)

    # Log embedding as .npy file
    file_name = os.path.join(write_path, f'emb_{model_name}_{dataset_name}.npy')

    save_dict(dict(train_z=train_z,
                   train_y=train_y,
                   test_z=test_z,
                   test_y=test_y,
                   random_state=random_state,
                   dataset_name=dataset_name,
                   model_name=model_name),
              file_name)

    file = open(file_name, 'rb')
    exp.log_asset(file, file_name=file_name)
    file.close()
    os.remove(file_name)

    # Log marker to mark successful experiment
    exp.log_other('success', 1)
def train(args, use_comet: bool = True):
    data_cls = funcs[args['dataset']]
    model_cls = funcs[args['model']]
    network = funcs[args['network']]

    print('[INFO] Getting dataset...')
    data = data_cls()
    data.load_data()
    (x_train, y_train), (x_test, y_test) = (data.x_train, data.y_train), (data.x_test, data.y_test)
    classes = data.mapping

    # #Used for testing only
    # x_train = x_train[:100, :, :]
    # y_train = y_train[:100, :]
    # x_test = x_test[:100, :, :]
    # y_test = y_test[:100, :]
    # print ('[INFO] Training shape: ', x_train.shape, y_train.shape)
    # print ('[INFO] Test shape: ', x_test.shape, y_test.shape)
    # #delete these lines

    # split the test set into 80% test / 20% val
    (x_test, x_valid, y_test, y_valid) = train_test_split(x_test,
                                                          y_test,
                                                          test_size=0.2,
                                                          random_state=42)
    print('[INFO] Training shape: ', x_train.shape, y_train.shape)
    print('[INFO] Validation shape: ', x_valid.shape, y_valid.shape)
    print('[INFO] Test shape: ', x_test.shape, y_test.shape)

    print('[INFO] Setting up the model..')
    if args['network'] == 'lstmctc':
        network_args = {'backbone': args['backbone'],
                        'seq_model': args['seq'],
                        'bi': args['bi']}
        model = model_cls(network, data_cls, network_args)
    else:
        model = model_cls(network, data_cls)
    print(model)

    dataset = dict({
        'x_train': x_train,
        'y_train': y_train,
        'x_valid': x_valid,
        'y_valid': y_valid,
        'x_test': x_test,
        'y_test': y_test
    })

    if use_comet and args['find_lr'] == False:
        # create an experiment with your api key
        experiment = Experiment(api_key='WVBNRAfMLCBWslJAAsffxM4Gz',
                                project_name='iam_lines',
                                auto_param_logging=False)

        print('[INFO] Starting Training...')
        # will log metrics with the prefix 'train_'
        with experiment.train():
            _ = train_model(model,
                            dataset,
                            batch_size=args['batch_size'],
                            epochs=args['epochs'],
                            name=args['network'])

        print('[INFO] Starting Testing...')
        # will log metrics with the prefix 'test_'
        with experiment.test():
            score = model.evaluate(dataset, int(args['batch_size']))
            print(f'[INFO] Test evaluation: {score*100}...')
            metrics = {'accuracy': score}
            experiment.log_metrics(metrics)

        experiment.log_parameters(args)
        experiment.log_dataset_hash(x_train)  # creates and logs a hash of your data
        experiment.end()

    elif use_comet and args['find_lr'] == True:
        _ = train_model(model,
                        dataset,
                        batch_size=args['batch_size'],
                        epochs=args['epochs'],
                        FIND_LR=args['find_lr'],
                        name=args['network'])
    else:
        print('[INFO] Starting Training...')
        train_model(model,
                    dataset,
                    batch_size=args['batch_size'],
                    epochs=args['epochs'],
                    name=args['network'])
        print('[INFO] Starting Testing...')
        score = model.evaluate(dataset, args['batch_size'])
        print(f'[INFO] Test evaluation: {score*100}...')

    if args['weights']:
        model.save_weights()

    if args['save_model']:
        model.save_model()
def main(args):
    if args.dataset in ('FB15k-237', 'kinship', 'nations', 'umls', 'WN18RR',
                        'YAGO3-10'):
        S = joblib.load(args.data_path)
        train_set = FBDataset(S['train_data'], args.prefetch_to_gpu)
        valid_set = FBDataset(S['val_data'])
        test_set = FBDataset(S['test_data'])
    else:
        train_set = FBDataset(args.data_path % 'train', args.prefetch_to_gpu)
        valid_set = FBDataset(args.data_path % 'valid')
        test_set = FBDataset(args.data_path % 'test')

    print('50 Most Common Attributes')
    if args.prefetch_to_gpu:
        train_hash = set([r.tobytes() for r in train_set.dataset.cpu().numpy()])
    else:
        train_hash = set([r.tobytes() for r in train_set.dataset])

    all_hash = train_hash.copy()
    all_hash.update(set([r.tobytes() for r in valid_set.dataset]))
    all_hash.update(set([r.tobytes() for r in test_set.dataset]))
    logdir = args.outname_base + '_logs' + '/'
    if args.remove_old_run:
        shutil.rmtree(logdir)
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    tflogger = tfLogger(logdir)

    ''' Comet Logging '''
    experiment = Experiment(api_key="Ht9lkWvTm58fRo9ccgpabq5zV",
                            disabled=not args.do_log,
                            project_name="graph-invariance-icml",
                            workspace="joeybose")
    experiment.set_name(args.namestr)

    modelD = TransD(args.num_ent, args.num_rel, args.embed_dim, args.p)
    fairD_0, fairD_1, fairD_2 = None, None, None
    optimizer_fairD_0, optimizer_fairD_1, optimizer_fairD_2 = None, None, None
    filter_0, filter_1, filter_2 = None, None, None

    if args.debug:
        ipdb.set_trace()
    if args.load_transD:
        modelD.load(args.saved_path)
    if args.use_cuda:
        modelD.cuda()

    if args.use_attr:
        ''' Hard Coded to the most common attribute for now '''
        attr_data = [args.attr_mat, args.ent_to_idx, args.attr_to_idx,
                     args.reindex_attr_idx, args.attr_count]
        fairD_0 = FBDemParDisc(args.embed_dim, args.fair_att_0, '0',
                               attr_data, args.use_cross_entropy)
        fairD_1 = FBDemParDisc(args.embed_dim, args.fair_att_1, '1',
                               attr_data, args.use_cross_entropy)
        fairD_2 = FBDemParDisc(args.embed_dim, args.fair_att_2, '2',
                               attr_data, args.use_cross_entropy)
        most_common_attr = [print(fairD_0.inv_attr_map[int(k)]) for k in
                            fairD_0.reindex_to_idx.keys()]

        ''' Initialize Optimizers '''
        if args.sample_mask:
            filter_0 = AttributeFilter(args.embed_dim, attribute='0')
            filter_1 = AttributeFilter(args.embed_dim, attribute='1')
            filter_2 = AttributeFilter(args.embed_dim, attribute='2')
            filter_0.cuda()
            filter_1.cuda()
            filter_2.cuda()
            optimizer_fairD_0 = optimizer(fairD_0.parameters(), 'adam', args.lr)
            optimizer_fairD_1 = optimizer(fairD_1.parameters(), 'adam', args.lr)
            optimizer_fairD_2 = optimizer(fairD_2.parameters(), 'adam', args.lr)
        elif args.use_trained_filters and not args.sample_mask:
            filter_0 = AttributeFilter(args.embed_dim, attribute='0')
            filter_1 = AttributeFilter(args.embed_dim, attribute='1')
            filter_2 = AttributeFilter(args.embed_dim, attribute='2')
            filter_0.cuda()
            filter_1.cuda()
            filter_2.cuda()
        else:
            optimizer_fairD_0 = optimizer(fairD_0.parameters(), 'adam', args.lr)
            optimizer_fairD_1 = optimizer(fairD_1.parameters(), 'adam', args.lr)
            optimizer_fairD_2 = optimizer(fairD_2.parameters(), 'adam', args.lr)
            filter_0, filter_1, filter_2 = None, None, None
        if args.use_cuda:
            fairD_0.cuda()
            fairD_1.cuda()
            fairD_2.cuda()
    elif args.use_1_attr:
        attr_data = [args.attr_mat, args.ent_to_idx, args.attr_to_idx,
                     args.reindex_attr_idx, args.attr_count]
        fairD_1 = FBDemParDisc(args.embed_dim, args.fair_att_1, '1', attr_data,
                               use_cross_entropy=args.use_cross_entropy)
        fairD_1.cuda()
        optimizer_fairD_1 = optimizer(fairD_1.parameters(), 'adam', args.lr)
    elif args.use_0_attr:
        attr_data = [args.attr_mat, args.ent_to_idx, args.attr_to_idx,
                     args.reindex_attr_idx, args.attr_count]
        fairD_0 = FBDemParDisc(args.embed_dim, args.fair_att_0, '0', attr_data,
                               use_cross_entropy=args.use_cross_entropy)
        optimizer_fairD_0 = optimizer(fairD_0.parameters(), 'adam', args.lr)
        fairD_0.cuda()
    elif args.use_2_attr:
        attr_data = [args.attr_mat, args.ent_to_idx, args.attr_to_idx,
                     args.reindex_attr_idx, args.attr_count]
        fairD_2 = FBDemParDisc(args.embed_dim, args.fair_att_2, '2', attr_data,
                               use_cross_entropy=args.use_cross_entropy)
        optimizer_fairD_2 = optimizer(fairD_2.parameters(), 'adam', args.lr)
        fairD_2.cuda()

    if args.load_filters:
        filter_0.load(args.filter_0_saved_path)
        filter_1.load(args.filter_1_saved_path)
        filter_2.load(args.filter_2_saved_path)

    ''' Create Sets '''
    fairD_set = [fairD_0, fairD_1, fairD_2]
    filter_set = [filter_0, filter_1, filter_2]
    optimizer_fairD_set = [optimizer_fairD_0, optimizer_fairD_1,
                           optimizer_fairD_2]
    D_monitor = OrderedDict()
    test_val_monitor = OrderedDict()

    if args.sample_mask and not args.use_trained_filters:
        optimizerD = optimizer(list(modelD.parameters()) +
                               list(filter_0.parameters()) +
                               list(filter_1.parameters()) +
                               list(filter_2.parameters()), 'adam', args.lr)
    else:
        optimizerD = optimizer(modelD.parameters(), 'adam_hyp3', args.lr)
        # optimizerD = optimizer(modelD.parameters(), 'adam', args.lr)

    schedulerD = lr_scheduler(optimizerD, args.decay_lr, args.num_epochs)
    loss_func = MarginRankingLoss(args.margin, 1)

    _cst_inds = torch.LongTensor(
        np.arange(args.num_ent, dtype=np.int64)[:, None]).cuda().repeat(
            1, args.batch_size // 2)
    _cst_s = torch.LongTensor(np.arange(args.batch_size // 2)).cuda()
    _cst_s_nb = torch.LongTensor(np.arange(args.batch_size // 2,
                                           args.batch_size)).cuda()
    _cst_nb = torch.LongTensor(np.arange(args.batch_size)).cuda()

    if args.prefetch_to_gpu:
        train_loader = DataLoader(train_set, batch_size=args.batch_size,
                                  shuffle=True, drop_last=True,
                                  num_workers=0, collate_fn=collate_fn)
    else:
        train_loader = DataLoader(train_set, batch_size=args.batch_size,
                                  shuffle=True, drop_last=True,
                                  num_workers=4, pin_memory=True,
                                  collate_fn=collate_fn)

    if args.freeze_transD:
        freeze_model(modelD)

    ''' Joint Training '''
    if not args.dont_train:
        with experiment.train():
            for epoch in tqdm(range(1, args.num_epochs + 1)):
                train(train_loader, epoch, args, train_hash, modelD, optimizerD,
                      tflogger, fairD_set, optimizer_fairD_set, filter_set,
                      experiment)
                gc.collect()
                if args.decay_lr:
                    if args.decay_lr == 'ReduceLROnPlateau':
                        schedulerD.step(D_monitor['D_loss_epoch_avg'])
                    else:
                        schedulerD.step()

                if epoch % args.valid_freq == 0:
                    with torch.no_grad():
                        l_ranks, r_ranks = test(test_set, args, all_hash,
                                                modelD, tflogger, filter_set,
                                                experiment, subsample=20)
                        l_mean = l_ranks.mean()
                        r_mean = r_ranks.mean()
                        l_mrr = (1. / l_ranks).mean()
                        r_mrr = (1. / r_ranks).mean()
                        l_h10 = (l_ranks <= 10).mean()
                        r_h10 = (r_ranks <= 10).mean()
                        l_h5 = (l_ranks <= 5).mean()
                        r_h5 = (r_ranks <= 5).mean()
                        avg_mr = (l_mean + r_mean) / 2
                        avg_mrr = (l_mrr + r_mrr) / 2
                        avg_h10 = (l_h10 + r_h10) / 2
                        avg_h5 = (l_h5 + r_h5) / 2

                    if args.use_attr:
                        test_fairness(test_set, args, modelD, tflogger,
                                      fairD_0, attribute='0', epoch=epoch,
                                      experiment=experiment, filter_=filter_0)
                        test_fairness(test_set, args, modelD, tflogger,
                                      fairD_1, attribute='1', epoch=epoch,
                                      experiment=experiment, filter_=filter_1)
                        test_fairness(test_set, args, modelD, tflogger,
                                      fairD_2, attribute='2', epoch=epoch,
                                      experiment=experiment, filter_=filter_2)
                    elif args.use_0_attr:
                        test_fairness(test_set, args, modelD, tflogger,
                                      fairD_0, attribute='0', epoch=epoch,
                                      experiment=experiment, filter_=filter_0)
                    elif args.use_1_attr:
                        test_fairness(test_set, args, modelD, tflogger,
                                      fairD_1, attribute='1', epoch=epoch,
                                      experiment=experiment, filter_=filter_1)
                    elif args.use_2_attr:
                        test_fairness(test_set, args, modelD, tflogger,
                                      fairD_2, attribute='2', epoch=epoch,
                                      experiment=experiment, filter_=filter_2)

                    joblib.dump({'l_ranks': l_ranks, 'r_ranks': r_ranks},
                                args.outname_base +
                                'epoch{}_validation_ranks.pkl'.format(epoch),
                                compress=9)
                    print("Mean Rank is %f" % (float(avg_mr)))

                    if args.do_log:
                        # Tensorboard logging
                        tflogger.scalar_summary('Mean Rank', float(avg_mr), epoch)
                        tflogger.scalar_summary('Mean Reciprocal Rank',
                                                float(avg_mrr), epoch)
                        tflogger.scalar_summary('Hit @10', float(avg_h10), epoch)
                        tflogger.scalar_summary('Hit @5', float(avg_h5), epoch)
                        experiment.log_metric("Mean Rank", float(avg_mr),
                                              step=epoch)

                    modelD.save(args.outname_base + 'D_epoch{}.pts'.format(epoch))

                if epoch % (args.valid_freq * 5) == 0:
                    l_ranks, r_ranks = test(test_set, args, all_hash, modelD,
                                            tflogger, filter_set, experiment,
                                            subsample=20)
                    l_mean = l_ranks.mean()
                    r_mean = r_ranks.mean()
                    l_mrr = (1. / l_ranks).mean()
                    r_mrr = (1. / r_ranks).mean()
                    l_h10 = (l_ranks <= 10).mean()
                    r_h10 = (r_ranks <= 10).mean()
                    l_h5 = (l_ranks <= 5).mean()
                    r_h5 = (r_ranks <= 5).mean()

    if args.sample_mask:
        filter_0.save(args.outname_base + 'Filter_0.pts')
        filter_1.save(args.outname_base + 'Filter_1.pts')
        filter_2.save(args.outname_base + 'Filter_2.pts')

    if args.test_new_disc:
        ''' Testing with fresh discriminators '''
        args.use_attr = True
        args.use_trained_filters = True
        with experiment.test():
            args.force_ce = True
            if args.use_trained_filters:
                logdir_filter = args.outname_base + '_test_2_filter_logs' + '/'
                if args.remove_old_run:
                    shutil.rmtree(logdir_filter)
                if not os.path.exists(logdir_filter):
                    os.makedirs(logdir_filter)
                tflogger_filter = tfLogger(logdir_filter)
                args.use_trained_filters = True

                ''' Test With Filters '''
                if args.use_attr:
                    retrain_disc(args, experiment, train_loader, train_hash,
                                 test_set, modelD, optimizerD, tflogger_filter,
                                 filter_2=filter_2, filter_0=None,
                                 filter_1=None, attribute='2')
                    retrain_disc(args, experiment, train_loader, train_hash,
                                 test_set, modelD, optimizerD, tflogger_filter,
                                 filter_0, filter_1=None,
                                 filter_2=None, attribute='0')
                    retrain_disc(args, experiment, train_loader, train_hash,
                                 test_set, modelD, optimizerD, tflogger_filter,
                                 filter_1=filter_1, filter_0=None,
                                 filter_2=None, attribute='1')
                elif args.use_0_attr:
                    retrain_disc(args, experiment, train_loader, train_hash,
                                 test_set, modelD, optimizerD, tflogger_filter,
                                 filter_0, filter_1=None,
                                 filter_2=None, attribute='0')
                elif args.use_1_attr:
                    retrain_disc(args, experiment, train_loader, train_hash,
                                 test_set, modelD, optimizerD, tflogger_filter,
                                 filter_1=filter_1, filter_0=None,
                                 filter_2=None, attribute='1')
                elif args.use_2_attr:
                    retrain_disc(args, experiment, train_loader, train_hash,
                                 test_set, modelD, optimizerD, tflogger_filter,
                                 filter_2=filter_2, filter_0=None,
                                 filter_1=None, attribute='2')

            args.freeze_transD = True
            args.use_trained_filters = False
            logdir_no_filter = args.outname_base + '_test_no_2_filter_logs' + '/'
            if args.remove_old_run:
                shutil.rmtree(logdir_no_filter)
            if not os.path.exists(logdir_no_filter):
                os.makedirs(logdir_no_filter)
            tflogger_no_filter = tfLogger(logdir_no_filter)
class ModelTrainer:
    def __init__(self, model, dataloader, args):
        self.model = model
        self.args = args
        self.data = dataloader
        self.metric = args.metric

        if (dataloader is not None):
            self.frq_log = len(dataloader['train']) // args.frq_log

        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        model.to(self.device)

        if args.optimizer == 'sgd':
            self.optimizer = optim.SGD(model.parameters(),
                                       lr=args.lr,
                                       momentum=args.momentum,
                                       weight_decay=args.weight_decay)
        elif args.optimizer == 'adam':
            self.optimizer = optim.Adam(model.parameters(),
                                        lr=args.lr,
                                        betas=(args.beta1, 0.999),
                                        weight_decay=args.weight_decay)
        else:
            raise Exception('--optimizer should be one of {sgd, adam}')

        if args.scheduler == 'set':
            self.scheduler = optim.lr_scheduler.LambdaLR(
                self.optimizer,
                lambda epoch: 10**(epoch / args.scheduler_factor))
        elif args.scheduler == 'auto':
            self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                self.optimizer,
                mode='min',
                factor=args.scheduler_factor,
                patience=5,
                verbose=True,
                threshold=0.0001,
                threshold_mode='rel',
                cooldown=0,
                min_lr=0,
                eps=1e-08)

        self.experiment = Experiment(api_key=args.comet_key,
                                     project_name=args.comet_project,
                                     workspace=args.comet_workspace,
                                     auto_weight_logging=True,
                                     auto_metric_logging=False,
                                     auto_param_logging=False)
        self.experiment.set_name(args.name)
        self.experiment.log_parameters(vars(args))
        self.experiment.set_model_graph(str(self.model))

    def train_one_epoch(self, epoch):
        self.model.train()
        train_loader = self.data['train']
        train_loss = 0
        correct = 0

        comet_offset = epoch * len(train_loader)

        for batch_idx, (data, target) in tqdm(enumerate(train_loader),
                                              leave=True,
                                              total=len(train_loader)):
            data, target = data.to(self.device), target.to(self.device)

            self.optimizer.zero_grad()
            output = self.model(data)
            loss = F.cross_entropy(output, target, reduction='sum')
            loss.backward()
            self.optimizer.step()

            pred = output.argmax(dim=1, keepdim=True)
            acc = pred.eq(target.view_as(pred)).sum().item()

            train_loss += loss.item()
            correct += acc

            loss = loss.item() / len(data)
            acc = 100. * acc / len(data)

            comet_step = comet_offset + batch_idx
            self.experiment.log_metric('batch_loss', loss, comet_step, epoch)
            self.experiment.log_metric('batch_acc', acc, comet_step, epoch)

            if (batch_idx + 1) % self.frq_log == 0:
                self.experiment.log_metric('log_loss', loss, comet_step, epoch)
                self.experiment.log_metric('log_acc', acc, comet_step, epoch)
                print('Epoch: {} [{}/{}]\tLoss: {:.6f}\tAcc: {:.2f}%'.format(
                    epoch + 1, (batch_idx + 1) * len(data),
                    len(train_loader.dataset), loss, acc))

        train_loss /= len(train_loader.dataset)
        acc = 100. * correct / len(train_loader.dataset)

        comet_step = comet_offset + len(train_loader) - 1
        self.experiment.log_metric('loss', train_loss, comet_step, epoch)
        self.experiment.log_metric('acc', acc, comet_step, epoch)

        print(
            'Epoch: {} [Done]\tLoss: {:.4f}\tAccuracy: {}/{} ({:.2f}%)'.format(
                epoch + 1, train_loss, correct, len(train_loader.dataset), acc))

        return {'loss': train_loss, 'acc': acc}

    def train(self):
        self.log_cmd()
        best = -1
        history = {'lr': [], 'train_loss': []}

        try:
            print(">> Training %s" % self.model.name)
            for epoch in range(self.args.nepoch):
                with self.experiment.train():
                    train_res = self.train_one_epoch(epoch)

                with self.experiment.validate():
                    print("\nvalidation...")
                    comet_offset = (epoch + 1) * len(self.data['train']) - 1
                    res = self.val(self.data['val'], comet_offset, epoch)

                if res[self.metric] > best:
                    best = res[self.metric]
                    self.save_weights(epoch)

                if self.args.scheduler == 'set':
                    lr = self.optimizer.param_groups[0]['lr']
                    history['lr'].append(lr)
                    history['train_loss'].append(train_res['loss'])
                    self.scheduler.step(epoch + 1)
                    lr = self.optimizer.param_groups[0]['lr']
                    print('learning rate changed to: %.10f' % lr)
                elif self.args.scheduler == 'auto':
                    self.scheduler.step(train_res['loss'])
        finally:
            print(">> Training model %s. [Stopped]" % self.model.name)
            self.experiment.log_asset_folder(os.path.join(
                self.args.outf, self.args.name, 'weights'),
                step=None,
                log_file_name=False,
                recursive=False)
            if self.args.scheduler == 'set':
                plt.semilogx(history['lr'], history['train_loss'])
                plt.grid(True)
                self.experiment.log_figure(figure=plt)
                plt.show()

    def val(self, val_loader, comet_offset=-1, epoch=-1):
        self.model.eval()
        test_loss = 0
        correct = 0

        labels = list(range(self.args.nclass))
        cm = np.zeros((len(labels), len(labels)))

        with torch.no_grad():
            for data, target in tqdm(val_loader,
                                     leave=True,
                                     total=len(val_loader)):
                data, target = data.to(self.device), target.to(self.device)
                output = self.model(data)
                test_loss += F.cross_entropy(output, target,
                                             reduction='sum').item()
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()

                pred = pred.view_as(target).data.cpu().numpy()
                target = target.data.cpu().numpy()
                cm += confusion_matrix(target, pred, labels=labels)

        test_loss /= len(val_loader.dataset)
        accuracy = 100. * correct / len(val_loader.dataset)

        print('Evaluation: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.
              format(test_loss, correct, len(val_loader.dataset), accuracy))

        res = {'loss': test_loss, 'acc': accuracy}
        self.experiment.log_metrics(res, step=comet_offset, epoch=epoch)

        self.experiment.log_confusion_matrix(
            matrix=cm,
            labels=[ClassDict.getName(x) for x in labels],
            title='confusion matrix after epoch %03d' % epoch,
            file_name="confusion_matrix_%03d.json" % epoch)

        return res

    def test(self):
        self.load_weights()
        with self.experiment.test():
            print('\ntesting....')
            res = self.val(self.data['test'])

    def log_cmd(self):
        d = vars(self.args)
        cmd = '!python main.py \\\n'
        tab = ' '

        for k, v in d.items():
            if v is None or v == '' or (isinstance(v, bool) and v is False):
                continue
            if isinstance(v, bool):
                arg = '--{} \\\n'.format(k)
            else:
                arg = '--{} {} \\\n'.format(k, v)
            cmd = cmd + tab + arg

        # print(cmd);
        self.experiment.log_text(cmd)

    def save_weights(self, epoch: int):
        weight_dir = os.path.join(self.args.outf, self.args.name, 'weights')
        if not os.path.exists(weight_dir):
            os.makedirs(weight_dir)
        torch.save({
            'epoch': epoch,
            'state_dict': self.model.state_dict()
        }, os.path.join(weight_dir, 'model.pth'))

    def load_weights(self):
        path_g = self.args.weights_path
        if path_g is None:
            weight_dir = os.path.join(self.args.outf, self.args.name, 'weights')
            path_g = os.path.join(weight_dir, 'model.pth')

        print('>> Loading weights...')
        weights_g = torch.load(path_g, map_location=self.device)['state_dict']
        self.model.load_state_dict(weights_g)
        print(' Done.')

    def predict(self, x):
        x = x / 2**15
        self.model.eval()
        with torch.no_grad():
            x = torch.from_numpy(x).float()
            x = self.transform(x)
            x = x.unsqueeze(0)
            x = self.model(x)
            x = F.softmax(x, dim=1)
            x = x.numpy()
        return x
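# --- Hypothetical usage of ModelTrainer above (a sketch under assumed names;
# the real model class and argparse setup live elsewhere). `args` must carry
# the optimizer, scheduler, and Comet fields read in __init__, and the
# dataloader dict maps 'train'/'val'/'test' to torch DataLoaders.
# trainer = ModelTrainer(model, {'train': train_dl, 'val': val_dl, 'test': test_dl}, args)
# trainer.train()   # logs under the train_/validate_ contexts
# trainer.test()    # logs under the test_ context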
model.fit(x=X_train,
          y=y_train,
          epochs=1500,
          batch_size=1000,
          validation_split=0.1,
          callbacks=[
              keras.callbacks.EarlyStopping(monitor='val_loss',
                                            mode='min',
                                            verbose=10,
                                            patience=100)
          ],
          verbose=10,
          workers=2)

with comet_experiment.test():
    loss, accuracy = model.evaluate(X_test,
                                    y_test,
                                    verbose=10,
                                    batch_size=1000000,
                                    workers=2)
    y_pred = model.predict_classes(X_test)
    comet_experiment.log_metrics(
        testing_utils.testing_metrics(y_test=y_test, y_pred=y_pred))

model.save(f'saved_models/{search_space_model["Model"]}.h5')
comet_experiment.end()
search_space_state = pd.read_excel(search_space_path)
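# --- The fragment above runs at module level; `comet_experiment`,
# `search_space_model`, and `search_space_path` are defined earlier in the
# original script. A sketch of those assumed definitions (names and values
# illustrative only):
# comet_experiment = Experiment(api_key="...", project_name="...")
# search_space_model = {"Model": "candidate_0"}  # one row of the search space
# search_space_path = "search_space.xlsx"        # spreadsheet of candidates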
def main(args):
    ''' Preamble '''
    save_path_base = "./reddit_data/Reddit_split_2017-11/split_csv/"
    save_path_k_core = save_path_base + str(args.k_core) + \
        '_' + args.save_master_k_core
    G = nx.read_gpickle(save_path_k_core)
    top_nodes_G = sorted(G.degree, key=lambda x: x[1],
                         reverse=True)[args.skip_n:101 + args.skip_n]
    top_nodes_G = [n for n in top_nodes_G if n[0].split('_')[0] != 'U']
    sensitive_nodes = random.sample(top_nodes_G, args.num_sensitive)
    u_to_idx, sr_to_idx = reddit_mappings(list(G.nodes()))
    args.num_users = len(u_to_idx)
    args.num_sr = len(sr_to_idx)
    cutoff_constant = 0.9
    reddit_check_edges(list(G.edges()))
    train_cutoff_row = int(np.round(len(G.edges()) * cutoff_constant))
    users_cutoff_row = int(np.round(len(u_to_idx) * cutoff_constant))
    args.cutoff_row = train_cutoff_row
    args.users_cutoff_row = users_cutoff_row
    all_users = list(u_to_idx)
    random.shuffle(all_users)

    ''' Train/Test Splits '''
    args.users_train = [
        u_to_idx[user] for user in all_users[:args.users_cutoff_row]
    ]
    args.users_test = [
        u_to_idx[user] for user in all_users[args.users_cutoff_row:]
    ]
    train_set = RedditDataset(
        list(G.edges())[:args.cutoff_row], u_to_idx, sr_to_idx)
    test_set = RedditDataset(
        list(G.edges())[args.cutoff_row:], u_to_idx, sr_to_idx)
    train_fairness_set = NodeClassification(args.users_train,
                                            args.prefetch_to_gpu)
    test_fairness_set = NodeClassification(args.users_test,
                                           args.prefetch_to_gpu)

    if args.filter_false_negs:
        train_hash = set([(train_set.get_mapping(r)).numpy().tobytes()
                          for r in train_set.dataset])
        all_hash = train_hash.copy()
        all_hash.update(set([(test_set.get_mapping(r)).numpy().tobytes()
                             for r in test_set.dataset]))
    else:
        train_hash = None
        all_hash = None

    all_masks = list(map(list, itertools.product([0, 1],
                                                 repeat=args.num_sensitive)))
    if args.held_out_comp:
        args.mask_cutoff_row = int(np.round(len(all_masks) * cutoff_constant))
        train_masks = all_masks[:args.mask_cutoff_row]
        test_masks = all_masks[args.mask_cutoff_row:]
    else:
        train_masks = all_masks

    print("Training Set size %d" % (len(train_set)))
    print("Test Set size %d" % (len(test_set)))

    ''' Define Models '''
    if args.use_multi:
        modelD = to_multi_gpu(RedditEncoder(args.num_users, args.num_sr,
                                            args.embed_dim, args.p))
    else:
        modelD = RedditEncoder(args.num_users, args.num_sr, args.embed_dim,
                               args.p).to(args.device)

    ''' Define Discriminators '''
    if args.use_attr:
        fairD_set, optimizer_fairD_set, filter_set = [], [], []
        for sens_node in sensitive_nodes:
            D = RedditDiscriminator(G, args.embed_dim,
                                    sens_node[0], u_to_idx).to(args.device)
            optimizer_fairD = optimizer(D.parameters(), 'adam', args.lr)
            fairD_set.append(D)
            optimizer_fairD_set.append(optimizer_fairD)
        if not args.sample_mask:
            filter_set = None
        else:
            sr_params = []
            for sens_node in sensitive_nodes:
                sr_filter = AttributeFilter(
                    args.embed_dim, attribute=sens_node[0]).to(args.device)
                sr_params.append(sr_filter)
                filter_set.append(sr_filter)
    else:
        fairD_set, optimizer_fairD_set, filter_set = [None], None, None

    if args.debug:
        ipdb.set_trace()

    if args.sample_mask and not args.use_trained_filters:
        models = [modelD] + sr_params
        optimizerD = optimizer(itertools.chain.from_iterable(
            m.parameters() for m in models), 'adam', args.lr)
    else:
        optimizerD = optimizer(modelD.parameters(), 'adam', args.lr)

    ''' Comet Logging '''
    experiment = Experiment(api_key=args.api_key,
                            disabled=not args.do_log,
                            project_name=args.project_name,
                            workspace=args.workspace)
    experiment.set_name(args.namestr)

    ''' Train Loop '''
    train_loader = DataLoader(train_set, batch_size=args.batch_size,
                              shuffle=True, drop_last=True,
                              num_workers=8, pin_memory=True,
                              collate_fn=collate_fn)

    # train_compositional_reddit_classifier(args,modelD,G,sensitive_nodes,\
    #         u_to_idx,train_fairness_set,test_fairness_set,experiment,all_masks,filter_set=filter_set)

    with experiment.train():
        for epoch in tqdm(range(1, args.num_epochs + 1)):
            train_fair_reddit(train_loader, all_hash, epoch, args, modelD,
                              optimizerD, fairD_set, optimizer_fairD_set,
                              filter_set, train_masks, experiment)
            if epoch % args.valid_freq == 0:
                test_reddit_nce(test_set, epoch, all_hash,
                                args, modelD, experiment, filter_set,
                                subsample=1)
                if args.use_attr:
                    for i, fairD in enumerate(fairD_set):
                        if filter_set is not None:
                            test_sensitive_sr(args, test_fairness_set, modelD,
                                              fairD, experiment, epoch,
                                              [filter_set[i]])
                        else:
                            test_sensitive_sr(args, test_fairness_set, modelD,
                                              fairD, experiment, epoch,
                                              filter_set)

    constant = len(fairD_set) - fairD_set.count(None)
    if constant != 0 or args.test_new_disc:
        if args.test_new_disc:
            args.use_attr = True
        ''' Training Fresh Discriminators '''
        args.freeze_encoder = True
        freeze_model(modelD)
        with experiment.test():
            ''' Train Classifier '''
            if args.use_attr:
                if args.sample_mask and args.held_out_comp:
                    ''' Compositional Held Out Test '''
                    train_compositional_reddit_classifier(
                        args, modelD, G, sensitive_nodes, u_to_idx,
                        train_fairness_set, test_fairness_set, experiment,
                        test_masks, filter_set=filter_set)
                elif args.sample_mask and not args.held_out_comp:
                    ''' Compositional All '''
                    train_compositional_reddit_classifier(
                        args, modelD, G, sensitive_nodes, u_to_idx,
                        train_fairness_set, test_fairness_set, experiment,
                        all_masks, filter_set=filter_set)
                else:
                    ''' Non Compositional '''
                    for sens_node in sensitive_nodes:
                        train_reddit_classifier(
                            args, modelD, G, sens_node[0], u_to_idx,
                            train_fairness_set, test_fairness_set,
                            experiment=experiment, filter_set=filter_set)
    experiment.end()
    torch.cuda.empty_cache()
def run():
    BATCH_SIZE = args.batch_size
    LEARNING_RATE = args.learning_rate
    NUM_EPOCHS = args.epochs
    MODEL = args.model
    RANDOMIZE = args.randomize

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")

    logger = Logger('./logs/{}'.format(time.localtime()))

    print("Created model...")
    if MODEL:
        model = torch.load(MODEL).module
    else:
        model = cdssm.CDSSM()
        model = model.cuda()
        model = model.to(device)
    if torch.cuda.device_count() > 0:
        print("Let's use", torch.cuda.device_count(), "GPU(s)!")
        model = nn.DataParallel(model)

    print("Created dataset...")
    dataset = pytorch_data_loader.ValWikiDataset(
        test,
        claims_dict,
        testFile="shared_task_dev.jsonl",
        sparse_evidences=sparse_evidences,
        batch_size=BATCH_SIZE)
    dataloader = DataLoader(dataset,
                            num_workers=0,
                            collate_fn=pytorch_data_loader.PadCollate(),
                            shuffle=False)

    OUTPUT_FREQ = int((len(dataset)) * 0.02)
    parameters = {
        "batch size": BATCH_SIZE,
        "data": args.data,
        "model": args.model
    }
    experiment = Experiment(api_key="YLsW4AvRTYGxzdDqlWRGCOhee",
                            project_name="clsm",
                            workspace="moinnadeem")
    experiment.add_tag("test")
    experiment.log_parameters(parameters)
    experiment.log_asset("cdssm.py")

    true = []
    pred = []
    model.eval()
    test_running_accuracy = 0.0
    test_running_loss = 0.0
    test_running_recall_at_ten = 0.0
    recall_intervals = [1, 2, 5, 10, 20]
    recall = {}
    for i in recall_intervals:
        recall[i] = []

    num_batches = 0
    print("Evaluating...")
    beginning_time = time.time()
    criterion = torch.nn.NLLLoss()

    with experiment.test():
        for batch_num, inputs in enumerate(dataloader):
            num_batches += 1
            claims_tensors, claims_text, evidences_tensors, evidences_text, labels = inputs

            claims_tensors = claims_tensors.cuda()
            evidences_tensors = evidences_tensors.cuda()
            labels = labels.cuda()

            y_pred = model(claims_tensors, evidences_tensors)

            y = (labels).float()
            y_pred = y_pred.squeeze()

            loss = criterion(y_pred, torch.max(y, 1)[1])
            test_running_loss += loss.item()

            y_pred = torch.exp(y_pred)
            binary_y = torch.max(y, 1)[1]
            binary_y_pred = torch.max(y_pred, 1)[1]
            accuracy = (binary_y == binary_y_pred).to(device)
            bin_acc = y_pred[:, 1]
            accuracy = accuracy.float().mean()
            # bin_acc = y_pred

            # handle ranking here!
            sorted_idxs = torch.sort(bin_acc, descending=True)[1]

            relevant_evidences = []
            for idx in range(y.shape[0]):
                try:
                    if int(y[idx][1]):
                        relevant_evidences.append(evidences_text[idx])
                except Exception as e:
                    print(y, y[idx], idx)
                    raise e

            # if len(relevant_evidences)==0:
            #     print("Zero relevant", y.sum())

            retrieved_evidences = []
            for idx in sorted_idxs:
                retrieved_evidences.append(evidences_text[idx])

            for k in recall_intervals:
                if len(relevant_evidences) == 0:
                    # recall[k].append(0)
                    pass
                else:
                    recall[k].append(
                        calculate_recall(retrieved_evidences,
                                         relevant_evidences,
                                         k=k))

            if len(relevant_evidences) == 0:
                # test_running_recall_at_ten += 0.0
                pass
            else:
                test_running_recall_at_ten += calculate_recall(
                    retrieved_evidences, relevant_evidences, k=20)

            if args.print:
                for idx in sorted_idxs:
                    print("Claim: {}, Evidence: {}, Prediction: {}, Label: {}".
                          format(claims_text[0], evidences_text[idx],
                                 y_pred[idx], y[idx]))

            # compute recall
            # assuming only one claim, this creates a list of all relevant evidences
            true.extend(binary_y.tolist())
            pred.extend(binary_y_pred.tolist())

            test_running_accuracy += accuracy.item()

            if batch_num % OUTPUT_FREQ == 0 and batch_num > 0:
                elapsed_time = time.time() - beginning_time
                print("[{}:{:3f}s]: accuracy: {}, loss: {}, recall@20: {}".
                      format(batch_num / len(dataloader), elapsed_time,
                             test_running_accuracy / OUTPUT_FREQ,
                             test_running_loss / OUTPUT_FREQ,
                             test_running_recall_at_ten / OUTPUT_FREQ))

                for k in sorted(recall.keys()):
                    v = recall[k]
                    print("recall@{}: {}".format(k, np.mean(v)))

                # 1. Log scalar values (scalar summary)
                info = {'test_accuracy': test_running_accuracy / OUTPUT_FREQ}

                true = [int(i) for i in true]
                pred = [int(i) for i in pred]
                print(classification_report(true, pred))

                for tag, value in info.items():
                    experiment.log_metric(tag, value, step=batch_num)

                # 2. Log values and gradients of the parameters (histogram summary)
                # for tag, value in model.named_parameters():
                #     tag = tag.replace('.', '/')
                #     logger.histo_summary(tag, value.data.cpu().numpy(), batch_num+1)

                test_running_accuracy = 0.0
                test_running_recall_at_ten = 0.0
                test_running_loss = 0.0
                beginning_time = time.time()

            # del claims_tensors
            # del claims_text
            # del evidences_tensors
            # del evidences_text
            # del labels
            # del y
            # del y_pred
            # torch.cuda.empty_cache()

    true = [int(i) for i in true]
    pred = [int(i) for i in pred]
    final_accuracy = accuracy_score(true, pred)
    print("Final accuracy: {}".format(final_accuracy))
    print(classification_report(true, pred))
    for k, v in recall.items():
        print("Recall@{}: {}".format(k, np.mean(v)))

    filename = "predicted_labels/predicted_labels"
    for key, value in parameters.items():
        key = key.replace(" ", "_")
        key = key.replace("/", "_")
        if type(value) == str:
            value = value.replace("/", "_")
        filename += "_{}-{}".format(key, value)

    joblib.dump({"true": true, "pred": pred}, filename)
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='Cifar10 Example')
    parser.add_argument('--batch-size', type=int, default=128, metavar='N',
                        help='input batch size for training (default: 128)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=25, metavar='N',
                        help='number of epochs to train (default: 25)')
    parser.add_argument('--lr', type=float, default=0.1, metavar='LR',
                        help='learning rate (default: 0.1)')
    parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                        help='SGD momentum (default: 0.9)')
    parser.add_argument('--model-path', type=str, default='', metavar='M',
                        help='model param path')
    parser.add_argument('--loss-type', type=str, default='CE', metavar='L',
                        help='B or CE or F or ICF_CE or ICF_F or CB_CE or CB_F')
    parser.add_argument('--beta', type=float, default=0.999, metavar='B',
                        help='Beta for ClassBalancedLoss')
    parser.add_argument('--gamma', type=float, default=2.0, metavar='G',
                        help='Gamma for FocalLoss')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--balanced-data', action='store_true', default=False,
                        help='For sampling rate. Default is Imbalanced-data.')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    args = parser.parse_args()

    # Add the following code anywhere in your machine learning file
    experiment = Experiment(api_key="5Yl3Rxz9S3E0PUKQTBpA0QJPi",
                            project_name="imbalanced-cifar-10",
                            workspace="tancoro")

    # Open the experiment page in the browser
    # experiment.display(clear=True, wait=True, new=0, autoraise=True)

    # Get the experiment key (uniquely identifies this run)
    exp_key = experiment.get_key()
    print('KEY: ' + exp_key)

    # Log the hyperparameters
    hyper_params = {
        'batch_size': args.batch_size,
        'epoch': args.epochs,
        'learning_rate': args.lr,
        'sgd_momentum': args.momentum,
        'model_path': args.model_path,
        'loss_type': args.loss_type,
        'beta': args.beta,
        'gamma': args.gamma,
        'torch_manual_seed': args.seed,
        'balanced_data': args.balanced_data
    }
    experiment.log_parameters(hyper_params)

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    print('use_cuda {}'.format(use_cuda))

    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # train dataset
    cifar10_train_dataset = datasets.CIFAR10(
        './data', train=True, download=True,
        transform=transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010))
        ]))

    # train sampling rate
    sampling_rate = {}
    if not args.balanced_data:
        sampling_rate = {1: 0.05, 4: 0.05, 6: 0.05}
    print(sampling_rate)

    # train Sampler
    train_sampler = ReductionSampler(cifar10_train_dataset,
                                     sampling_rate=sampling_rate)
    # train loader
    train_loader = torch.utils.data.DataLoader(cifar10_train_dataset,
                                               batch_size=args.batch_size,
                                               sampler=train_sampler,
                                               **kwargs)

    # test dataset
    cifar10_test_dataset = datasets.CIFAR10(
        './data', train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010))
        ]))

    # test majority loader
    test_majority_sampler = ReductionSampler(cifar10_test_dataset,
                                             sampling_rate={1: 0, 4: 0, 6: 0})
    test_majority_loader = torch.utils.data.DataLoader(
        cifar10_test_dataset, batch_size=args.test_batch_size,
        sampler=test_majority_sampler, **kwargs)
    # test minority loader
    test_minority_sampler = ReductionSampler(
        cifar10_test_dataset,
        sampling_rate={0: 0, 2: 0, 3: 0, 5: 0, 7: 0, 8: 0, 9: 0})
    test_minority_loader = torch.utils.data.DataLoader(
        cifar10_test_dataset, batch_size=args.test_batch_size,
        sampler=test_minority_sampler, **kwargs)
    # test alldata loader
    test_alldata_loader = torch.utils.data.DataLoader(
        cifar10_test_dataset, batch_size=args.test_batch_size,
        shuffle=True, **kwargs)

    model = ResNet18().to(device)

    # train loss
    train_loss = BasicCrossEntropyLoss()
    if args.loss_type == 'CE':
        train_loss = CrossEntropyLoss(train_sampler.get_data_count_map(), device)
    elif args.loss_type == 'F':
        train_loss = FocalLoss(train_sampler.get_data_count_map(), device,
                               gamma=args.gamma)
    elif args.loss_type == 'ICF_CE':
        train_loss = InverseClassFrequencyCrossEntropyLoss(
            train_sampler.get_data_count_map(), device)
    elif args.loss_type == 'ICF_F':
        train_loss = InverseClassFrequencyFocalLoss(
            train_sampler.get_data_count_map(), device, gamma=args.gamma)
    elif args.loss_type == 'CB_CE':
        train_loss = ClassBalancedCrossEntropyLoss(
            train_sampler.get_data_count_map(), device, beta=args.beta)
    elif args.loss_type == 'CB_F':
        train_loss = ClassBalancedFocalLoss(
            train_sampler.get_data_count_map(), device,
            beta=args.beta, gamma=args.gamma)
    print('Train Loss Type: {}'.format(type(train_loss)))

    # load param
    if len(args.model_path) > 0:
        model.load_state_dict(torch.load(args.model_path))

    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum, weight_decay=5e-4)
    # lr = 0.1   if epoch < 15
    # lr = 0.01  if 15 <= epoch < 20
    # lr = 0.001 if 20 <= epoch < 25
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[15, 20],
                                               gamma=0.1)

    for epoch in range(1, args.epochs + 1):
        with experiment.train():
            experiment.log_current_epoch(epoch)
            train(args, model, device, train_loader, len(train_sampler),
                  optimizer, epoch, experiment, lossfunc=train_loss)
        with experiment.test():
            test(args, model, device, test_minority_loader,
                 len(test_minority_sampler), epoch, experiment, pref='minority')
            test(args, model, device, test_majority_loader,
                 len(test_majority_sampler), epoch, experiment, pref='majority')
            test(args, model, device, test_alldata_loader,
                 len(test_alldata_loader.dataset), epoch, experiment, pref='all')
        if (args.save_model) and (epoch % 10 == 0):
            print('saving model to ./model/cifar10_{0}_{1:04d}.pt'.format(
                exp_key, epoch))
            torch.save(model.state_dict(),
                       "./model/cifar10_{0}_{1:04d}.pt".format(exp_key, epoch))
        scheduler.step()
class Trainer2D:
    def __init__(self, config):
        self.experiment = Experiment(api_key="CQ4yEzhJorcxul2hHE5gxVNGu",
                                     project_name="HIP")
        self.experiment.log_parameters(vars(config))
        self.config = config
        self.log_step = config.log_step
        self.model = conv2d.Conv2DPatches(image_size=config.image_size)
        print(self.model)
        self.d = get_dataloader2D(config)
        self.train_loader, self.test_loader = self.d
        self.train_loader_jig, self.test_loader_jig = get_dataloader2DJigSaw(
            config)
        self.net_optimizer = optim.Adam(self.model.parameters(), config.lr,
                                        [0.5, 0.9999])
        if torch.cuda.is_available():
            self.model = self.model.cuda()
        self.criterion_c = nn.CrossEntropyLoss()
        self.criterion_d = nn.MSELoss()
        self.epochs = config.epochs
        if torch.cuda.is_available():
            print("Using CUDA")
            self.model = self.model.cuda()
        # self.model = self.model.cuda()
        self.pre_model_path = "./artifacts/pre_models/" + str(
            config.lr) + ".pth"
        self.model_path = "./artifacts/models/" + str(config.lr) + ".pth"
        self.image_size = config.image_size

    def pre_train(self):
        if os.path.isfile(self.pre_model_path):
            print("Using pre-trained model for solving the jigsaw puzzle")
            self.model = torch.load(self.pre_model_path)
        else:
            print("Starting pre-training and solving the jigsaw puzzle")
            # NOTE: range(0) never iterates, so this pre-training loop is
            # effectively disabled as written.
            for epoch in range(0):
                print("Starting epoch {}".format(epoch))
                train_loader = iter(self.train_loader_jig)
                with self.experiment.train():
                    for i in range(len(train_loader)):
                        self.net_optimizer.zero_grad()
                        data, indexes, _ = next(train_loader)
                        # print(landmarks)
                        # print(landmarks.shape)
                        data, indexes = self.to_var(data), self.to_var(
                            indexes).float()
                        B, L, H, W = data.size()
                        B, L, S = indexes.size()
                        print(data.size())
                        print(indexes.size())
                        jig_out, _ = self.model(data, True)
                        loss = self.criterion_d(jig_out, indexes.view(-1, S))
                        loss.backward()
                        self.net_optimizer.step()
                        # self.plots(y_slices, landmarks[:, :, [0, 2]], detected_points)
                        self.experiment.log_metric("pre-loss", loss.item())
                        print("loss: {}".format(loss.item()))
            torch.save(self.model, self.pre_model_path)

    def train(self):
        if os.path.isfile(self.model_path):
            print("Using pre-trained model")
            self.model = torch.load(self.model_path)
        print("Starting training")
        if torch.cuda.is_available():
            self.model = self.model.cuda()
        for epoch in range(self.epochs):
            print("Starting epoch {}".format(epoch))
            train_loader = iter(self.train_loader)
            with self.experiment.train():
                for i in range(len(train_loader)):
                    self.net_optimizer.zero_grad()
                    data, landmarks, _ = next(train_loader)
                    # print(landmarks)
                    data, landmarks = self.to_var(data), self.to_var(
                        landmarks)
                    B, L, H, W = data.size()
                    B, L, S = landmarks.size()
                    y = landmarks[:, :, 1].view(B, L)
                    y_slices = torch.zeros([B, L, H, W], dtype=torch.float32)
                    if torch.cuda.is_available():
                        y_slices = y_slices.cuda()
                    for b in range(B):
                        y_slices[b] = data[b, y[b]]
                    jig_out, detected_points = self.model(y_slices)
                    landmarks = landmarks.float() / self.image_size
                    loss = self.criterion_d(detected_points,
                                            landmarks[:, :, [0, 2]])
                    loss.backward()
                    self.net_optimizer.step()
                    # self.plots(y_slices, landmarks[:, :, [0, 2]], detected_points)
                    self.experiment.log_metric("loss", loss.item())
                    print("loss: {}".format(loss.item()))
            if epoch % self.log_step == 0:
                with self.experiment.test():
                    self.evaluate()
                    evaluator = Evaluator(self, self.test_loader)
                    evaluator.report()
                torch.save(self.model, self.model_path)
        evaluator = Evaluator(self, self.test_loader)
        evaluator.report()

    def evaluate(self):
        test_loader = iter(self.test_loader)
        with self.experiment.test():
            loss = 0
            for i in range(len(test_loader)):
                self.net_optimizer.zero_grad()
                data, landmarks, _ = next(test_loader)
                data, landmarks = self.to_var(data), self.to_var(landmarks)
                B, L, H, W = data.size()
                B, L, S = landmarks.size()
                y = landmarks[:, :, 1].view(B, L)
                y_slices = torch.zeros([B, L, H, W], dtype=torch.float32)
                if torch.cuda.is_available():
                    y_slices = y_slices.cuda()
                for b in range(B):
                    y_slices[b] = data[b, y[b]]
                jig_out, detected_points = self.model(y_slices)
                landmarks = landmarks.float() / self.image_size
                loss += self.criterion_d(detected_points,
                                         landmarks[:, :, [0, 2]]).item()
                self.plots(y_slices.cpu(), landmarks[:, :, [0, 2]],
                           detected_points)
            self.experiment.log_metric("loss", loss / len(test_loader))

    def plots(self, slices, real, predicted):
        figure, axes = plt.subplots(nrows=4, ncols=4, figsize=(15, 15))
        slices = slices[0].cpu().detach().numpy()
        real = real[0].cpu().detach().numpy()
        predicted = predicted[0].cpu().detach().numpy()
        real *= self.image_size
        predicted *= self.image_size
        s = 0
        # print(real.size())
        # print(predicted.size())
        for i in range(4):
            for j in range(4):
                axes[i, j].imshow(slices[s])
                x, z = real[s]
                axes[i, j].scatter(x, z, color="red")
                x, z = predicted[s]
                axes[i, j].scatter(x, z, color="blue")
                s += 1
        self.experiment.log_figure(figure=plt)
        plt.savefig("artifacts/predictions/img.png")
        plt.show()

    def to_var(self, x):
        """Converts numpy to variable."""
        if torch.cuda.is_available():
            x = x.cuda()
        return Variable(x, requires_grad=False)

    def to_data(self, x):
        """Converts variable to numpy."""
        if torch.cuda.is_available():
            x = x.cpu()
        return x.data.numpy()

    def predict(self, x):
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            x = x.cuda()
        _, x = self.model(x)
        return x
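# --- Hypothetical driver for Trainer2D above (illustrative only). `config` is
# the namespace consumed by __init__, with fields such as image_size, lr,
# epochs, and log_step.
# trainer = Trainer2D(config)
# trainer.pre_train()   # jigsaw pre-training (currently a no-op, see above)
# trainer.train()       # logs under the train_/test_ contexts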
class Trainer2DClassifier:
    def __init__(self, config):
        self.experiment = Experiment(api_key="CQ4yEzhJorcxul2hHE5gxVNGu",
                                     project_name="HIP")
        self.config = config
        self.experiment.log_parameters(vars(config))
        self.log_step = config.log_step
        self.model = classifier2d.ConvClassifier()
        print(self.model)
        self.train_loader, self.test_loader = get_dataloader2DClassifier(config)
        self.train_loader_, self.test_loader_ = get_dataloader2D_(config)
        print(len(self.test_loader_))
        self.net_optimizer = optim.Adam(self.model.parameters(), config.lr,
                                        [0.5, 0.9999], amsgrad=True)
        self.criterion_c = nn.CrossEntropyLoss()
        self.criterion_d = nn.MSELoss()
        self.epochs = config.epochs
        if torch.cuda.is_available():
            self.model = self.model.cuda()
        self.image_size = config.image_size

    def pre_train(self):
        # The patch classifier needs no self-supervised pre-training.
        pass

    def train(self):
        print("Starting training")
        for epoch in range(self.epochs):
            print("Starting epoch {}".format(epoch))
            train_loader = iter(self.train_loader)
            with self.experiment.train():
                for _ in range(len(train_loader)):
                    self.net_optimizer.zero_grad()
                    data, classes = next(train_loader)
                    data, classes = self.to_var(data), self.to_var(classes)
                    logits = self.model(data)
                    loss = self.criterion_c(logits, classes)
                    loss.backward()
                    self.net_optimizer.step()
                    self.experiment.log_metric("loss", loss.item())
                    print("loss: {}".format(loss.item()))
            if (epoch + 1) % self.log_step == 0:
                with self.experiment.test():
                    self.evaluate()
        evaluator = Evaluator(self, self.test_loader_, dim=3)
        evaluator.report()
        self.experiment.end()

    def evaluate(self):
        test_loader = iter(self.test_loader)
        with self.experiment.test():
            loss = 0
            accuracy = 0
            for _ in range(len(test_loader)):
                data, classes, _ = next(test_loader)
                data, classes = self.to_var(data), self.to_var(classes)
                logits = self.model(data)
                loss += self.criterion_c(logits, classes).item()
                accuracy += accuracy_function(classes.cpu().detach(),
                                              logits.cpu().detach()).item()
            print("loss", loss / len(test_loader))
            self.experiment.log_metric("loss", loss / len(test_loader))
            print("accuracy", accuracy / len(test_loader))
            self.experiment.log_metric("accuracy", accuracy / len(test_loader))

    def plots(self, slices, real, predicted):
        # Plotting is only meaningful for the landmark trainer.
        pass

    def to_var(self, x):
        """Moves a tensor to the GPU (if available) and wraps it in a Variable."""
        if torch.cuda.is_available():
            x = x.cuda()
        return Variable(x, requires_grad=False)

    def to_data(self, x):
        """Moves a tensor to the CPU and converts it to a numpy array."""
        if torch.cuda.is_available():
            x = x.cpu()
        return x.data.numpy()

    def predict(self, X):
        if torch.cuda.is_available():
            X = X.cuda()
        B, L, H, W = X.size()
        # Mean landmark positions (as fractions of the image size), used to
        # restrict the sliding-window search to a 16x16 neighbourhood around
        # each landmark's expected location.
        mean_x = np.array([
            0.8, 0.77, 0.81, 0.8, 0.85, 0.85, 0.74, 0.74, 0.2, 0.25, 0.20,
            0.24, 0.17, 0.17, 0.24, 0.24
        ]) * self.image_size
        mean_z = np.array([
            0.42, 0.48, 0.52, 0.51, 0.7, 0.68, 0.43, 0.25, 0.42, 0.48, 0.53,
            0.85, 0.73, 0.63, 0.46, 0.21
        ]) * self.image_size

        def get_top(scores, indexes, k=10):
            # Median position of the top-scoring windows.
            ind = np.argsort(scores)
            xs, zs = [], []
            for i in range(-1, -k, -1):
                x, z = indexes[ind[i]]
                xs.append(x)
                zs.append(z)
            return np.median(xs), np.median(zs)

        landmarks = torch.zeros(B, 16, 3)
        for b in range(B):
            for l in range(L):
                indexes = []
                scores = []
                for h in range(max(0, int(mean_x[l]) - 8),
                               min(H - 20, int(mean_x[l]) + 8)):
                    for w in range(max(0, int(mean_z[l]) - 8),
                                   min(W - 20, int(mean_z[l]) + 8)):
                        # Score a 20x20 window at (h, w) with the classifier
                        # and keep the score assigned to landmark l.
                        patch = X[b, l, h:h + 20, w:w + 20]
                        predicted_labels = self.model(
                            patch.view(1, 1, 20, 20)).view(1, -1)
                        indexes.append((w, h))
                        scores.append(predicted_labels[0][l].item())
                x, z = get_top(scores, indexes)
                landmarks[b, l, 0] = 0.5
                landmarks[b, l, 1] = x / self.image_size
                landmarks[b, l, 2] = z / self.image_size
        if torch.cuda.is_available():
            landmarks = landmarks.cuda()
        return landmarks
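# The get_top helper above is worth a standalone illustration: rather than
# trusting the single highest-scoring window, it returns the median coordinate
# of the top-scoring windows, which damps the effect of a few spurious high
# scores. Note that range(-1, -k, -1) visits k - 1 windows. A self-contained
# sketch with toy values:
import numpy as np

def get_top(scores, indexes, k=10):
    """Median (x, z) over the top-scoring candidate windows."""
    ind = np.argsort(scores)
    xs, zs = [], []
    for i in range(-1, -k, -1):
        x, z = indexes[ind[i]]
        xs.append(x)
        zs.append(z)
    return np.median(xs), np.median(zs)

# One outlier window (99, 99) scores highest, but the median over the top
# candidates stays near the true cluster around (10, 20).
scores = [0.10, 0.90, 0.85, 0.80, 0.95]
indexes = [(50, 50), (10, 20), (11, 21), (9, 19), (99, 99)]
print(get_top(scores, indexes, k=4))  # -> (11.0, 21.0)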
class Experiment:
    """
    A helper class to facilitate the training and validation procedure of the
    GoTurnRemix model.

    Parameters
    ----------
    learning_rate: float
        Learning rate used to train the model. The optimizer is SGD and the
        loss is L1 loss
    image_size: int
        The size of the input image. This has to be fixed before the data is
        created
    data_path: Path
        Path to the data folder. If the folder name includes "pickle", the
        data saved as pickles is loaded
    augment: bool
        Perform augmentation on the images before training
    logs_path: Path
        Path to save the validation predictions at the end of each epoch
    models_path: Path
        Path to save the model state at the end of each epoch
    save_name: str
        Name of the folder in which the logs and models are saved. If not
        provided, the current datetime is used
    comet_api: str
        Comet API key. If provided, parameters and metrics are logged to Comet
    """

    def __init__(self, learning_rate: float, image_size: int, data_path: Path,
                 augment: bool = True, logs_path: Path = None,
                 models_path: Path = None, save_name: str = None,
                 comet_api: str = None):
        self.image_size = image_size
        self.logs_path = logs_path
        self.models_path = models_path
        self.model = GoTurnRemix()
        self.model.cuda()
        self.criterion = torch.nn.L1Loss()
        self.optimizer = torch.optim.SGD(
            filter(lambda p: p.requires_grad, self.model.parameters()),
            lr=learning_rate)
        # Default run name: the current datetime, made filesystem-safe.
        self.model_name = str(datetime.datetime.now()).split('.')[0] \
            .replace(':', '-').replace(' ', '-')
        self.model_name = save_name if save_name else self.model_name
        self.augment = augment
        self.data = Data(data_path, target_size=self.image_size,
                         transforms=augment)
        self.comet = None
        if comet_api:
            self.comet = Comet(api_key=comet_api)
            self.comet.log_parameter('learning_rate', learning_rate)
            self.comet.log_parameter('image_size', image_size)
            self.comet.log_parameter('augment', augment)

    def __train_step__(self, data):
        """
        Performs one step of the training procedure

        Parameters
        ----------
        data
            data obtained from @Data.__getitem__

        Returns
        -------
        Loss at the end of the training step
        """
        if self.comet:
            self.comet.train()
        previous_cropped, current_cropped, bbox, scale, crop = data
        previous_cropped = torch.div(previous_cropped, 255).float().cuda()
        current_cropped = torch.div(current_cropped, 255).float().cuda()
        previous_cropped = torch.autograd.Variable(previous_cropped,
                                                   requires_grad=True)
        current_cropped = torch.autograd.Variable(current_cropped,
                                                  requires_grad=True)
        bbox = bbox.requires_grad_(True).float().cuda()
        self.optimizer.zero_grad()
        preds = self.model(previous_cropped, current_cropped)
        del previous_cropped
        del current_cropped
        gc.collect()
        loss = self.criterion(preds, bbox)
        if self.comet:
            self.comet.log_metric('loss', loss)
        loss.backward()
        self.optimizer.step()
        return loss

    def __test__(self):
        """
        Tests tracking by the model on the held-out video

        Returns
        -------
        Test loss and test predictions
        """
        # Set the model to evaluation mode
        if self.comet:
            self.comet.test()
        self.model.eval()
        test_preds = []
        test_loss = []
        video_frames = self.data.video_frames[-1]
        video_annotations = self.data.video_annotations[-1]
        p_a = video_annotations[0]
        p_f = video_frames[0]
        test_preds.append(p_a)
        for i in tqdm(range(1, len(video_annotations)), desc='Testing'):
            c_a = video_annotations[i]
            c_f = video_frames[i]
            p_c, c_c, bbox, scale, crop = self.data.make_crops(
                p_f, c_f, p_a, c_a)
            p_c = torch.div(torch.from_numpy(p_c),
                            255).unsqueeze(0).float().cuda()
            c_c = torch.div(torch.from_numpy(c_c),
                            255).unsqueeze(0).float().cuda()
            bbox = torch.tensor(bbox, requires_grad=False).float().cuda()
            preds = self.model(p_c, c_c)
            del p_c
            del c_c
            gc.collect()
            loss = torch.nn.functional.l1_loss(preds, bbox)
            if self.comet:
                self.comet.log_metric('test_loss', loss)
            test_loss.append(loss.item())
            preds = self.data.get_bbox(preds.cpu().detach().numpy()[0],
                                       self.image_size, scale, crop)
            test_preds.append(preds)
            # True tracking: the next search region comes from the model's
            # own prediction, so errors can accumulate.
            p_a = preds
            p_f = c_f
        return test_loss, test_preds

    def __validate__(self):
        """
        Performs validation on the model

        Returns
        -------
        Validation loss and validation predictions
        """
        # Set the model to evaluation mode
        if self.comet:
            self.comet.validate()
        self.model.eval()
        validation_preds = []
        validation_loss = []
        video_frames = self.data.video_frames[-1]
        video_annotations = self.data.video_annotations[-1]
        p_a = video_annotations[0]
        p_f = video_frames[0]
        validation_preds.append(p_a)
        for i in tqdm(range(1, len(video_annotations)), desc='Validating'):
            c_a = video_annotations[i]
            c_f = video_frames[i]
            p_c, c_c, bbox, scale, crop = self.data.make_crops(
                p_f, c_f, p_a, c_a)
            p_c = torch.div(torch.from_numpy(p_c),
                            255).unsqueeze(0).float().cuda()
            c_c = torch.div(torch.from_numpy(c_c),
                            255).unsqueeze(0).float().cuda()
            bbox = torch.tensor(bbox, requires_grad=False).float().cuda()
            preds = self.model(p_c, c_c)
            del p_c
            del c_c
            gc.collect()
            loss = torch.nn.functional.l1_loss(preds, bbox)
            if self.comet:
                self.comet.log_metric('val_loss', loss)
            validation_loss.append(loss.item())
            preds = self.data.get_bbox(preds.cpu().detach().numpy()[0],
                                       self.image_size, scale, crop)
            validation_preds.append(preds)
            # Unlike __test__, validation resets to the ground-truth
            # annotation at every frame.
            p_a = c_a
            p_f = c_f
        return validation_loss, validation_preds

    def train(self, epochs: int, batch_size: int, validate: bool = True,
              test: bool = True):
        """
        Trains the model for @epochs number of epochs

        Parameters
        ----------
        epochs: int
            Number of epochs to train the model
        batch_size: int
            The size of each batch when training the model
        validate: bool, default=True
            If True, validation occurs at the end of each epoch.
            The results are saved in @logs_path and models in @models_path
        test: bool, default=True
            If True, the model is tested for tracking at the end of the
            training procedure. The results are saved in @logs_path

        Returns
        -------
        list: List containing the training loss at the end of each epoch
        """
        if self.comet:
            self.comet.log_parameter('epochs', epochs)
            self.comet.log_parameter('batch_size', batch_size)
        loss_per_epoch = []
        preds_per_epoch = []
        # Create a DataLoader to feed data to the model
        dataloader = torch.utils.data.DataLoader(dataset=self.data,
                                                 batch_size=batch_size,
                                                 shuffle=True)
        # Run for @epochs number of epochs
        for epoch in range(epochs):
            # (Re)set the model to training mode; validation switches it to
            # evaluation mode at the end of each epoch.
            self.model.train()
            if self.comet:
                self.comet.log_metric('epoch', epoch)
            running_loss = []
            for step, data in enumerate(
                    tqdm(dataloader,
                         total=int(len(self.data) / batch_size),
                         desc='Epoch {}'.format(epoch))):
                loss = self.__train_step__(data)
                running_loss.append(loss.item())
            training_loss = sum(running_loss) / len(running_loss)
            if self.comet:
                self.comet.log_metric('mean_train_loss', training_loss)
            loss_per_epoch.append(training_loss)
            if validate:
                validation_loss, validation_preds = self.__validate__()
                mean_validation_loss = sum(validation_loss) / len(validation_loss)
                if self.comet:
                    self.comet.log_metric('mean_validation_loss',
                                          mean_validation_loss)
                preds_per_epoch.append(validation_preds)
                print('Validation loss: {}'.format(mean_validation_loss))
            # Save the model at this stage
            if self.models_path:
                (self.models_path / self.model_name).mkdir(exist_ok=True)
                torch.save(self.model,
                           (self.models_path / self.model_name /
                            'epoch_{}'.format(epoch)).resolve())
            print('Training Loss: {}'.format(training_loss))
            # Save the validation frames, ground truths and predictions
            if self.logs_path:
                (self.logs_path / self.model_name).mkdir(exist_ok=True)
                save = {
                    'frames': self.data.video_frames[-1],
                    'truth': self.data.video_annotations[-1],
                    'preds': preds_per_epoch
                }
                np.save(
                    str((self.logs_path / self.model_name /
                         'preds_per_epoch.npy').resolve()), save)
        # Test the model and save the results
        if test:
            test_loss, test_preds = self.__test__()
            if self.logs_path:
                (self.logs_path / self.model_name).mkdir(exist_ok=True)
                save = {
                    'frames': self.data.video_frames[-1],
                    'truth': self.data.video_annotations[-1],
                    'preds': test_preds,
                    'loss': test_loss
                }
                np.save(
                    str((self.logs_path / self.model_name /
                         'test_preds.npy').resolve()), save)
        return loss_per_epoch
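# A hypothetical invocation of the helper above, shown for context only. The
# paths, hyperparameters and run name are placeholders, and the Comet key is
# read from the environment rather than hard-coded.
import os
from pathlib import Path

experiment = Experiment(learning_rate=1e-5,
                        image_size=224,
                        data_path=Path("data/pickle"),
                        augment=True,
                        logs_path=Path("logs"),
                        models_path=Path("models"),
                        save_name="goturn-remix-baseline",
                        comet_api=os.environ.get("COMET_API_KEY"))
loss_per_epoch = experiment.train(epochs=10, batch_size=32,
                                  validate=True, test=True)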
def main(args):
    train_set = KBDataset(args.train_ratings, args.prefetch_to_gpu)
    test_set = KBDataset(args.test_ratings, args.prefetch_to_gpu)
    train_fairness_set = NodeClassification(args.users_train,
                                            args.prefetch_to_gpu)
    test_fairness_set = NodeClassification(args.users_test,
                                           args.prefetch_to_gpu)

    # Hash the training triples so sampled corruptions can be checked
    # against the full dataset.
    if args.prefetch_to_gpu:
        train_hash = set(r.tobytes() for r in train_set.dataset.cpu().numpy())
    else:
        train_hash = set(r.tobytes() for r in train_set.dataset)
    all_hash = train_hash.copy()
    all_hash.update(r.tobytes() for r in test_set.dataset)

    ''' Comet Logging '''
    experiment = Experiment(api_key=args.api_key, disabled=not args.do_log,
                            project_name=args.project_name,
                            workspace=args.workspace)
    experiment.set_name(args.namestr)

    if not args.use_gcmc:
        modelD = TransE(args.num_ent, args.num_rel, args.embed_dim,
                        args.p).to(args.device)
    else:
        decoder = SharedBilinearDecoder(args.num_rel, 2,
                                        args.embed_dim).to(args.device)
        modelD = SimpleGCMC(decoder, args.embed_dim, args.num_ent,
                            args.p).to(args.device)

    ''' Initialize Everything to None '''
    fairD_gender, fairD_occupation, fairD_age, fairD_random = None, None, None, None
    optimizer_fairD_gender, optimizer_fairD_occupation = None, None
    optimizer_fairD_age, optimizer_fairD_random = None, None
    gender_filter, occupation_filter, age_filter = None, None, None

    if args.use_attr:
        attr_data = [args.users, args.movies]
        ''' Initialize Discriminators '''
        fairD_gender = GenderDiscriminator(
            args.use_1M, args.embed_dim, attr_data, 'gender',
            use_cross_entropy=args.use_cross_entropy).to(args.device)
        fairD_occupation = OccupationDiscriminator(
            args.use_1M, args.embed_dim, attr_data, attribute='occupation',
            use_cross_entropy=args.use_cross_entropy)
        fairD_age = AgeDiscriminator(
            args.use_1M, args.embed_dim, attr_data, attribute='age',
            use_cross_entropy=args.use_cross_entropy)
        ''' Initialize Filters and Optimizers '''
        if args.sample_mask:
            gender_filter = AttributeFilter(
                args.embed_dim, attribute='gender').to(args.device)
            occupation_filter = AttributeFilter(
                args.embed_dim, attribute='occupation').to(args.device)
            age_filter = AttributeFilter(
                args.embed_dim, attribute='age').to(args.device)
        optimizer_fairD_gender = optimizer(fairD_gender.parameters(),
                                           'adam', args.lr)
        optimizer_fairD_occupation = optimizer(fairD_occupation.parameters(),
                                               'adam', args.lr)
        optimizer_fairD_age = optimizer(fairD_age.parameters(), 'adam', args.lr)
    elif args.use_occ_attr:
        attr_data = [args.users, args.movies]
        fairD_occupation = OccupationDiscriminator(
            args.use_1M, args.embed_dim, attr_data, attribute='occupation',
            use_cross_entropy=args.use_cross_entropy)
        optimizer_fairD_occupation = optimizer(fairD_occupation.parameters(),
                                               'adam', args.lr)
    elif args.use_gender_attr:
        attr_data = [args.users, args.movies]
        fairD_gender = GenderDiscriminator(
            args.use_1M, args.embed_dim, attr_data, 'gender',
            use_cross_entropy=args.use_cross_entropy)
        optimizer_fairD_gender = optimizer(fairD_gender.parameters(),
                                           'adam', args.lr)
    elif args.use_age_attr:
        attr_data = [args.users, args.movies]
        fairD_age = AgeDiscriminator(
            args.use_1M, args.embed_dim, attr_data, attribute='age',
            use_cross_entropy=args.use_cross_entropy)
        optimizer_fairD_age = optimizer(fairD_age.parameters(), 'adam', args.lr)
    elif args.use_random_attr:
        attr_data = [args.users, args.movies]
        fairD_random = RandomDiscriminator(
            args.use_1M, args.embed_dim, attr_data, 'random',
            use_cross_entropy=args.use_cross_entropy).to(args.device)
        optimizer_fairD_random = optimizer(fairD_random.parameters(),
                                           'adam', args.lr)

    if args.load_transD:
        modelD.load(args.saved_path)
    if args.load_filters:
        gender_filter.load(args.gender_filter_saved_path)
        occupation_filter.load(args.occupation_filter_saved_path)
        age_filter.load(args.age_filter_saved_path)

    ''' Create Sets '''
    fairD_set = [fairD_gender, fairD_occupation, fairD_age, fairD_random]
    filter_set = [gender_filter, occupation_filter, age_filter, None]
    optimizer_fairD_set = [optimizer_fairD_gender, optimizer_fairD_occupation,
                           optimizer_fairD_age, optimizer_fairD_random]

    ''' Initialize CUDA if Available '''
    if args.use_cuda:
        for fairD, filter_ in zip(fairD_set, filter_set):
            if fairD is not None:
                fairD.to(args.device)
            if filter_ is not None:
                filter_.to(args.device)

    if args.use_gcmc:
        if args.sample_mask and not args.use_trained_filters:
            # Train the encoder and the attribute filters jointly.
            optimizerD = optimizer(list(modelD.parameters()) +
                                   list(gender_filter.parameters()) +
                                   list(occupation_filter.parameters()) +
                                   list(age_filter.parameters()),
                                   'adam', args.lr)
        else:
            optimizerD = optimizer(modelD.parameters(), 'adam', args.lr)
    else:
        optimizerD = optimizer(modelD.parameters(), 'adam_sparse', args.lr)

    _cst_inds = torch.LongTensor(
        np.arange(args.num_ent, dtype=np.int64)[:, None]).to(
            args.device).repeat(1, args.batch_size // 2)
    _cst_s = torch.LongTensor(np.arange(args.batch_size // 2)).to(args.device)
    _cst_s_nb = torch.LongTensor(
        np.arange(args.batch_size // 2, args.batch_size)).to(args.device)
    _cst_nb = torch.LongTensor(np.arange(args.batch_size)).to(args.device)

    if args.prefetch_to_gpu:
        train_loader = DataLoader(train_set, batch_size=args.batch_size,
                                  shuffle=True, drop_last=True,
                                  num_workers=0, collate_fn=collate_fn)
    else:
        train_loader = DataLoader(train_set, batch_size=args.batch_size,
                                  shuffle=True, drop_last=True,
                                  num_workers=4, pin_memory=True,
                                  collate_fn=collate_fn)

    if args.freeze_transD:
        freeze_model(modelD)

    if args.debug:
        attr_data = [args.users, args.movies]
        ipdb.set_trace()

    ''' Joint Training '''
    if not args.dont_train:
        with experiment.train():
            for epoch in tqdm(range(1, args.num_epochs + 1)):
                if epoch % args.valid_freq == 0 or epoch == 1:
                    with torch.no_grad():
                        if args.use_gcmc:
                            rmse, test_loss = test_gcmc(test_set, args, modelD,
                                                        filter_set)
                        else:
                            test_nce(test_set, args, modelD, epoch, experiment)
                        if args.use_attr:
                            test_gender(args, test_fairness_set, modelD,
                                        fairD_gender, experiment, epoch,
                                        filter_set)
                            test_occupation(args, test_fairness_set, modelD,
                                            fairD_occupation, experiment,
                                            epoch, filter_set)
                            test_age(args, test_fairness_set, modelD,
                                     fairD_age, experiment, epoch, filter_set)
                        elif args.use_gender_attr:
                            test_gender(args, test_fairness_set, modelD,
                                        fairD_gender, experiment, epoch,
                                        filter_set)
                        elif args.use_occ_attr:
                            test_occupation(args, test_fairness_set, modelD,
                                            fairD_occupation, experiment,
                                            epoch, filter_set)
                        elif args.use_age_attr:
                            test_age(args, test_fairness_set, modelD,
                                     fairD_age, experiment, epoch, filter_set)
                        elif args.use_random_attr:
                            test_random(args, test_fairness_set, modelD,
                                        fairD_random, experiment, epoch,
                                        filter_set)
                    if args.do_log and args.use_gcmc:
                        experiment.log_metric("RMSE", float(rmse), step=epoch)
                        experiment.log_metric("Test Loss", float(test_loss),
                                              step=epoch)
                train(train_loader, epoch, args, train_hash, modelD,
                      optimizerD, fairD_set, optimizer_fairD_set, filter_set,
                      experiment)
                gc.collect()
                if epoch % (args.valid_freq * 5) == 0:
                    if args.use_gcmc:
                        rmse, test_loss = test_gcmc(test_set, args, modelD)
                    else:
                        test_nce(test_set, args, modelD, epoch, experiment)

    modelD.save(args.outname_base + 'D_final.pts')
    if args.use_attr or args.use_gender_attr:
        fairD_gender.save(args.outname_base + 'GenderFairD_final.pts')
    if args.use_attr or args.use_occ_attr:
        fairD_occupation.save(args.outname_base + 'OccupationFairD_final.pts')
    if args.use_attr or args.use_age_attr:
        fairD_age.save(args.outname_base + 'AgeFairD_final.pts')
    if args.use_random_attr:
        fairD_random.save(args.outname_base + 'RandomFairD_final.pts')
    if args.sample_mask:
        gender_filter.save(args.outname_base + 'GenderFilter.pts')
        occupation_filter.save(args.outname_base + 'OccupationFilter.pts')
        age_filter.save(args.outname_base + 'AgeFilter.pts')

    constant = len(fairD_set) - fairD_set.count(None)  # active discriminators

    if args.test_new_disc:
        ''' Train Fresh Discriminators on the Frozen Embeddings '''
        args.use_attr = True
        args.freeze_transD = True
        attr_data = [args.users, args.movies]
        if args.use_random_attr:
            new_fairD_random = DemParDisc(
                args.use_1M, args.embed_dim, attr_data, attribute='random',
                use_cross_entropy=args.use_cross_entropy).to(args.device)
            new_optimizer_fairD_random = optimizer(
                new_fairD_random.parameters(), 'adam', args.lr)
        freeze_model(modelD)
        with experiment.test():
            ''' Train Classifiers '''
            if args.use_gender_attr or args.use_attr:
                train_gender(args, modelD, train_fairness_set,
                             test_fairness_set, attr_data, experiment,
                             filter_set)
            if args.use_occ_attr or args.use_attr:
                train_occupation(args, modelD, train_fairness_set,
                                 test_fairness_set, attr_data, experiment,
                                 filter_set)
            if args.use_age_attr or args.use_attr:
                train_age(args, modelD, train_fairness_set, test_fairness_set,
                          attr_data, experiment, filter_set)
            if args.use_random_attr:
                train_random(args, modelD, train_fairness_set,
                             test_fairness_set, attr_data, experiment,
                             filter_set)

    if args.report_bias:
        gender_bias = calc_attribute_bias('Train', args, modelD, experiment,
                                          'gender', epoch, [gender_filter])
        occ_bias = calc_attribute_bias('Train', args, modelD, experiment,
                                       'occupation', epoch,
                                       [occupation_filter])
        age_bias = calc_attribute_bias('Train', args, modelD, experiment,
                                       'age', epoch, [age_filter])
        gender_bias = calc_attribute_bias('Test', args, modelD, experiment,
                                          'gender', epoch, [gender_filter])
        occ_bias = calc_attribute_bias('Test', args, modelD, experiment,
                                       'occupation', epoch,
                                       [occupation_filter])
        age_bias = calc_attribute_bias('Test', args, modelD, experiment,
                                       'age', epoch, [age_filter])

    experiment.end()
# The beginning of this snippet (experiment setup, data preparation and the
# first layers of the Sequential model) is elided in the source.
                 padding='same',
                 activation=params['activation']))
model.add(Dropout(params['dropout']))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer=params['optimizer'],
              metrics=['accuracy'])

# Print model.summary() so it is preserved automatically in the `Output` tab
print(model.summary())
params.update({'total_number_of_parameters': model.count_params()})

# Will log metrics with the prefix 'train_'
with experiment.train():
    model.fit(X_train, y_train,
              epochs=params['epochs'],
              batch_size=params['batch_size'],
              verbose=1,
              validation_data=(X_test, y_test))

# Will log metrics with the prefix 'test_'
with experiment.test():
    loss, accuracy = model.evaluate(X_test, y_test)
    metrics = {'loss': loss, 'accuracy': accuracy}
    experiment.log_multiple_metrics(metrics)

experiment.log_multiple_params(params)
experiment.log_dataset_hash(X_train)  # creates and logs a hash of your data
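# Every snippet in this file leans on the same Comet pattern: metrics logged
# inside the experiment.train() / experiment.test() context managers are
# recorded with 'train_' / 'test_' name prefixes. A minimal self-contained
# sketch of just that mechanism (disabled=True keeps it offline; supply a
# real API key to actually log):
from comet_ml import Experiment

experiment = Experiment(api_key="YOUR_API_KEY", disabled=True)

with experiment.train():
    experiment.log_metric("loss", 0.42, step=0)  # recorded as train_loss

with experiment.test():
    experiment.log_metric("loss", 0.17)          # recorded as test_loss

experiment.end()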
def main(args: argparse.Namespace):
    for package_name in args.include_package:
        import_module_and_submodules(package_name)

    params = Params.from_file(args.param_path, args.overrides)

    # Fixed seeds for reproducibility; random seeds when --fix is not set.
    random_seed, numpy_seed, pytorch_seed = 41, 11, 302
    if not args.fix:
        random_seed = random.randint(0, 999999999)
        numpy_seed = random.randint(0, 999999999)
        pytorch_seed = random.randint(0, 999999999)
    params["random_seed"] = random_seed
    params["numpy_seed"] = numpy_seed
    params["pytorch_seed"] = pytorch_seed
    prepare_environment(params)

    serialization_dir = args.serialization_dir
    create_serialization_dir(params, serialization_dir, args.recover,
                             args.force)
    prepare_global_logging(serialization_dir, args.file_friendly_logging)

    hyperparams = list(
        get_hyperparams(params.as_dict(infer_type_and_cast=True)))
    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    test_file = params.params.get("test_data_path", None)
    validation_data_path = params.get("validation_data_path", None)
    evaluate_on_test = params.pop_bool("evaluate_on_test", False)
    test_command = None
    if evaluate_on_test:
        test_command = BaseEvaluationCommand.from_params(
            params.pop("test_command"))

    cuda_device = params.params.get('trainer').get('cuda_device', -1)
    check_for_gpu(cuda_device)

    train_model = TrainPipelineModel.from_params(
        params=params, serialization_dir=serialization_dir, local_rank=0)
    trainer = train_model.trainer
    if trainer.validation_command is not None:
        trainer.validation_command.maybe_set_gold_file(validation_data_path)
    params.assert_empty('base train command')

    if args.comet is not None:
        experiment = Experiment(api_key=args.comet,
                                workspace=args.workspace,
                                project_name=args.project,
                                parse_args=False,
                                auto_output_logging=None)
        if args.tags:
            experiment.add_tags(args.tags)
        # Log the raw configuration, the fully evaluated jsonnet and any
        # command-line overrides as the experiment's source code.
        with open(args.param_path) as fil:
            code = "".join(fil.readlines())
        code += "\n\n#=============Full details=============\n\n"
        code += _jsonnet.evaluate_file(args.param_path)
        code += "\n\n#=============IMPORTANT: overwritten options============\n\n"
        code += args.overrides
        experiment.set_code(code, overwrite=True)
        for key, val in hyperparams:
            experiment.log_parameter(key, val)
        experiment.log_parameter("model_directory", serialization_dir)
        experiment.log_parameter("cuda_device", cuda_device)
        experiment.log_parameter("hostname", socket.gethostname())
        experiment.log_parameter("random_seed", random_seed)
        experiment.log_parameter("numpy_seed", numpy_seed)
        experiment.log_parameter("pytorch_seed", pytorch_seed)
    else:
        experiment = None

    try:
        metrics = trainer.train(experiment)
    except KeyboardInterrupt:
        # If we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            logger.info("Training interrupted by the user. Attempting to "
                        "create a model archive using the current best epoch "
                        "weights.")
            archive_model(serialization_dir)
        raise

    # Evaluate on the test set using the best epoch weights.
    if test_file and evaluate_on_test:
        logger.info("The model will be evaluated using the best epoch "
                    "weights (see pred_test.txt).")
        trainer.annotator.annotate_file(
            trainer.model, test_file,
            os.path.join(serialization_dir, "pred_test.txt"))
        if test_command:
            logger.info("Comparing against gold standard.")
            test_command.maybe_set_gold_file(test_file)
            test_metrics = test_command.evaluate(
                os.path.join(serialization_dir, "pred_test.txt"))
            if experiment:
                with experiment.test():
                    experiment.log_metrics({
                        k: v
                        for k, v in test_metrics.items() if np.isscalar(v)
                    })
            metrics = merge_dicts(metrics, "test", test_metrics)

    dump_metrics(os.path.join(serialization_dir, "metrics.json"), metrics,
                 log=True)

    if not args.no_archive:
        # Tar up the results into a model archive.
        archive_model(serialization_dir)