def compute_task_metrics(task_name, logits, labels):
    if logits.shape[1] == 1:
        pred_arr = logits.reshape(-1)
    else:
        pred_arr = np.argmax(logits, axis=1)
    return compute_metrics(
        task_name=task_name,
        pred_srs=pred_arr,
        label_srs=labels,
    )
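For illustration, a minimal hypothetical call, assuming `logits` is a NumPy array of shape (num_examples, num_classes) and that the underlying `compute_metrics` helper shown above is available in the module; the array values below are made up:

import numpy as np

# Hypothetical inputs: 3 examples, 2 classes, so the argmax branch is taken.
logits = np.array([[0.2, 0.8], [1.5, -0.3], [0.1, 0.4]])
labels = np.array([1, 0, 1])

# A (N, 1) logits array would instead be flattened and passed through as raw
# scores (e.g. for a regression-style task), skipping the argmax.
metrics = compute_task_metrics("example_task", logits, labels)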
        per_epoch_loss += loss.item()

    per_epoch_loss = per_epoch_loss / num_batches
    return predictions, diff_targets, all_attention_bin, all_attention_hm, per_epoch_loss, all_gene_ids


best_valid_loss = 10000000000
best_valid_avgAUPR = -1
best_valid_avgAUC = -1
best_test_avgAUC = -1

if (args.test_on_saved_model == False):
    for epoch in range(0, args.epochs):
        print('---------------------------------------- Training ' + str(epoch + 1) + ' -----------------------------------')

        predictions, diff_targets, alpha_train, beta_train, train_loss, _ = train(Train)
        train_avgAUPR, train_avgAUC = evaluate.compute_metrics(predictions, diff_targets)

        predictions, diff_targets, alpha_valid, beta_valid, valid_loss, gene_ids_valid = test(Valid, "Validation")
        valid_avgAUPR, valid_avgAUC = evaluate.compute_metrics(predictions, diff_targets)

        predictions, diff_targets, alpha_test, beta_test, test_loss, gene_ids_test = test(Test, 'Testing')
        test_avgAUPR, test_avgAUC = evaluate.compute_metrics(predictions, diff_targets)

        if (valid_avgAUC >= best_valid_avgAUC):  # save best epoch -- models converge early
            best_valid_avgAUC = valid_avgAUC
            best_test_avgAUC = test_avgAUC
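The loop above assumes `evaluate.compute_metrics` returns a pair (average AUPR, average AUC) for the binary per-gene predictions. A minimal sketch of such a function, assuming predictions and targets are CPU column tensors of probabilities and 0/1 labels (the name and implementation here are illustrative, not the project's own evaluate module):

from sklearn.metrics import average_precision_score, roc_auc_score

def compute_metrics_sketch(predictions, targets):
    """Hypothetical stand-in for evaluate.compute_metrics: returns (AUPR, AUROC)."""
    y_score = predictions.view(-1).numpy()
    y_true = targets.view(-1).numpy()
    return average_precision_score(y_true, y_score), roc_auc_score(y_true, y_score)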
def run_model(name, context, conf, double_input, use_elmo=False, save_predictions=False, save_model=False):
    """
    Runs the given model 'name' for the given 'context' and agreement level 'conf'.
    If double_input is True, runs the combined model using context comment text.
    Optionally saves the trained model & its vocabulary, and predictions.

    Allowed names: lstm | bilstm | stacked_bilstm | cnn | dense_lstm | dense_bilstm |
                   dense_stacked_bilstm | dense_cnn | nli_cnn | bert | dense_bert

    If use_elmo=True, uses ELMo's pre-trained language model for embeddings.
    """
    if use_elmo:
        # token indexer maps tokens to integers; the ELMo indexer keeps this mapping
        # consistent with what was used in the original ELMo training.
        token_indexer = ELMoTokenCharactersIndexer()
    elif name == 'bert':
        global bert_token_indexer
        bert_token_indexer = PretrainedBertIndexer(pretrained_model=BERT_MODEL, do_lowercase=True)
    else:
        token_indexer = SingleIdTokenIndexer()

    if name == 'bert':  # BERT uses a special wordpiece tokenizer
        reader = data_reader.UnpalatableDatasetReader(main_input=context, additional_context=double_input,
                                                      tokenizer=tokenizer_bert,
                                                      token_indexers={"tokens": bert_token_indexer},
                                                      label_cols=LABEL_COLS)
    else:
        reader = data_reader.UnpalatableDatasetReader(main_input=context, additional_context=double_input,
                                                      tokenizer=tokenizer,
                                                      token_indexers={"tokens": token_indexer},
                                                      label_cols=LABEL_COLS)

    map_reply_id_pred_probability = {}
    n_epochs = []
    f1s, AUROCs, weighted_f1s, precision_s, recall_s, accuracies, AUPRCs = [], [], [], [], [], [], []

    for fold_number in range(1, 6):  # 5-fold cross validation
        train_fname = 'train_data_fold_' + str(fold_number) + '_OneHot.csv'
        val_fname = 'val_data_fold_' + str(fold_number) + '_OneHot.csv'
        test_fname = 'test_data_fold_' + str(fold_number) + '_OneHot.csv'

        train_dataset = reader.read(file_path=DATA_ROOT / conf / train_fname)
        validation_dataset = reader.read(file_path=DATA_ROOT / conf / val_fname)
        test_dataset = reader.read(file_path=DATA_ROOT / conf / test_fname)

        print("\n#####################################################\n",
              double_input, context, conf, name,
              len(train_dataset), len(validation_dataset), len(test_dataset))

        # Train model:
        if name == 'lstm':
            model, vocab, ep = train.train_lstm(train_dataset, validation_dataset, BATCH_SIZE, dense_vector=False,
                                                num_layers=1, bidirectional=False, use_elmo=use_elmo,
                                                double_input=double_input)
        elif name == 'dense_lstm':
            model, vocab, ep = train.train_lstm(train_dataset, validation_dataset, BATCH_SIZE, dense_vector=True,
                                                col_name=context, num_layers=1, bidirectional=False,
                                                use_elmo=use_elmo, double_input=double_input)
        elif name == 'bilstm':
            model, vocab, ep = train.train_lstm(train_dataset, validation_dataset, BATCH_SIZE, dense_vector=False,
                                                num_layers=1, bidirectional=True, use_elmo=use_elmo,
                                                double_input=double_input)
        elif name == 'dense_bilstm':
            model, vocab, ep = train.train_lstm(train_dataset, validation_dataset, BATCH_SIZE, dense_vector=True,
                                                col_name=context, num_layers=1, bidirectional=True,
                                                use_elmo=use_elmo, double_input=double_input)
        elif name == 'stacked_bilstm':
            model, vocab, ep = train.train_lstm(train_dataset, validation_dataset, BATCH_SIZE, dense_vector=False,
                                                num_layers=2, bidirectional=True, use_elmo=use_elmo,
                                                double_input=double_input)
        elif name == 'dense_stacked_bilstm':
            model, vocab, ep = train.train_lstm(train_dataset, validation_dataset, BATCH_SIZE, dense_vector=True,
                                                col_name=context, num_layers=2, bidirectional=True,
                                                use_elmo=use_elmo, double_input=double_input)
        elif name == 'cnn':
            if context == 'reply_text':
                filter_sizes = (2, 3)  # kernels can not be bigger than the shortest sentence
            else:
                filter_sizes = (2,)
            model, vocab, ep = train.train_cnn(train_dataset, validation_dataset, BATCH_SIZE, dense_vector=False,
                                               num_filters=100, filter_sizes=filter_sizes, use_elmo=use_elmo,
                                               double_input=double_input)
        elif name == 'dense_cnn':
            if context == 'reply_text':
                filter_sizes = (2, 3)  # kernels can not be bigger than the shortest sentence
            else:
                filter_sizes = (2,)
            model, vocab, ep = train.train_cnn(train_dataset, validation_dataset, BATCH_SIZE, dense_vector=True,
                                               col_name=context, num_filters=100, filter_sizes=filter_sizes,
                                               use_elmo=use_elmo, double_input=double_input)
        elif name == 'nli_cnn':
            if double_input == False:
                print("Error: NLI-inspired architecture only accepts double-input.")
                return [None] * 9
            filter_sizes = (2, 3)
            model, vocab, ep = train.train_nli(train_dataset, validation_dataset, BATCH_SIZE, use_elmo=use_elmo,
                                               num_filters=100, filter_sizes=filter_sizes)
        elif name == 'bert':
            model, vocab, ep = train.train_bert(train_dataset, validation_dataset, BATCH_SIZE,
                                                pretrained_model=BERT_MODEL, dense_vector=False,
                                                double_input=double_input)
        elif name == 'dense_bert':
            model, vocab, ep = train.train_bert(train_dataset, validation_dataset, BATCH_SIZE,
                                                pretrained_model=BERT_MODEL, dense_vector=True, col_name=context,
                                                double_input=double_input)
        else:
            sys.exit("'name' not valid")

        n_epochs.append(ep)  # keep track of number of actual training epochs for each fold

        # Predict and evaluate model on test set:
        # NOTE: preds is of shape (number of samples, 2) - the columns represent the probabilities
        # for the two classes in order ['yes_unp', 'not_unp']
        preds = evaluate.make_predictions(model, vocab, test_dataset, BATCH_SIZE, use_gpu=False)
        f1, auroc, w_f1, precision, recall, acc, auprc = evaluate.compute_metrics(preds, test_dataset)

        if save_predictions:  # save predictions for error analysis
            replyid_pred = evaluate.map_id_prediction(preds, test_dataset)
            if set(replyid_pred.keys()).intersection(set(map_reply_id_pred_probability.keys())) != set():  # sanity check
                sys.exit("Error: There is overlap in Test IDs across folds.")
            map_reply_id_pred_probability.update(replyid_pred)

        if save_model:  # save the model weights and vocabulary
            with open('./tmp/' + name + '_model_conf_' + conf.split('-')[1] + '_fold_' + str(fold_number) + '.th', 'wb') as f:
                torch.save(model.state_dict(), f)
            vocab.save_to_files("./tmp/" + name + "_vocabulary_" + conf.split('-')[1] + "_fold_" + str(fold_number))

        print("\nFold #{} | F1 = {} | AUROC = {} | AUPRC = {}".format(fold_number, f1, auroc, auprc))
        f1s.append(f1); AUROCs.append(auroc); weighted_f1s.append(w_f1); precision_s.append(precision)
        recall_s.append(recall); accuracies.append(acc); AUPRCs.append(auprc)

    mean_f1 = np.array(f1s).mean()
    mean_auroc = np.array(AUROCs).mean()
    mean_weighted_f1 = np.array(weighted_f1s).mean()
    mean_precision = np.array(precision_s).mean()
    mean_recall = np.array(recall_s).mean()
    mean_accuracy = np.array(accuracies).mean()
    mean_auprc = np.array(AUPRCs).mean()

    print("Total predictions: {} | Save Predictions: {}".format(len(map_reply_id_pred_probability), save_predictions))
    return mean_f1, mean_auroc, mean_weighted_f1, mean_precision, mean_recall, mean_accuracy, mean_auprc, map_reply_id_pred_probability, n_epochs
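A typical invocation might look like the following. The `context` value 'reply_text' appears in the function itself, but the `conf` string is only an illustrative placeholder shaped to satisfy the `conf.split('-')[1]` usage above, not necessarily a real agreement level in the dataset:

# Hypothetical call: 5-fold CV with a BiLSTM over reply text at one agreement level.
(mean_f1, mean_auroc, mean_weighted_f1, mean_precision, mean_recall,
 mean_accuracy, mean_auprc, id_to_prob, n_epochs) = run_model(
    name='bilstm', context='reply_text', conf='conf-80',  # 'conf-80' is a placeholder
    double_input=False, use_elmo=False, save_predictions=True, save_model=False)
print(mean_f1, mean_auroc)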
        diff_predictions[start:end] = batch_diff_predictions.data.cpu()
        per_epoch_loss += loss.item()

    per_epoch_loss = per_epoch_loss / num_batches
    return diff_predictions, diff_targets, all_attention_bin, all_attention_hm, per_epoch_loss, all_gene_ids


best_valid_loss = 10000000000
best_valid_MSE = 100000
best_valid_R2 = -1

if (args.test_on_saved_model == False):
    for epoch in range(0, args.epochs):
        print('=---------------------------------------- Training ' + str(epoch + 1) + ' -----------------------------------=')

        diff_predictions, diff_targets, alpha_train, beta_train, train_loss, _ = train(Train)
        train_MSE, train_R2 = evaluate.compute_metrics(diff_predictions, diff_targets)

        diff_predictions, diff_targets, alpha_valid, beta_valid, valid_loss, gene_ids_valid = test(Valid)
        valid_MSE, valid_R2 = evaluate.compute_metrics(diff_predictions, diff_targets)

        if (valid_R2 >= best_valid_R2):  # save best epoch -- models converge early
            best_valid_R2 = valid_R2
            torch.save(model, model_dir + "/" + model_name + '_R2_model.pt')

        print("Epoch:", epoch)
        print("train R2:", train_R2)
        print("valid R2:", valid_R2)
        print("best valid R2:", best_valid_R2)
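Here the same `evaluate.compute_metrics` call is assumed to return (MSE, R²) for the regression targets rather than AUPR/AUC. A minimal sketch under that assumption, again with a hypothetical name rather than the project's own implementation:

from sklearn.metrics import mean_squared_error, r2_score

def compute_metrics_sketch(diff_predictions, diff_targets):
    """Hypothetical stand-in for evaluate.compute_metrics (regression): returns (MSE, R2)."""
    y_pred = diff_predictions.view(-1).numpy()
    y_true = diff_targets.view(-1).numpy()
    return mean_squared_error(y_true, y_pred), r2_score(y_true, y_pred)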
def run_pipeline(ignition_file, persist_all, load_all_fresh):
    """
    An ad hoc pipeline created to mirror the standard ML pipeline and work
    with citations data.

    Parameters:
    ===========
    ignition_file: string
        name of the yaml file for which you want to run an experiment
    persist_all: boolean
        True if you want to persist all data for future use
    load_all_fresh: boolean
        True if you want to avoid any persisted data and load new data from scratch

    Returns:
    ========
    None
    """
    model_parts = {}

    ##### 1. LOAD ENVIRONMENT DATA #####
    # load local paths
    local_paths_env = load_local_paths('local_paths.yaml')
    print('Local paths loaded.')

    # load ignition file
    ignition = load_config(local_paths_env['ignition_path'] + ignition_file)
    print('Ignition loaded.')

    # id used for persisting
    hash_id = create_hash_id(str(ignition['id']))
    print('Hash id created.')

    # create hyperparameter combinations (for k-folding)
    hyperparameters = expand_grid(ignition['hyperparameters'])

    # load environment file
    psql_env = load_psql_env(pgpass_path=local_paths_env['pgpass_path'])
    print('PSQL environment file loaded.')

    # Initiate PSQL Connection
    connection = SQLConn(psql_env)
    connection.open()

    ##### 2. LOAD TRAIN AND TEST DATA #####
    if check_persisted(local_paths_env['store_train_data'], f'{hash_id}_x', load_all_fresh):
        print("Found data")
        # data loaded before: load from file
        X_train = load(local_paths_env['store_train_data'], f'{hash_id}_x')
        X_test = load(local_paths_env['store_test_data'], f'{hash_id}_x')
        y_train = load(local_paths_env['store_train_data'], f'{hash_id}_y')
        y_test = load(local_paths_env['store_test_data'], f'{hash_id}_y')
        print('Loaded data from file.')
    else:
        print("Data not found in storage - load from database")
        # data not loaded: pull from database and create features
        X_train, X_test, y_train, y_test = sample(ignition, connection, local_paths_env['store_features'])
        print(f"X_train shape: {X_train.shape}")
        print(f"X_test shape: {X_test.shape}")
        print(f"y_train shape: {y_train.shape}")
        print(f"y_test shape: {y_test.shape}")

        # add fold index column to data
        X_train, y_train = k_fold(X_train, y_train, ignition['k_folds'], ignition['k_folds_seed'])

        # save data to file for future use
        save(X_train, local_paths_env['store_train_data'], f'{hash_id}_x', persist_all)
        save(X_test, local_paths_env['store_test_data'], f'{hash_id}_x', persist_all)
        save(y_train, local_paths_env['store_train_data'], f'{hash_id}_y', persist_all)
        save(y_test, local_paths_env['store_test_data'], f'{hash_id}_y', persist_all)

    print('Data loading completed.')

    ##### 3. K-FOLDING #####
    # loop over folds
    for fold in tqdm(range(ignition['k_folds']), desc='Folds'):

        # get fold id hash (for persisting)
        fold_id = create_hash_id(str(ignition['id']) + str(fold))

        # get fold data
        fold_X_train = X_train[X_train['k'] != fold]
        fold_X_test = X_train[X_train['k'] == fold]
        fold_y_train = y_train[y_train['k'] != fold]
        fold_y_test = y_train[y_train['k'] == fold]

        # store fold features, if any
        fold_features = {}

        ##### 4. LOOP OVER HYPERPARAMETERS: TRAIN CLASSIFIER #####
        for hyperparam in tqdm(hyperparameters, desc='Hyperparameters'):
            # create hyperparam unique id and hyperparam-fold unique id
            hyperparam_id = create_hash_id(str(ignition['id']) + str(hyperparam))
            hyperparam_fold_id = create_hash_id(str(ignition['id']) + str(hyperparam) + str(fold))

            # if not check_val_in_db(connection, ignition['results_table_name'],
            #                        'results', 'hash_id', hyperparam_fold_id, len(ignition['recalls'])):

            # create classifier of specified type and with specified target
            classifier = select_classifier(ignition["model_type"], fold_id,
                                           ignition["target"], ignition["classes"],
                                           fold_features, hyperparameters=hyperparam,
                                           seed=ignition['seed'], env=local_paths_env,
                                           load_fresh=load_all_fresh)
            # print('Classifier created.')

            # train classifier
            classifier.train(fold_X_train, fold_y_train)

            ##### 5. TEST CLASSIFIER #####
            # generate predictions from classifier
            y_probs = classifier.predict(fold_X_test)

            ##### 6. EVALUATION #####
            for recall in tqdm(ignition['recalls'], desc='Evaluations'):
                # compute evaluation metrics
                all_metrics = compute_metrics(metric_names=ignition['metrics'],
                                              y_true=fold_y_test.drop(columns=['k']),
                                              y_pred=y_probs, k=recall)

                # store results in database
                unique_id = create_hash_id(str(ignition['id']) + str(hyperparam) + str(fold) + str(recall))
                results_to_db(metrics=all_metrics, table_name=ignition['results_table_name'],
                              ignition_id=ignition['id'], hash_id=hyperparam_fold_id,
                              algorithm=ignition['model_type'], hyperparameters=hyperparam,
                              fold=str(fold), recall=recall, unique_id=unique_id,
                              connection=connection)

    connection.close()
    print(f"Done running pipeline for ignition id: {ignition['id']}!")
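`expand_grid` is assumed to turn the dict of hyperparameter lists from the ignition YAML into one dict per run (a cartesian product). A minimal sketch of that assumed behaviour, using a hypothetical name so it is not confused with the pipeline's own helper:

from itertools import product

def expand_grid_sketch(hyperparameters):
    """Hypothetical stand-in for expand_grid: cartesian product of hyperparameter lists."""
    keys = sorted(hyperparameters)
    return [dict(zip(keys, values)) for values in product(*(hyperparameters[k] for k in keys))]

# e.g. {'max_depth': [3, 5], 'n_estimators': [100]} ->
#      [{'max_depth': 3, 'n_estimators': 100}, {'max_depth': 5, 'n_estimators': 100}]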
def main(args):
    torch.manual_seed(1)

    model_name = ''
    model_name += (args.cell_type) + ('_')
    model_name += args.model_type
    args.bidirectional = not args.unidirectional
    print('the model name: ', model_name)

    args.data_root += ''
    args.save_root += ''
    args.dataset = args.cell_type
    args.data_root = os.path.join(args.data_root)
    print('loading data from: ', args.data_root)
    args.save_root = os.path.join(args.save_root, args.dataset)
    print('saving results in: ', args.save_root)

    if args.model_root is None:
        model_dir = os.path.join(args.save_root, model_name)
    else:
        args.model_root = os.path.join(args.model_root, args.dataset)
        model_dir = os.path.join(args.model_root, model_name)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    attentionmapfile = model_dir + '/' + args.attentionfilename
    orig_attentionmapfile = model_dir + '/' + 'orig_' + args.attentionfilename

    print('==>processing data')
    Train, Valid, Test = data.load_data(args)

    print('==>building model')
    model = Model.att_chrome(args)

    if torch.cuda.device_count() > 0:
        torch.cuda.manual_seed_all(1)
        dtype = torch.cuda.FloatTensor
        # cuda.set_device(args.gpuid)
        model.type(dtype)
        print('Using GPU ' + str(args.gpuid))
    else:
        dtype = torch.FloatTensor
    # print(model)

    if (args.test_on_saved_model == False):
        print("==>initializing a new model")
        for p in model.parameters():
            p.data.uniform_(-0.1, 0.1)

    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    # optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

    def pgd_attack(model, inputs_1, batch_diff_targets, eps=1.0, alpha=1.0, iters=2):
        ori_inputs_1 = inputs_1.data

        # optionally restrict the perturbation to foreground or background bins
        if args.pgd_mask:
            if args.pgd_mask == 'fg':
                pgd_mask = (ori_inputs_1 > args.pgd_mask_threshold).to(torch.float32)
            elif args.pgd_mask == 'bg':
                pgd_mask = (ori_inputs_1 <= args.pgd_mask_threshold).to(torch.float32)
        # print(ori_images.min(), ori_images.max())

        for i in range(iters):
            inputs_1.requires_grad = True
            if args.input_threshold:
                inputs = inputs_1 * (inputs_1 > args.input_threshold).to(torch.float32)
            else:
                inputs = inputs_1

            # keep the predictions/attentions on the unperturbed inputs from the first iteration
            if i == 0:
                batch_predictions_orig, batch_beta_orig, batch_alpha_orig = model(inputs.type(dtype))
            batch_predictions, batch_beta, batch_alpha = model(inputs.type(dtype))

            if i < iters - 1:
                model.zero_grad()
                loss = F.binary_cross_entropy_with_logits(batch_predictions, batch_diff_targets.cuda(),
                                                          reduction='mean')
                loss.backward()
                grad_values = inputs_1.grad

                # sign-gradient step, then project back into the eps-ball around the original inputs
                adv_inputs_1 = inputs_1 + alpha * grad_values.sign()
                eta = torch.clamp(adv_inputs_1 - ori_inputs_1, min=-eps, max=eps)
                if args.pgd_mask:
                    eta = eta * pgd_mask
                inputs_1 = torch.clamp(ori_inputs_1 + eta, min=ori_inputs_1.min(),
                                       max=ori_inputs_1.max()).detach_()

        ori_inputs_1_flat = ori_inputs_1.detach().cpu().numpy().flatten()
        inputs_1_flat = inputs_1.detach().cpu().numpy().flatten()
        grad_values_flat = grad_values.detach().cpu().numpy().flatten()
        corr = pearsonr(inputs_1.detach().cpu().numpy().flatten(),
                        ori_inputs_1.cpu().numpy().flatten())[0]
        return batch_predictions, corr, inputs_1_flat, grad_values_flat, ori_inputs_1_flat, batch_beta, batch_alpha, batch_predictions_orig, batch_beta_orig, batch_alpha_orig

    def train(TrainData):
        model.train()

        # initialize attention
        diff_targets = torch.zeros(TrainData.dataset.__len__(), 1)
        predictions = torch.zeros(diff_targets.size(0), 1)
        all_attention_bin = torch.zeros(TrainData.dataset.__len__(), (args.n_hms * args.n_bins))
        all_attention_hm = torch.zeros(TrainData.dataset.__len__(), args.n_hms)

        num_batches = int(math.ceil(TrainData.dataset.__len__() / float(args.batch_size)))
        all_gene_ids = [None] * TrainData.dataset.__len__()
        per_epoch_loss = 0
        print('Training')
        for idx, Sample in enumerate(TrainData):
            start, end = (idx * args.batch_size), min((idx * args.batch_size) + args.batch_size,
                                                      TrainData.dataset.__len__())
            inputs_1 = Sample['input']
            batch_diff_targets = Sample['label'].unsqueeze(1).float()

            optimizer.zero_grad()
            if args.pgd:
                batch_predictions, corr, adv_inputs, grad_values, ori_inputs, batch_beta, batch_alpha, batch_predictions_orig, batch_beta_orig, batch_alpha_orig = pgd_attack(
                    model, inputs_1.type(dtype), batch_diff_targets, eps=1.0, alpha=1.0, iters=args.pgd_steps)
            else:
                batch_predictions, batch_beta, batch_alpha = model(inputs_1.type(dtype))

            loss = F.binary_cross_entropy_with_logits(batch_predictions.cpu(), batch_diff_targets,
                                                      reduction='mean')
            per_epoch_loss += loss.item()
            loss.backward()
            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
            optimizer.step()

            diff_targets[start:end, 0] = batch_diff_targets[:, 0]
            all_gene_ids[start:end] = Sample['geneID']
            batch_predictions = torch.sigmoid(batch_predictions)
            predictions[start:end] = batch_predictions.data.cpu()
            all_attention_bin[start:end] = batch_alpha.data
            all_attention_hm[start:end] = batch_beta.data

        per_epoch_loss = per_epoch_loss / num_batches
        return predictions, diff_targets, all_attention_bin, all_attention_hm, per_epoch_loss

    def test(ValidData):
        if args.pgd:
            model.train()
        else:
            model.eval()

        diff_targets = torch.zeros(ValidData.dataset.__len__(), 1)
        predictions = torch.zeros(diff_targets.size(0), 1)
        predictions_orig = torch.zeros(diff_targets.size(0), 1)
        all_attention_bin = torch.zeros(ValidData.dataset.__len__(), (args.n_hms * args.n_bins))
        all_attention_hm = torch.zeros(ValidData.dataset.__len__(), args.n_hms)
        all_attention_bin_orig = torch.zeros(ValidData.dataset.__len__(), (args.n_hms * args.n_bins))
        all_attention_hm_orig = torch.zeros(ValidData.dataset.__len__(), args.n_hms)

        num_batches = int(math.ceil(ValidData.dataset.__len__() / float(args.batch_size)))
        all_gene_ids = [None] * ValidData.dataset.__len__()
        per_epoch_loss = 0
        per_epoch_corr = 0
        all_adv_inputs = []
        all_grad_values = []
        all_ori_inputs = []

        for idx, Sample in enumerate(ValidData):
            start, end = (idx * args.batch_size), min((idx * args.batch_size) + args.batch_size,
                                                      ValidData.dataset.__len__())
            optimizer.zero_grad()
            inputs_1 = Sample['input']
            batch_diff_targets = Sample['label'].unsqueeze(1).float()

            if args.pgd:
                batch_predictions, corr, adv_inputs, grad_values, ori_inputs, batch_beta, batch_alpha, batch_predictions_orig, batch_beta_orig, batch_alpha_orig = pgd_attack(
                    model, inputs_1.type(dtype), batch_diff_targets, eps=1.0, alpha=1.0, iters=args.pgd_steps)
                all_adv_inputs = all_adv_inputs + list(adv_inputs)
                all_grad_values = all_grad_values + list(grad_values)
                all_ori_inputs = all_ori_inputs + list(ori_inputs)
                per_epoch_corr += corr
            else:
                if args.input_threshold:
                    inputs_1 = inputs_1 * (inputs_1 > args.input_threshold).to(torch.float32)
                batch_predictions, batch_beta, batch_alpha = model(inputs_1.type(dtype))
                batch_predictions_orig = batch_predictions
                # batch_predictions = model(inputs_1.type(dtype))
                # added so the *_orig buffers below are also defined when PGD is off
                batch_beta_orig, batch_alpha_orig = batch_beta, batch_alpha

            loss = F.binary_cross_entropy_with_logits(batch_predictions.cpu(), batch_diff_targets,
                                                      reduction='mean')

            all_attention_bin[start:end] = batch_alpha.data
            all_attention_hm[start:end] = batch_beta.data
            all_attention_bin_orig[start:end] = batch_alpha_orig.data
            all_attention_hm_orig[start:end] = batch_beta_orig.data
            diff_targets[start:end, 0] = batch_diff_targets[:, 0]
            all_gene_ids[start:end] = Sample['geneID']

            batch_predictions = torch.sigmoid(batch_predictions)
            batch_predictions_orig = torch.sigmoid(batch_predictions_orig)
            predictions[start:end] = batch_predictions.data.cpu()
            predictions_orig[start:end] = batch_predictions_orig.data.cpu()
            per_epoch_loss += loss.item()

        per_epoch_loss = per_epoch_loss / num_batches
        per_epoch_corr = per_epoch_corr / num_batches
        return predictions, diff_targets, all_attention_bin, all_attention_hm, per_epoch_loss, all_gene_ids, per_epoch_corr, np.array(all_adv_inputs), np.array(all_grad_values), np.array(all_ori_inputs), all_attention_bin_orig, all_attention_hm_orig, predictions_orig

    best_valid_loss = 10000000000
    best_valid_avgAUPR = -1
    best_valid_avgAUC = -1
    best_test_avgAUC = -1

    if (args.test_on_saved_model == False):
        for epoch in range(0, args.epochs):
            print('---------------------------------------- Training ' + str(epoch + 1) + ' -----------------------------------')

            predictions, diff_targets, all_attention_bin, all_attention_hm, per_epoch_loss = train(Train)
            train_avgAUPR, train_avgAUC = evaluate.compute_metrics(predictions, diff_targets)

            predictions, diff_targets, alpha_valid, beta_valid, valid_loss, gene_ids_valid, test_corr, adv_inputs_valid, grad_values_valid, ori_inputs_valid, _, _, _ = test(Valid)
            valid_avgAUPR, valid_avgAUC = evaluate.compute_metrics(predictions, diff_targets)

            predictions, diff_targets, alpha_test, beta_test, test_loss, gene_ids_test, test_corr, adv_inputs_test, grad_values_test, ori_inputs_test, _, _, _ = test(Test)
            test_avgAUPR, test_avgAUC = evaluate.compute_metrics(predictions, diff_targets)

            if (valid_avgAUC >= best_valid_avgAUC):
                best_valid_avgAUC = valid_avgAUC
                best_test_avgAUC = test_avgAUC
                torch.save(model.cpu().state_dict(), model_dir + "/" + model_name + '_avgAUC_model.pt')
                model.type(dtype)

            print("Epoch:", epoch)
            print("train avgAUC:", train_avgAUC)
            print("valid avgAUC:", valid_avgAUC)
            print("test avgAUC:", test_avgAUC)
            print("best valid avgAUC:", best_valid_avgAUC)
            print("best test avgAUC:", best_test_avgAUC)

        print("\nFinished training")
        print("Best validation avgAUC:", best_valid_avgAUC)
        print("Best test avgAUC:", best_test_avgAUC)

        if (args.save_attention_maps):
            attentionfile = open(attentionmapfile, 'w')
            attentionfilewriter = csv.writer(attentionfile)
            beta_test = beta_test.numpy()
            for i in range(len(gene_ids_test)):
                gene_attention = []
                gene_attention.append(gene_ids_test[i])
                for e in beta_test[i, :]:
                    gene_attention.append(str(e))
                attentionfilewriter.writerow(gene_attention)
            attentionfile.close()

        return best_test_avgAUC, test_corr

    else:
        if args.kipoi_model:
            model.load_state_dict(kipoi.get_model("AttentiveChrome/{}".format(args.cell_type)).model.state_dict())
        elif args.test_on_train != None:
            model.load_state_dict(torch.load(args.test_on_train + "/" + args.cell_type + '/' + model_name + '/' + model_name + '_avgAUC_model.pt'))
        else:
            model.load_state_dict(torch.load(model_dir + "/" + model_name + '_avgAUC_model.pt'))

        predictions, diff_targets, alpha_test, beta_test, test_loss, gene_ids_test, test_corr, adv_inputs_test, grad_values_test, ori_inputs_test, alpha_test_orig, beta_test_orig, predictions_orig = test(Test)
        test_avgAUPR, test_avgAUC = evaluate.compute_metrics(predictions, diff_targets)
        print("test avgAUC:", test_avgAUC)
        print("test corr:", test_corr)

        if (args.save_attention_maps):
            attentionfile = open(attentionmapfile, 'w')
            attentionfilewriter = csv.writer(attentionfile)
            beta_test = beta_test.numpy()
            for i in range(len(gene_ids_test)):
                gene_attention = []
                gene_attention.append(gene_ids_test[i])
                for e in beta_test[i, :]:
                    gene_attention.append(str(e))
                attentionfilewriter.writerow(gene_attention)
            attentionfile.close()

        if (args.save_adv_inputs):
            attentionfile = open(orig_attentionmapfile, 'w')
            attentionfilewriter = csv.writer(attentionfile)
            beta_test_orig = beta_test_orig.numpy()
            for i in range(len(gene_ids_test)):
                gene_attention = []
                gene_attention.append(gene_ids_test[i])
                for e in beta_test_orig[i, :]:
                    gene_attention.append(str(e))
                attentionfilewriter.writerow(gene_attention)
            attentionfile.close()

        if (args.save_adv_inputs):
            return test_avgAUC, test_corr, gene_ids_test, adv_inputs_test, grad_values_test, ori_inputs_test, alpha_test, alpha_test_orig, predictions, predictions_orig, diff_targets, beta_test, beta_test_orig

        return test_avgAUC, test_corr
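Stripped of the attention bookkeeping, the perturbation rule inside pgd_attack is a standard PGD step: ascend along the sign of the input gradient, then project the perturbation back into an eps-ball around the original input. A minimal self-contained sketch under the simplifying assumptions that the model returns logits directly and that projection uses only the eps-ball (not the att_chrome tuple outputs, masking, or min/max clamping above):

import torch
import torch.nn.functional as F

def pgd_step_sketch(model, inputs, targets, eps=1.0, alpha=1.0, iters=2):
    """Generic PGD sketch: iteratively perturb inputs to increase the BCE-with-logits loss."""
    ori_inputs = inputs.detach()
    adv = ori_inputs.clone()
    for _ in range(iters):
        adv.requires_grad_(True)
        loss = F.binary_cross_entropy_with_logits(model(adv), targets, reduction='mean')
        grad, = torch.autograd.grad(loss, adv)
        adv = adv.detach() + alpha * grad.sign()        # ascend along the gradient sign
        eta = torch.clamp(adv - ori_inputs, -eps, eps)  # project into the eps-ball
        adv = (ori_inputs + eta).detach()
    return adv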
def run_model(name, use_elmo=False, save_predictions=False, save_model=False):
    """
    Trains the given deep learning model on the train set, and evaluates on the test set.

    Parameters
    ----------
    name: str
        name of the deep learning model to be run: lstm | bilstm | stacked_bilstm | cnn | bert
    use_elmo: bool
        use ELMo embeddings if True | GloVe embeddings if False
    save_predictions: bool
        If True, stores and returns the predicted probabilities mapped to sentence ID
    save_model: bool
        If True, saves the trained model along with its vocabulary

    Returns
    -------
    F1-score, Precision, Recall, Accuracy, Area Under Precision-Recall Curve on the test set;
    dictionary mapping predictions to ID, and number of training epochs for each fold.
    """
    # token_indexer maps tokens to integers; using special built-in indexers for ELMo and BERT
    # to ensure the mapping is consistent with the original models
    if use_elmo:
        token_indexer = ELMoTokenCharactersIndexer()
    elif name == 'bert':
        global bert_token_indexer
        bert_token_indexer = PretrainedBertIndexer(pretrained_model=BERT_MODEL, do_lowercase=True)
    else:
        token_indexer = SingleIdTokenIndexer()

    if name == 'bert':  # BERT uses a special wordpiece tokenizer
        reader = data_reader.GeneralizationDatasetReader(tokenizer=tokenizer_bert,
                                                         token_indexers={"tokens": bert_token_indexer},
                                                         label_cols=LABEL_COLS)
    else:
        reader = data_reader.GeneralizationDatasetReader(tokenizer=tokenizer,
                                                         token_indexers={"tokens": token_indexer},
                                                         label_cols=LABEL_COLS)

    map_id_pred_probability = {}  # used if save_predictions is True
    f1s, precision_s, recall_s, accuracies, AUPRCs, n_epochs = [], [], [], [], [], []

    for fold_number in range(1, 4):  # 3-fold cross validation
        train_fname = 'train_data_fold_' + str(fold_number) + '.csv'
        val_fname = 'val_data_fold_' + str(fold_number) + '.csv'
        test_fname = 'test_data_fold_' + str(fold_number) + '.csv'

        train_dataset = reader.read(file_path=DATA_ROOT / train_fname)
        validation_dataset = reader.read(file_path=DATA_ROOT / val_fname)
        test_dataset = reader.read(file_path=DATA_ROOT / test_fname)
        # print("\n##################################\n", name, len(train_dataset), len(validation_dataset), len(test_dataset))

        # Train the model:
        if name == 'lstm':
            model, vocab, ep = train.train_lstm(train_dataset, validation_dataset, BATCH_SIZE,
                                                num_layers=1, bidirectional=False, use_elmo=use_elmo)
        elif name == 'bilstm':
            model, vocab, ep = train.train_lstm(train_dataset, validation_dataset, BATCH_SIZE,
                                                num_layers=1, bidirectional=True, use_elmo=use_elmo)
        elif name == 'stacked_bilstm':
            model, vocab, ep = train.train_lstm(train_dataset, validation_dataset, BATCH_SIZE,
                                                num_layers=2, bidirectional=True, use_elmo=use_elmo)
        elif name == 'cnn':
            model, vocab, ep = train.train_cnn(train_dataset, validation_dataset, BATCH_SIZE,
                                               num_filters=100, filter_sizes=(2, 3, 4, 5), use_elmo=use_elmo)
        elif name == 'bert':
            model, vocab, ep = train.train_bert(train_dataset, validation_dataset, BATCH_SIZE,
                                                pretrained_model=BERT_MODEL)
        else:
            sys.exit("'name' not valid")

        n_epochs.append(ep)  # keep track of number of actual training epochs for each fold

        # Predict and evaluate the model on test set:
        # Note that 'preds' is of shape (number of samples, 2) - the columns represent the
        # probabilities for the two classes ['generalization', 'neutral']
        preds = evaluate.make_predictions(model, vocab, test_dataset, BATCH_SIZE)
        f1, precision, recall, acc, auprc = evaluate.compute_metrics(preds, test_dataset)

        if save_predictions:
            id_pred = evaluate.map_id_prediction(preds, test_dataset)
            if set(id_pred.keys()).intersection(set(map_id_pred_probability.keys())) != set():  # sanity check
                sys.exit("Error: There is overlap in test set IDs across folds.")
            map_id_pred_probability.update(id_pred)

        if save_model:  # save the model weights and vocabulary
            with open('./tmp/' + name + '_model' + '_fold_' + str(fold_number) + '.th', 'wb') as f:
                torch.save(model.state_dict(), f)
            vocab.save_to_files("./tmp/" + name + "_vocabulary_fold_" + str(fold_number))

        print("\nFold #{} | F1 = {}".format(fold_number, f1))
        f1s.append(f1)
        precision_s.append(precision)
        recall_s.append(recall)
        accuracies.append(acc)
        AUPRCs.append(auprc)

    mean_f1 = np.array(f1s).mean()
    mean_precision = np.array(precision_s).mean()
    mean_recall = np.array(recall_s).mean()
    mean_accuracy = np.array(accuracies).mean()
    mean_auprc = np.array(AUPRCs).mean()

    print("Total # predictions: {} | Saving Predictions = {}".format(len(map_id_pred_probability), save_predictions))
    return mean_f1, mean_precision, mean_recall, mean_accuracy, mean_auprc, map_id_pred_probability, n_epochs
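As with the earlier variant, a call might look like the following; the model name and flags are only examples:

# Hypothetical call: 3-fold CV with a CNN over GloVe embeddings.
mean_f1, mean_precision, mean_recall, mean_accuracy, mean_auprc, id_to_prob, n_epochs = run_model(
    'cnn', use_elmo=False, save_predictions=True, save_model=False)
print("CNN mean F1 over folds:", mean_f1)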