def eval(prediction_dict, sub_array=None, path="", add_name="", filter_down=False):
    prediction = prediction_dict['prediction']
    true_label = prediction_dict['true_label']
    if sub_array is not None:
        print('len label {}'.format(len(sub_array)))
        prediction = prediction[:, sub_array]  ## obs x label
        true_label = true_label[:, sub_array]

    # threshold_fmax = np.arange(0.0001, 1, .005)

    if filter_down:  ##!! when evaluating rare terms, only a few proteins may have them
        print('dim before remove {}'.format(prediction.shape))
        where = np.where(np.sum(true_label, axis=1) > 0)[0]  ## keep proteins with at least one positive label
        print('retain these prot {}'.format(len(where)))
        prediction = prediction[where]
        print('check dim {}'.format(prediction.shape))
        true_label = true_label[where]

    result = evaluation_metric.all_metrics(
        np.round(prediction), true_label, yhat_raw=prediction,
        k=[5, 10, 15, 20, 30, 40, 50, 60, 70, 80, 90, 100],
        path=path, add_name=add_name)
    return result
def eval(prediction_dict, sub_array=None):
    prediction = prediction_dict['prediction']
    true_label = prediction_dict['true_label']
    if sub_array is not None:
        prediction = prediction[:, sub_array]  ## obs x label
        true_label = true_label[:, sub_array]

    result = evaluation_metric.all_metrics(
        np.round(prediction), true_label, yhat_raw=prediction,
        k=[5, 10, 15, 20, 25, 30, 35, 40])
    return result
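## Hedged usage sketch (not part of the original code): shows how the `eval` helpers
## above could be called on a saved prediction pickle of the form
## {'prediction': obs x label array, 'true_label': obs x label array}, the layout
## written out by the blast/psiblast script elsewhere in this code. The file path
## and the rare-term column indices below are hypothetical placeholders.
import pickle
import numpy as np

def demo_eval_on_pickle(pickle_path="test-mf-prediction.pickle"):  ## hypothetical path
    prediction_dict = pickle.load(open(pickle_path, "rb"))
    result_all = eval(prediction_dict)  ## `eval` helper defined above, not Python's builtin
    ## evaluate only a subset of GO columns, e.g. rare terms (indices made up here)
    rare_term_index = np.array([0, 7, 42])
    result_rare = eval(prediction_dict, sub_array=rare_term_index)
    return result_all, result_rare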
def do_eval(self, prot_loader, **kwargs):
    torch.cuda.empty_cache()
    self.eval()
    tr_loss = 0
    preds = []
    true_label = []

    for step, batch in enumerate(prot_loader):
        with torch.no_grad():  ## no gradient for everything in this section
            batch = tuple(t for t in batch)
            if self.args.has_ppi_emb:
                prot_idx, prot_len, mask, label_ids, prot_interact_emb = batch  ## @label_ids must be of size @args.num_label_to_test
            else:
                prot_idx, prot_len, mask, label_ids, _ = batch
            prot_idx = prot_idx[:, 0:int(max(prot_len))]  ## trim down to the longest sequence in the batch
            mask = mask[:, 0:int(max(prot_len))]
            if self.args.has_ppi_emb and (self.args.prot_interact_vec_dim > 0):
                prot_interact_emb = prot_interact_emb.cuda()
            else:
                prot_interact_emb = None
            pred, loss = self.forward(prot_idx.cuda(), mask.cuda(), prot_interact_emb, label_ids.cuda(), **kwargs)
            # loss = self.classify_loss(pred, label_ids.cuda())
            tr_loss = tr_loss + loss
            ## take sigmoid here if sigmoid was not taken inside @forward
            if self.loss_type == 'BCEWithLogitsLoss':
                pred = F.sigmoid(pred)
            if len(preds) == 0:
                preds.append(pred.detach().cpu().numpy())
                true_label.append(label_ids.detach().cpu().numpy())
            else:
                preds[0] = np.append(preds[0], pred.detach().cpu().numpy(), axis=0)
                true_label[0] = np.append(true_label[0], label_ids.detach().cpu().numpy(), axis=0)  ## stack by rows

    # end eval
    true_label = true_label[0]
    preds = preds[0]

    print('loss {}'.format(tr_loss))
    print('pred label')
    print(preds)
    print('true label')
    print(true_label)

    trackF1macro = {}
    trackF1micro = {}  # metrics["f1_micro"]
    trackMacroPrecision = {}  # [MACRO] accuracy, precision, recall
    trackMacroRecall = {}
    trackMicroPrecision = {}
    trackMicroRecall = {}

    ##!! no need to sweep thresholds during training
    # if self.args.not_train:
    #     rounding = np.arange(.1, 1, .4)
    # else:
    rounding = [0.5]

    for round_cutoff in rounding:
        print('\n\nround cutoff {}'.format(round_cutoff))
        preds_round = 1.0 * (round_cutoff < preds)  ## convert probabilities into 0/1
        result = evaluation_metric.all_metrics(
            preds_round, true_label, yhat_raw=preds, k=[10, 20])  ## we can pass a vector of P@k and R@k
        evaluation_metric.print_metrics(result)

        if 'full_data' not in trackF1macro:
            trackF1macro['full_data'] = [result["f1_macro"]]
            trackF1micro['full_data'] = [result["f1_micro"]]
            trackMacroPrecision['full_data'] = [result["prec_macro"]]
            trackMicroPrecision['full_data'] = [result["prec_micro"]]
            trackMacroRecall['full_data'] = [result["rec_macro"]]
            trackMicroRecall['full_data'] = [result["rec_micro"]]
        else:
            trackF1macro['full_data'].append(result["f1_macro"])
            trackF1micro['full_data'].append(result["f1_micro"])
            trackMacroPrecision['full_data'].append(result["prec_macro"])
            trackMicroPrecision['full_data'].append(result["prec_micro"])
            trackMacroRecall['full_data'].append(result["rec_macro"])
            trackMicroRecall['full_data'].append(result["rec_micro"])

        if ('GoCount' in kwargs) and (self.args.not_train):  ## no need to do this all the time
            print('\n\nsee if method improves accuracy conditioned on frequency of GO terms')
            ## frequency below the 25th quantile and above the 75th quantile
            ## indexing must be computed ahead of time to avoid redundant calculation
            for cutoff in ['quant25', 'quant75', 'betweenQ25Q75']:
                ## indexing of the columns to pull out; @preds is num_prot x num_go
                result = evaluation_metric.all_metrics(
                    preds_round[:, kwargs[cutoff]],
                    true_label[:, kwargs[cutoff]],
                    yhat_raw=preds[:, kwargs[cutoff]], k=[10, 20])
                print("\nGO-term frequency group {}".format(cutoff))
                evaluation_metric.print_metrics(result)
                if cutoff not in trackF1macro:
                    trackF1macro[cutoff] = [result["f1_macro"]]
                    trackF1micro[cutoff] = [result["f1_micro"]]
                    trackMacroPrecision[cutoff] = [result["prec_macro"]]
                    trackMicroPrecision[cutoff] = [result["prec_micro"]]
                    trackMacroRecall[cutoff] = [result["rec_macro"]]
                    trackMicroRecall[cutoff] = [result["rec_micro"]]
                else:
                    trackF1macro[cutoff].append(result["f1_macro"])
                    trackF1micro[cutoff].append(result["f1_micro"])
                    trackMacroPrecision[cutoff].append(result["prec_macro"])
                    trackMicroPrecision[cutoff].append(result["prec_micro"])
                    trackMacroRecall[cutoff].append(result["rec_macro"])
                    trackMicroRecall[cutoff].append(result["rec_micro"])

    if self.args.not_train:
        print('\n\ntracking f1 compiled into lists\n')
        # macro f1 / precision / recall
        for k, v in trackF1macro.items():
            print('macroF1 ' + k + " " + " ".join(str(s) for s in v))
        for k, v in trackMacroPrecision.items():
            print('macroPrec ' + k + " " + " ".join(str(s) for s in v))
        for k, v in trackMacroRecall.items():
            print('macroRec ' + k + " " + " ".join(str(s) for s in v))
        # micro f1 / precision / recall
        for k, v in trackF1micro.items():
            print('microF1 ' + k + " " + " ".join(str(s) for s in v))
        for k, v in trackMicroPrecision.items():
            print('microPrec ' + k + " " + " ".join(str(s) for s in v))
        for k, v in trackMicroRecall.items():
            print('microRec ' + k + " " + " ".join(str(s) for s in v))

    output = {'prediction': preds, 'truth': true_label}  ##!! keep both so downstream analysis is easier
    return result, output, tr_loss
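## Hedged sketch (not part of the original code): @do_eval above expects column index
## arrays under kwargs['quant25'], kwargs['quant75'] and kwargs['betweenQ25Q75'] when
## 'GoCount' is passed, so it can score rare vs. common GO terms separately. One
## plausible way to build those indices from a per-term count vector is shown below;
## the exact construction used in the original pipeline may differ.
import numpy as np

def make_quantile_index(go_count):
    ## @go_count is a 1D array of training-set frequencies, one entry per GO column
    go_count = np.asarray(go_count)
    q25, q75 = np.quantile(go_count, [0.25, 0.75])
    return {
        'GoCount': go_count,
        'quant25': np.where(go_count <= q25)[0],  ## rare terms
        'quant75': np.where(go_count >= q75)[0],  ## common terms
        'betweenQ25Q75': np.where((go_count > q25) & (go_count < q75))[0],
    }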
def evaluate(args, model, tokenizer, label_2test_array, prefix=""):
    num_labels = len(label_2test_array)

    eval_output_dir = args.output_dir
    eval_dataset = load_and_cache_examples(args, tokenizer, label_2test_array, evaluate=True)

    if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(eval_output_dir)

    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Note that DistributedSampler samples randomly
    eval_sampler = SequentialSampler(eval_dataset) if args.local_rank == -1 else DistributedSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)

    # Eval!
    logger.info("***** Running evaluation {} *****".format(prefix))
    logger.info(" Num examples = %d", len(eval_dataset))
    logger.info(" Batch size = %d", args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    model.eval()

    prediction = None
    true_label = None

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        # batch = batch.to(args.device)
        max_len_in_batch = int(torch.max(torch.sum(batch[3], 1)))  ## only need max len of AA
        input_ids_aa = batch[1][:, 0:max_len_in_batch].to(args.device)
        input_ids_label = batch[2].to(args.device)
        attention_mask = torch.cat(
            (batch[3][:, 0:max_len_in_batch],
             torch.ones(input_ids_label.shape, dtype=torch.long)), dim=1).to(args.device)
        labels = batch[0].to(args.device)  ## already batch_size x num_label

        ## must append 0 positions at the front so that the AA positions are masked out
        labels_mask = torch.cat(
            (torch.zeros(input_ids_aa.shape),
             torch.ones(input_ids_label.shape)), dim=1).to(args.device)  ## test all labels

        ppi_vec = batch[4].unsqueeze(1).expand(
            labels.shape[0], max_len_in_batch + num_labels, 256).to(args.device)

        if args.aa_type_emb:
            aa_type = batch[5][:, 0:max_len_in_batch].to(args.device)
        else:
            aa_type = None

        with torch.no_grad():
            outputs = model(0,
                            input_ids_aa=input_ids_aa,
                            input_ids_label=input_ids_label,
                            token_type_ids=aa_type,
                            attention_mask=attention_mask,
                            labels=labels,
                            position_ids=None,
                            attention_mask_label=labels_mask,
                            prot_vec=ppi_vec)
            lm_loss = outputs[0]
            eval_loss += lm_loss.mean().item()

        nb_eval_steps += 1

        ## track output
        norm_prob = torch.softmax(outputs[1], 1)  ## still label x 2
        norm_prob = norm_prob.detach().cpu().numpy()[:, 1]  ## size is num_label

        if prediction is None:  ## track predicted probability
            true_label = batch[0].data.numpy()
            prediction = np.reshape(norm_prob, (batch[0].shape))  ## num actual samples x num labels
        else:
            true_label = np.vstack((true_label, batch[0].data.numpy()))
            prediction = np.vstack((prediction, np.reshape(norm_prob, (batch[0].shape))))

    result = evaluation_metric.all_metrics(
        np.round(prediction), true_label, yhat_raw=prediction, k=[5, 10, 15, 20, 25])  ## we can pass a vector of P@k and R@k
    # evaluation_metric.print_metrics(result)
    result['eval_loss'] = eval_loss / nb_eval_steps

    output_eval_file = os.path.join(eval_output_dir, "eval_results.txt")
    with open(output_eval_file, "a+") as writer:
        logger.info("***** Eval results {} *****".format(prefix))
        print("\n***** Eval results {} *****".format(prefix))
        writer.write("\n***** Eval results {} *****".format(prefix))
        for key in sorted(result.keys()):
            print(" {} = {}".format(key, str(result[key])))
            # writer.write("%s = %s\n" % (key, str(result[key])))

    return result
def evaluate(args, model, tokenizer, label_2test_array, prefix="", config=None):
    num_labels = len(label_2test_array)

    eval_output_dir = args.output_dir
    eval_dataset = load_and_cache_examples(args, tokenizer, label_2test_array, evaluate=True, config=config)

    if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(eval_output_dir)

    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Note that DistributedSampler samples randomly
    # eval_sampler = SequentialSampler(eval_dataset) if args.local_rank == -1 else DistributedSampler(eval_dataset)
    eval_sampler = RandomSampler(eval_dataset) if args.local_rank == -1 else DistributedSampler(eval_dataset)  ## random order to avoid one contiguous block of large data
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size, num_workers=2)

    # Eval!
    logger.info("***** Running evaluation {} *****".format(prefix))
    logger.info(" Num examples = %d", len(eval_dataset))
    logger.info(" Batch size = %d", args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    model.eval()

    prediction = None
    true_label = None
    ave_GOvec = None

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        # batch = batch.to(args.device)
        if config.ppi_front:
            max_len_in_batch = int(torch.max(torch.sum(batch[3][:, 1::], 1)))  ## exclude 1st column, only need max len of AA
            max_len_in_mask = max_len_in_batch + 1
        else:
            max_len_in_batch = int(torch.max(torch.sum(batch[3], 1)))  ## only need max len of AA
            max_len_in_mask = max_len_in_batch

        input_ids_aa = batch[1][:, 0:max_len_in_batch].to(args.device)
        input_ids_label = batch[2].to(args.device)  ## also pass in SEP
        attention_mask = torch.cat(
            (batch[3][:, 0:max_len_in_mask],
             torch.ones(input_ids_label.shape, dtype=torch.long)), dim=1).to(args.device)
        labels = batch[0].to(args.device)  ## already batch_size x num_label

        ## must append 0 positions at the front so that the AA positions are masked out
        labels_mask = torch.cat(
            (torch.zeros(input_ids_aa.shape[0], max_len_in_mask),
             torch.ones(input_ids_label.shape)), dim=1).to(args.device)

        if args.model_type == 'ppi':
            if config.ppi_front:
                ppi_vec = batch[4].unsqueeze(1).to(args.device)  ## make 3D: batch_size x 1 x dim
            else:
                ppi_vec = batch[4].unsqueeze(1).expand(
                    labels.shape[0], max_len_in_batch + num_labels, 256).to(args.device)
        else:
            ppi_vec = None

        if config.aa_type_emb:
            aa_type = batch[5][:, 0:max_len_in_batch, :].to(args.device)
        else:
            aa_type = None

        with torch.no_grad():
            outputs = model(ppi_vec,
                            input_ids_aa=input_ids_aa,
                            input_ids_label=input_ids_label,
                            token_type_ids=aa_type,
                            attention_mask=attention_mask,
                            labels=labels,
                            position_ids=None,
                            attention_mask_label=labels_mask,
                            prot_vec=ppi_vec)
            lm_loss = outputs[0]
            eval_loss += lm_loss.mean().item()

            ## !! take average of the last hidden layer
            hidden_GOvec = outputs[-2][12]  ## batch x max_len x dim
            hidden_GOvec = hidden_GOvec[:, max_len_in_batch::, :]  ## remove AA positions, which run from 0 to @max_len_in_batch
            hidden_GOvec = torch.sum(hidden_GOvec, 0)  ## sum over batch
            if ave_GOvec is None:
                ave_GOvec = hidden_GOvec
            else:
                ave_GOvec = ave_GOvec + hidden_GOvec

        nb_eval_steps += 1

        ## track output
        norm_prob = torch.softmax(outputs[1], 1)  ## still label x 2
        norm_prob = norm_prob.detach().cpu().numpy()[:, 1]  ## size is num_label

        if prediction is None:  ## track predicted probability
            true_label = batch[0].data.numpy()
            prediction = np.reshape(norm_prob, (batch[0].shape))  ## num actual samples x num labels
        else:
            true_label = np.vstack((true_label, batch[0].data.numpy()))
            prediction = np.vstack((prediction, np.reshape(norm_prob, (batch[0].shape))))

    result = evaluation_metric.all_metrics(
        np.round(prediction), true_label, yhat_raw=prediction, k=[5, 10, 15, 20, 25])  ## we can pass a vector of P@k and R@k
    # evaluation_metric.print_metrics(result)
    result['eval_loss'] = eval_loss / nb_eval_steps

    output_eval_file = os.path.join(eval_output_dir, "eval_results" + prefix + ".txt")
    with open(output_eval_file, "a+") as writer:
        logger.info("***** Eval results {} *****".format(prefix))
        print("\n***** Eval results {} *****".format(prefix))
        writer.write("\n***** Eval results {} *****".format(prefix))
        for key in sorted(result.keys()):
            print(" {} = {}".format(key, str(result[key])))
            # writer.write("%s = %s\n" % (key, str(result[key])))

    ## average the summed GO vectors over the whole eval set
    ave_GOvec = ave_GOvec.detach().cpu().numpy() / len(eval_dataset)
    print('hidden go vec dim {}'.format(ave_GOvec.shape))

    ## write out
    fout = open(args.output_dir + "/" + args.govec_outname + ".tsv", "w")
    for index, name in enumerate(label_2test_array):
        fout.write(name + '\t' + '\t'.join(str(s) for s in ave_GOvec[index]) + '\n')
    fout.close()

    return 0
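## Hedged helper sketch (not part of the original code): the `evaluate` function above
## writes one averaged GO vector per label as "name<TAB>v1<TAB>v2..." lines. A reader
## like the one below could load that TSV back into a {GO name: vector} dict; the
## function name is hypothetical.
import numpy as np

def load_govec_tsv(path):
    go_vec = {}
    with open(path) as fin:
        for line in fin:
            parts = line.strip().split('\t')
            go_vec[parts[0]] = np.array([float(s) for s in parts[1:]])
    return go_vec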
def submitJobs(where_train, set_type, where_test, add_name, do_split):  ## @do_split is needed if we use metaGO data
    if add_name == 'none':
        add_name = ""

    os.chdir('/u/flashscratch/d/datduong/goAndGeneAnnotationDec2018/')

    test_gene_annot = pickle.load(open(where_test + "/" + set_type + "_gene_annot.pickle", "rb"))
    print('num of gene to be tested {}'.format(len(test_gene_annot)))
    print('\n\nmust use the prot names in the annot, not the psiblast outcome\n\n')

    genes = list(test_gene_annot.keys())
    genes.sort()  ## alphabetical order

    prediction = pickle.load(open(where_train + "/seq_seq_predict_go_" + add_name + ".pickle", "rb"))

    ## for each gene, fill in the prediction matrix
    label_index_map = pickle.load(open(where_train + "/label_index_map.pickle", "rb"))
    prediction_np = np.zeros((len(genes), len(label_index_map)))
    for g in genes:
        if g not in prediction:
            continue
        go_assign = list(prediction[g].keys())
        go_assign.sort()
        score = [prediction[g][go] for go in go_assign]
        location = [label_index_map[go] for go in go_assign]
        ## assign the score
        prediction_np[genes.index(g), location] = score

    ## convert np into pd to get row names
    df = pd.DataFrame(prediction_np, index=genes)
    pickle.dump(df, open(where_train + "/seq_seq_predict_go_" + add_name + ".pd.pickle", "wb"))

    ## keep only GO terms seen in the training set
    truth_np = np.zeros((len(genes), len(label_index_map)))
    for g in genes:
        if do_split == 1:
            if ";" in test_gene_annot[g][0]:
                go_assign = test_gene_annot[g][0].strip().split(";")
            else:
                go_assign = test_gene_annot[g][0].strip().split(",")
        else:
            go_assign = test_gene_annot[g]
        # go_assign.sort()
        go_assign = [re.sub("GO:", "", go) for go in go_assign]
        location = [label_index_map[go] for go in go_assign if go in label_index_map]  ## !! record only GO terms seen in training
        ## assign the truth
        truth_np[genes.index(g), location] = 1

    print('amino-acid based GO prediction')
    print(prediction_np)

    track_prec = []
    track_rec = []
    for k in [5, 10, 15, 20, 25, 30, 35, 40]:
        animo_go_metric = evaluation_metric.all_metrics(
            np.round(prediction_np), truth_np, yhat_raw=prediction_np, k=k)  ## [0:(16*3), :]
        if k == 5:
            evaluation_metric.print_metrics(animo_go_metric)
        track_prec.append(animo_go_metric['prec_at_' + str(k)])
        track_rec.append(animo_go_metric['rec_at_' + str(k)])

    fmax_val = fmax.f_max(truth_np, prediction_np, threshold=np.arange(0, 1, .02))
    print('fmax value {}'.format(fmax_val))
    print('precision/recall at K')
    print(track_prec)
    print(track_rec)

    label_bio_type = pickle.load(open(where_train + '/label_bio_type.pickle', 'rb'))
    # common30 = pickle.load(open(where_train + "/common_index30.pickle", "rb"))
    # label_bio_type['common30'] = common30

    for bio_type in label_bio_type:
        index = label_bio_type[bio_type]
        print("\n\n" + bio_type)
        print(index[0:10])
        track_prec = []
        track_rec = []
        for k in [5, 10, 15, 20, 25, 30, 35, 40]:
            animo_go_metric = evaluation_metric.all_metrics(
                np.round(prediction_np[:, index]), truth_np[:, index],
                yhat_raw=prediction_np[:, index], k=k)
            if k == 5:
                evaluation_metric.print_metrics(animo_go_metric)
            track_prec.append(animo_go_metric['prec_at_' + str(k)])
            track_rec.append(animo_go_metric['rec_at_' + str(k)])
        fmax_val = fmax.f_max(truth_np[:, index], prediction_np[:, index], threshold=np.arange(0, 1, .02))
        print('fmax value {}'.format(fmax_val))
        print('precision/recall at K')
        print(track_prec)
        print(track_rec)
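## Hedged sketch (not part of the original code): `fmax.f_max` above is treated here
## as a protein-centric Fmax swept over thresholds, in the CAFA style. The version
## below is an assumption about that metric, not necessarily the project's exact
## implementation.
import numpy as np

def f_max_sketch(true_label, prediction, threshold=np.arange(0, 1, .02)):
    best = 0.0
    for t in threshold:
        pred_bin = (prediction >= t).astype(float)
        ## precision is averaged only over proteins with at least one predicted term
        has_pred = pred_bin.sum(axis=1) > 0
        if not has_pred.any():
            continue
        tp = (pred_bin * true_label).sum(axis=1)
        prec = np.mean(tp[has_pred] / pred_bin[has_pred].sum(axis=1))
        rec = np.mean(tp / np.maximum(true_label.sum(axis=1), 1))
        if prec + rec > 0:
            best = max(best, 2 * prec * rec / (prec + rec))
    return best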
def submitJobs(main_dir, data_dir, blast_result_dir, what_set, ontology_type, all_test_label, add_name='none'):
    if add_name == 'none':
        add_name = ""

    #### blast and psi-blast output have the same format
    ## @all_test_label is a file of all labels to be tested; we use it so that we return a matrix num_ob x num_label
    os.chdir(main_dir)

    ## labels to be tested
    all_test_label = pd.read_csv(all_test_label, header=None)
    print('\nsort labels to be tested, we do the same when using NN model.')
    all_test_label = sorted(list(all_test_label[0]))
    label_lookup = {value: index for index, value in enumerate(all_test_label)}

    ## protein annotation of the train set, used later to infer assignments in the test set
    ## blast can only predict what is found in the train set
    print('load go annotation for train data')
    ## we can convert text into dict on-the-fly
    # try:
    #     prot_annot = pickle.load(open(data_dir + 'train-' + ontology_type + '.TrueLabel.pickle', 'rb'))
    # except:
    # train-mf.tsv
    prot_annot, prot_name_train = MakeGroundTruthText2Dict(data_dir + 'train-' + ontology_type + add_name + '.tsv')
    print('\nnum of prots in train data {}\n'.format(len(prot_annot)))

    print('load go annotation for test data')
    ## COMMENT get true labels
    ## COMMENT 'test-'+ontology_type+'.tsv' has different ordering than 'test-'+ontology_type+'-input.tsv'
    print('test file name {}'.format(data_dir + 'test-' + ontology_type + add_name + '.tsv'))  ##!!##!!
    ground_truth, prot_name_test = load_true_data(data_dir + 'test-' + ontology_type + add_name + '.tsv', label_lookup)  ##!!##!!
    print('\nnum of prots in test data {}\n'.format(len(prot_name_test)))

    print('\nread psiblast result')
    df_psiblast = pd.read_csv(blast_result_dir + what_set + "-" + ontology_type + ".psiblast.txt", header=None, skip_blank_lines=True)
    df_psiblast = df_psiblast.dropna()
    df_psiblast = df_psiblast.reset_index(drop=True)
    prot_name_in_psi = sorted(list(set(list(df_psiblast[0]))))
    print('\nnum of prots from test found in psiblast {}, we may be unable to find a match for every test sequence\n'.format(len(prot_name_in_psi)))

    print('\nread blast result')
    df_blast = pd.read_csv(blast_result_dir + what_set + "-" + ontology_type + ".blast.txt", header=None, skip_blank_lines=True)

    ## store predictions as a matrix
    # prediction = {}
    prediction = np.zeros([len(prot_name_test), len(label_lookup)])

    in_psi = set(df_psiblast[0])
    in_blast = set(df_blast[0])

    for index, this_prot in tqdm(enumerate(prot_name_test)):
        if (this_prot not in in_psi) and (this_prot not in in_blast):
            print('not found in both blast and psiblast {}'.format(this_prot))
            continue

        df_psiblast_g = df_psiblast[df_psiblast[0] == this_prot]
        df_psiblast_g = df_psiblast_g[df_psiblast_g[1] != this_prot]  ## don't compare to self
        df_blast_g = df_blast[df_blast[0] == this_prot]
        df_blast_g = df_blast_g[df_blast_g[1] != this_prot]  ## don't compare to self

        psiblast_go_score_array, w_psiblast = tally_over_n_template(df_psiblast_g, prot_annot)
        blast_go_score_array, _ = tally_over_n_template(df_blast_g, prot_annot)

        final_score = {}
        psiblast_go = list(psiblast_go_score_array.keys())
        blast_go = list(blast_go_score_array.keys())
        go_found = set(psiblast_go + blast_go)
        if len(go_found) == 0:  ## unexpected case
            print('passed 1st screen in blast+psiblast but no go term was found ?? {}'.format(this_prot))
            final_score[this_prot] = None
            continue

        for g in go_found:
            ## average between psiblast and blast
            if (g in psiblast_go_score_array) and (g in blast_go_score_array):
                x1 = psiblast_go_score_array[g] * (1 - w_psiblast) + blast_go_score_array[g] * (w_psiblast)
            if (g in psiblast_go_score_array) and (g not in blast_go_score_array):
                x1 = psiblast_go_score_array[g]
            if (g not in psiblast_go_score_array) and (g in blast_go_score_array):
                x1 = blast_go_score_array[g]
            final_score[g] = x1  ## each GO term gets one score for this protein

        ## done with this one protein
        prediction[index] = order_go_score(final_score, label_lookup)

        ## filter down the original set so things run faster
        # df[~df.countries.isin(countries)]
        # df_psiblast = df_psiblast[~df_psiblast[0].isin([this_prot])]
        # df_blast = df_blast[~df_blast[0].isin([this_prot])]
        # if index > 10:
        #     print(prediction[0:10])
        #     exit()

    ## finished all proteins
    pickle.dump({'prediction': prediction, 'true_label': ground_truth},
                open(blast_result_dir + what_set + "-" + ontology_type + "-prediction.pickle", "wb"))

    result = evaluation_metric.all_metrics(
        np.round(prediction), ground_truth, yhat_raw=prediction, k=[5, 10, 15, 20, 25])  ## we can pass a vector of P@k and R@k
    evaluation_metric.print_metrics(result)
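## Hedged helper sketch (not part of the original code): the per-GO combination rule
## used in the loop above, pulled out for clarity. When a GO term is scored by both
## psiblast and blast, the two scores are mixed with the weight @w_psiblast returned
## by tally_over_n_template, applied exactly as in the loop above; otherwise whichever
## score exists is used. The helper name is hypothetical.
def combine_blast_scores(go_term, psiblast_scores, blast_scores, w_psiblast):
    in_psi = go_term in psiblast_scores
    in_blast = go_term in blast_scores
    if in_psi and in_blast:
        return psiblast_scores[go_term] * (1 - w_psiblast) + blast_scores[go_term] * w_psiblast
    if in_psi:
        return psiblast_scores[go_term]
    return blast_scores[go_term]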