Example #1
def eval(prediction_dict,
         sub_array=None,
         path="",
         add_name="",
         filter_down=False):
    prediction = prediction_dict['prediction']
    true_label = prediction_dict['true_label']
    if sub_array is not None:
        print('len label {}'.format(len(sub_array)))
        prediction = prediction[:, sub_array]  ## obs x label
        true_label = true_label[:, sub_array]
    #
    # threshold_fmax=np.arange(0.0001,1,.005)
    if filter_down:  ##!! when evaluating rare terms, what if only a few proteins have them?
        print('dim before remove {}'.format(prediction.shape))
        where = np.where(np.sum(true_label, axis=1) > 0)[0]
        print('retain these prot {}'.format(len(where)))
        prediction = prediction[where]
        print('check dim {}'.format(prediction.shape))
        true_label = true_label[where]
    #
    result = evaluation_metric.all_metrics(
        np.round(prediction),
        true_label,
        yhat_raw=prediction,
        k=[5, 15, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
        path=path,
        add_name=add_name)
    return result
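
A minimal usage sketch for this helper, assuming the prediction pickle stores a dict with 'prediction' and 'true_label' arrays of shape num_obs x num_labels; the file name and the rare-term index below are illustrative, not part of the original code.

## Hypothetical usage sketch; the pickle path and rare_term_index are illustrative only.
import pickle
import numpy as np

prediction_dict = pickle.load(open("test-mf-prediction.pickle", "rb"))
## example: evaluate only GO terms annotated to fewer than 25 proteins in the test set
rare_term_index = np.where(prediction_dict['true_label'].sum(axis=0) < 25)[0]
result = eval(prediction_dict,            ## the helper defined above, which shadows the builtin eval
              sub_array=rare_term_index,
              path="results/",
              add_name="rare",
              filter_down=True)
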
def eval(prediction_dict, sub_array=None):
    prediction = prediction_dict['prediction']
    true_label = prediction_dict['true_label']
    if sub_array is not None:
        prediction = prediction[:, sub_array]  ## obs x label
        true_label = true_label[:, sub_array]
    #
    result = evaluation_metric.all_metrics(np.round(prediction),
                                           true_label,
                                           yhat_raw=prediction,
                                           k=[5, 10, 15, 20, 25, 30, 35, 40])
    return result
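
The sub_array argument here is the same column-index mechanism as in the first variant; a hedged sketch of building it from a GO-id-to-column dict (the label_index_map pickle mirrors the lookup used in a later example on this page, and the GO ids are hypothetical).

## Illustrative only: score a hand-picked subset of GO terms by column index.
import pickle
import numpy as np

prediction_dict = pickle.load(open("test-mf-prediction.pickle", "rb"))   ## hypothetical file
label_index_map = pickle.load(open("label_index_map.pickle", "rb"))      ## GO id -> column index
subset_terms = ['0003677', '0005524']  ## hypothetical GO ids, stored without the 'GO:' prefix
sub_array = np.array([label_index_map[go] for go in subset_terms if go in label_index_map])
result = eval(prediction_dict, sub_array=sub_array)
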
    def do_eval(self, prot_loader, **kwargs):

        torch.cuda.empty_cache()
        self.eval()

        tr_loss = 0
        preds = []
        true_label = []

        for step, batch in enumerate(prot_loader):

            with torch.no_grad():  ## no gradient for everything in this section

                batch = tuple(t for t in batch)
                if self.args.has_ppi_emb:
                    prot_idx, prot_len, mask, label_ids, prot_interact_emb = batch  ## @label_ids must be of size @args.num_label_to_test
                else:
                    prot_idx, prot_len, mask, label_ids, _ = batch

                prot_idx = prot_idx[:, 0:int(max(prot_len))]  ## trim down
                mask = mask[:, 0:int(max(prot_len))]

                if self.args.has_ppi_emb and (self.args.prot_interact_vec_dim > 0):
                    prot_interact_emb = prot_interact_emb.cuda()
                else:
                    prot_interact_emb = None

                pred, loss = self.forward(prot_idx.cuda(),
                                          mask.cuda(), prot_interact_emb,
                                          label_ids.cuda(), **kwargs)

                # loss = self.classify_loss ( pred, label_ids.cuda() )

            tr_loss = tr_loss + loss

            ## apply sigmoid here, if sigmoid was not applied inside @forward
            if self.loss_type == 'BCEWithLogitsLoss':
                pred = torch.sigmoid(pred)  ## F.sigmoid is deprecated

            if len(preds) == 0:
                preds.append(pred.detach().cpu().numpy())
                true_label.append(label_ids.detach().cpu().numpy())
            else:
                preds[0] = np.append(preds[0],
                                     pred.detach().cpu().numpy(),
                                     axis=0)
                true_label[0] = np.append(true_label[0],
                                          label_ids.detach().cpu().numpy(),
                                          axis=0)  # row array

        # end eval
        true_label = true_label[0]
        preds = preds[0]

        print('loss {}'.format(tr_loss))

        print('pred label')
        print(preds)

        print('true label')
        print(true_label)

        trackF1macro = {}
        trackF1micro = {}  # metrics["f1_micro"]

        trackMacroPrecision = {}  # [MACRO] accuracy, precision, recall
        trackMacroRecall = {}

        trackMicroPrecision = {}
        trackMicroRecall = {}

        ##!! DO NOT NEED TO DO THIS ALL THE TIME DURING TRAINING
        # if self.args.not_train:
        #   rounding = np.arange(.1,1,.4)
        # else:
        rounding = [0.5]

        for round_cutoff in rounding:

            print('\n\nround cutoff {}'.format(round_cutoff))

            preds_round = 1.0 * (round_cutoff < preds)  ## converted into 0/1

            result = evaluation_metric.all_metrics(
                preds_round, true_label, yhat_raw=preds,
                k=[10, 20])  ## we can pass vector of P@k and R@k
            evaluation_metric.print_metrics(result)

            if 'full_data' not in trackF1macro:
                trackF1macro['full_data'] = [result["f1_macro"]]
                trackF1micro['full_data'] = [result["f1_micro"]]
                trackMacroPrecision['full_data'] = [result["prec_macro"]]
                trackMicroPrecision['full_data'] = [result["prec_micro"]]
                trackMacroRecall['full_data'] = [result["rec_macro"]]
                trackMicroRecall['full_data'] = [result["rec_micro"]]
            else:
                trackF1macro['full_data'].append(result["f1_macro"])
                trackF1micro['full_data'].append(result["f1_micro"])
                trackMacroPrecision['full_data'].append(result["prec_macro"])
                trackMicroPrecision['full_data'].append(result["prec_micro"])
                trackMacroRecall['full_data'].append(result["rec_macro"])
                trackMicroRecall['full_data'].append(result["rec_micro"])

            if ('GoCount' in kwargs) and self.args.not_train:  ## do not need to do this all the time
                print('\n\nsee if method improves accuracy conditioned on frequency of GO terms')

                ## frequency below the 25th quantile and above the 75th quantile
                ## indexing must be computed ahead of time to avoid redundant calculation

                for cutoff in ['quant25', 'quant75', 'betweenQ25Q75']:
                    ## column indices to pull out; @pred is num_prot x num_go
                    result = evaluation_metric.all_metrics(
                        preds_round[:, kwargs[cutoff]],
                        true_label[:, kwargs[cutoff]],
                        yhat_raw=preds[:, kwargs[cutoff]],
                        k=[10, 20])
                    print("\nGO frequency subset: {}".format(cutoff))
                    evaluation_metric.print_metrics(result)

                    if cutoff not in trackF1macro:
                        trackF1macro[cutoff] = [result["f1_macro"]]
                        trackF1micro[cutoff] = [result["f1_micro"]]
                        trackMacroPrecision[cutoff] = [result["prec_macro"]]
                        trackMicroPrecision[cutoff] = [result["prec_micro"]]
                        trackMacroRecall[cutoff] = [result["rec_macro"]]
                        trackMicroRecall[cutoff] = [result["rec_micro"]]
                    else:
                        trackF1macro[cutoff].append(result["f1_macro"])
                        trackF1micro[cutoff].append(result["f1_micro"])
                        trackMacroPrecision[cutoff].append(
                            result["prec_macro"])
                        trackMicroPrecision[cutoff].append(
                            result["prec_micro"])
                        trackMacroRecall[cutoff].append(result["rec_macro"])
                        trackMicroRecall[cutoff].append(result["rec_micro"])

        ##
        if self.args.not_train:
            print('\n\ntracking f1 compile into list\n')

            # print ('\nmacro f1 prec rec')
            for k, v in trackF1macro.items():
                print('macroF1 ' + k + " " + " ".join(str(s) for s in v))

            for k, v in trackMacroPrecision.items():
                print('macroPrec ' + k + " " + " ".join(str(s) for s in v))

            for k, v in trackMacroRecall.items():
                print('macroRec ' + k + " " + " ".join(str(s) for s in v))

            # print ('\nmicro f1 prec rec')
            for k, v in trackF1micro.items():
                print('microF1 ' + k + " " + " ".join(str(s) for s in v))

            for k, v in trackMicroPrecision.items():
                print('microPrec ' + k + " " + " ".join(str(s) for s in v))

            for k, v in trackMicroRecall.items():
                print('microRec ' + k + " " + " ".join(str(s) for s in v))

        output = {
            'prediction': preds,
            'truth': true_label
        }  ##!! make life easier if we have both
        return result, output, tr_loss
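
A hedged sketch of how do_eval might be driven from a validation script; the model class name, the DataLoader, and the keyword arguments are assumptions, not taken from the original repository.

## Hypothetical driver; the model class and loader names are illustrative.
import pickle

model = ProtEncoder(args)   ## assumed: an nn.Module exposing do_eval as defined above
model.cuda()
result, output, eval_loss = model.do_eval(dev_loader)
## When args.not_train is set, passing GoCount plus precomputed column indices
## (quant25=..., quant75=..., betweenQ25Q75=...) as keyword arguments enables the
## per-frequency breakdown above; note the same kwargs are also forwarded to self.forward.
pickle.dump(output, open("dev_prediction.pickle", "wb"))   ## output holds 'prediction' and 'truth'
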
Example #4
def evaluate(args, model, tokenizer, label_2test_array, prefix=""):

    num_labels = len(label_2test_array)

    # Loop to handle MNLI double evaluation (matched, mis-matched)
    eval_output_dir = args.output_dir

    eval_dataset = load_and_cache_examples(args,
                                           tokenizer,
                                           label_2test_array,
                                           evaluate=True)

    if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(eval_output_dir)

    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Note that DistributedSampler samples randomly
    eval_sampler = SequentialSampler(
        eval_dataset) if args.local_rank == -1 else DistributedSampler(
            eval_dataset)
    eval_dataloader = DataLoader(eval_dataset,
                                 sampler=eval_sampler,
                                 batch_size=args.eval_batch_size)

    # Eval!
    logger.info("***** Running evaluation {} *****".format(prefix))
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    model.eval()

    prediction = None
    true_label = None

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        # batch = batch.to(args.device)

        max_len_in_batch = int(torch.max(torch.sum(
            batch[3], 1)))  ## only need max len of AA
        input_ids_aa = batch[1][:, 0:max_len_in_batch].to(args.device)
        input_ids_label = batch[2].to(args.device)
        attention_mask = torch.cat(
            (batch[3][:, 0:max_len_in_batch],
             torch.ones(input_ids_label.shape, dtype=torch.long)),
            dim=1).to(args.device)

        labels = batch[0].to(args.device)  ## already in batch_size x num_label
        ## must append 0 positions to the front, so that we mask out AA
        labels_mask = torch.cat((torch.zeros(
            input_ids_aa.shape), torch.ones(input_ids_label.shape)),
                                dim=1).to(args.device)  ## test all labels

        ppi_vec = batch[4].unsqueeze(1).expand(labels.shape[0],
                                               max_len_in_batch + num_labels,
                                               256).to(args.device)  ## 3D: batch x (max_len + num_labels) x 256

        if args.aa_type_emb:
            aa_type = batch[5][:, 0:max_len_in_batch].to(args.device)
        else:
            aa_type = None

        with torch.no_grad():
            outputs = model(0,
                            input_ids_aa=input_ids_aa,
                            input_ids_label=input_ids_label,
                            token_type_ids=aa_type,
                            attention_mask=attention_mask,
                            labels=labels,
                            position_ids=None,
                            attention_mask_label=labels_mask,
                            prot_vec=ppi_vec)
            lm_loss = outputs[0]
            eval_loss += lm_loss.mean().item()

        nb_eval_steps += 1

        ## track output
        norm_prob = torch.softmax(outputs[1], 1)  ## still label x 2
        norm_prob = norm_prob.detach().cpu().numpy()[:, 1]  ## size is label

        if prediction is None:
            ## track predicted probability
            true_label = batch[0].data.numpy()
            prediction = np.reshape(
                norm_prob,
                (batch[0].shape))  ## num actual sample v.s. num label
        else:
            true_label = np.vstack((true_label, batch[0].data.numpy()))
            prediction = np.vstack(
                (prediction, np.reshape(norm_prob, (batch[0].shape))))

    result = evaluation_metric.all_metrics(
        np.round(prediction),
        true_label,
        yhat_raw=prediction,
        k=[5, 10, 15, 20, 25])  ## we can pass vector of P@k and R@k
    # evaluation_metric.print_metrics( result )

    result['eval_loss'] = eval_loss / nb_eval_steps

    output_eval_file = os.path.join(eval_output_dir, "eval_results.txt")
    with open(output_eval_file, "a+") as writer:
        logger.info("***** Eval results {} *****".format(prefix))
        print("\n***** Eval results {} *****".format(prefix))
        writer.write("\n***** Eval results {} *****".format(prefix))
        for key in sorted(result.keys()):
            print("  {} = {}".format(key, str(result[key])))
            # writer.write("%s = %s\n" % (key, str(result[key])))

    return result
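
A hedged call-site sketch for this evaluate variant, assuming args already carries the attributes the function reads (output_dir, per_gpu_eval_batch_size, n_gpu, local_rank, device, aa_type_emb); the label file name and checkpoint prefix are illustrative.

## Hypothetical call site; the label file name and prefix are illustrative.
import pandas as pd

label_2test_array = sorted(pd.read_csv("mf_label_to_test.tsv", header=None)[0].tolist())
result = evaluate(args, model, tokenizer, label_2test_array, prefix="checkpoint-1000")
print(result["f1_micro"], result["eval_loss"])
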
def evaluate(args,
             model,
             tokenizer,
             label_2test_array,
             prefix="",
             config=None):

    num_labels = len(label_2test_array)

    # Loop to handle MNLI double evaluation (matched, mis-matched)
    eval_output_dir = args.output_dir

    eval_dataset = load_and_cache_examples(args,
                                           tokenizer,
                                           label_2test_array,
                                           evaluate=True,
                                           config=config)

    if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(eval_output_dir)

    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Note that DistributedSampler samples randomly
    # eval_sampler = SequentialSampler(eval_dataset) if args.local_rank == -1 else DistributedSampler(eval_dataset)
    eval_sampler = RandomSampler(
        eval_dataset) if args.local_rank == -1 else DistributedSampler(
            eval_dataset)  ## do this to avoid block of large data
    eval_dataloader = DataLoader(eval_dataset,
                                 sampler=eval_sampler,
                                 batch_size=args.eval_batch_size,
                                 num_workers=2)

    # Eval!
    logger.info("***** Running evaluation {} *****".format(prefix))
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    model.eval()

    prediction = None
    true_label = None
    ave_GOvec = None

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        # batch = batch.to(args.device)

        if config.ppi_front:
            max_len_in_batch = int(torch.max(
                torch.sum(batch[3][:, 1::],
                          1)))  ## exclude 1st column, only need max len of AA
            max_len_in_mask = max_len_in_batch + 1
        else:
            max_len_in_batch = int(torch.max(torch.sum(
                batch[3], 1)))  ## only need max len of AA
            max_len_in_mask = max_len_in_batch

        input_ids_aa = batch[1][:, 0:max_len_in_batch].to(args.device)
        input_ids_label = batch[2].to(args.device)  ## also pass in SEP

        attention_mask = torch.cat(
            (batch[3][:, 0:max_len_in_mask],
             torch.ones(input_ids_label.shape, dtype=torch.long)),
            dim=1).to(args.device)

        labels = batch[0].to(args.device)  ## already in batch_size x num_label
        ## must append 0 positions to the front, so that we mask out AA
        labels_mask = torch.cat(
            (torch.zeros(input_ids_aa.shape[0],
                         max_len_in_mask), torch.ones(input_ids_label.shape)),
            dim=1).to(args.device)

        if args.model_type == 'ppi':
            if config.ppi_front:
                ppi_vec = batch[4].unsqueeze(1).to(args.device)  ## 3D: batch_size x 1 x dim
            else:
                ppi_vec = batch[4].unsqueeze(1).expand(
                    labels.shape[0], max_len_in_batch + num_labels,
                    256).to(args.device)  ## expand over positions: batch_size x (max_len + num_labels) x 256
        else:
            ppi_vec = None

        if config.aa_type_emb:
            aa_type = batch[5][:, 0:max_len_in_batch, :].to(args.device)
        else:
            aa_type = None

        with torch.no_grad():
            outputs = model(ppi_vec,
                            input_ids_aa=input_ids_aa,
                            input_ids_label=input_ids_label,
                            token_type_ids=aa_type,
                            attention_mask=attention_mask,
                            labels=labels,
                            position_ids=None,
                            attention_mask_label=labels_mask,
                            prot_vec=ppi_vec)
            lm_loss = outputs[0]
            eval_loss += lm_loss.mean().item()

        ## !! take average of last hidden layer
        hidden_GOvec = outputs[-2][12]  ## batch x max_len x dim
        hidden_GOvec = hidden_GOvec[:, max_len_in_batch:, :]  ## remove AA positions, which go from 0 to @max_len_in_batch
        hidden_GOvec = torch.sum(hidden_GOvec, 0)  ## sum over batch
        if ave_GOvec is None:
            ave_GOvec = hidden_GOvec
        else:
            ave_GOvec = ave_GOvec + hidden_GOvec

        nb_eval_steps += 1
        ## track output
        norm_prob = torch.softmax(outputs[1], 1)  ## still label x 2
        norm_prob = norm_prob.detach().cpu().numpy()[:, 1]  ## size is label

        if prediction is None:
            ## track predicted probability
            true_label = batch[0].data.numpy()
            prediction = np.reshape(
                norm_prob,
                (batch[0].shape))  ## num actual sample v.s. num label
        else:
            true_label = np.vstack((true_label, batch[0].data.numpy()))
            prediction = np.vstack(
                (prediction, np.reshape(norm_prob, (batch[0].shape))))

    result = evaluation_metric.all_metrics(
        np.round(prediction),
        true_label,
        yhat_raw=prediction,
        k=[5, 10, 15, 20, 25])  ## we can pass vector of P@k and R@k
    # evaluation_metric.print_metrics( result )
    result['eval_loss'] = eval_loss / nb_eval_steps

    output_eval_file = os.path.join(eval_output_dir,
                                    "eval_results" + prefix + ".txt")
    with open(output_eval_file, "a+") as writer:
        logger.info("***** Eval results {} *****".format(prefix))
        print("\n***** Eval results {} *****".format(prefix))
        writer.write("\n***** Eval results {} *****".format(prefix))
        for key in sorted(result.keys()):
            print("  {} = {}".format(key, str(result[key])))
            # writer.write("%s = %s\n" % (key, str(result[key])))

    ## need to average out.
    ave_GOvec = ave_GOvec.detach().cpu().numpy() / len(eval_dataset)
    print('hidden go vec dim {}'.format(ave_GOvec.shape))
    ## write out
    fout = open(args.output_dir + "/" + args.govec_outname + ".tsv", "w")
    for index, name in enumerate(label_2test_array):
        fout.write(name + '\t' + '\t'.join(str(s)
                                           for s in ave_GOvec[index]) + '\n')
    fout.close()

    return 0
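
This variant additionally accumulates the hidden states taken from outputs[-2][12] for the GO positions, averages them over the dataset, and writes one tab-separated row per GO term. A hedged sketch of reading that file back (the path pieces mirror how the file is written above).

## Illustrative reader for the GO-vector file written above.
import numpy as np

go_vec = {}
with open(args.output_dir + "/" + args.govec_outname + ".tsv") as f:
    for line in f:
        fields = line.strip().split('\t')
        go_vec[fields[0]] = np.array([float(x) for x in fields[1:]])
print('{} GO vectors of dim {}'.format(len(go_vec), len(next(iter(go_vec.values())))))
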
def submitJobs(where_train, set_type, where_test, add_name, do_split):  ## @do_split is needed if we use metaGO data

  if add_name == 'none':
    add_name = ""

  os.chdir ( '/u/flashscratch/d/datduong/goAndGeneAnnotationDec2018/')

  test_gene_annot = pickle.load(open(where_test+"/"+set_type+"_gene_annot.pickle","rb"))
  print ('num of gene to be tested {}'.format(len(test_gene_annot)))

  print ('\n\nmust use the prot names in the annot, not psiblast outcome\n\n')
  genes = list (test_gene_annot.keys())
  genes.sort() ## alphabet

  prediction = pickle.load ( open(where_train+"/seq_seq_predict_go_"+add_name+".pickle","rb") )

  ## for each gene, fill in the prediction matrix
  label_index_map = pickle.load ( open (where_train+"/label_index_map.pickle","rb") )
  prediction_np = np.zeros( (len(genes), len(label_index_map)) )

  for g in genes :
    if g not in prediction:
      continue
    go_assign = list ( prediction[g].keys() )
    go_assign.sort()
    score = [prediction[g][go] for go in go_assign]
    location = [label_index_map[go] for go in go_assign]
    ## assign the score
    prediction_np [ genes.index(g), location ] = score

  ## convert np into pd to get row names
  df = pd.DataFrame(prediction_np, index=genes)
  pickle.dump ( df, open(where_train+"/seq_seq_predict_go_"+add_name+".pd.pickle","wb"))

  ## filter out to only go terms in training set
  truth_np = np.zeros( (len(genes), len(label_index_map)) )

  for g in genes :
    if do_split == 1:
      if ";" in test_gene_annot[g][0]:
        go_assign = test_gene_annot[g][0].strip().split(";")
      else:
        go_assign = test_gene_annot[g][0].strip().split(",")
    else:
      go_assign = test_gene_annot[g]

    #
    go_assign.sort()
    go_assign = [re.sub("GO:","",go) for go in go_assign]

    location = [label_index_map[go] for go in go_assign if go in label_index_map ] ## !! record only GO we saw in training
    ## assign the score
    truth_np [ genes.index(g), location ] = 1

  print ('amino acid GO prediction')
  print (prediction_np)
  track_prec = []
  track_rec = []
  for k in [5,10,15,20,25,30,35,40]:
    animo_go_metric = evaluation_metric.all_metrics ( np.round(prediction_np), truth_np, yhat_raw=prediction_np, k=k ) ##  [ 0:(16*3) , :]
    if k == 5 :
      evaluation_metric.print_metrics( animo_go_metric )
    track_prec.append(animo_go_metric['prec_at_'+str(k)])
    track_rec.append(animo_go_metric['rec_at_'+str(k)])

  #
  fmax_val = fmax.f_max ( truth_np, prediction_np, threshold=np.arange(0,1,.02) )
  print ('fmax value {}'.format ( fmax_val ) )
  print ('precision/recall at K')
  print (track_prec)
  print (track_rec)


  label_bio_type = pickle.load( open( where_train+'/label_bio_type.pickle','rb') )
  # common30 = pickle.load ( open(where_train+"/common_index30.pickle","rb"))
  # label_bio_type['common30'] = common30

  for bio_type in label_bio_type:
    index = label_bio_type [ bio_type ]
    print ( "\n\n"+bio_type)
    print ( index[0:10] )
    track_prec = []
    track_rec = []
    for k in [5,10,15,20,25,30,35,40]:
      animo_go_metric = evaluation_metric.all_metrics ( np.round(prediction_np[: , index]), truth_np[: , index], yhat_raw=prediction_np[: , index], k=k)
      if k == 5 :
        evaluation_metric.print_metrics( animo_go_metric )
      track_prec.append(animo_go_metric['prec_at_'+str(k)])
      track_rec.append(animo_go_metric['rec_at_'+str(k)])

    fmax_val = fmax.f_max ( truth_np[: , index], prediction_np[: , index], threshold=np.arange(0,1,.02) )
    print ('fmax value {}'.format ( fmax_val ) )
    print ('precision/recall at K')
    print (track_prec)
    print (track_rec)
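
A hedged invocation sketch for this script-style function; the directory names are illustrative, and note that the function first chdirs into a hard-coded scratch directory, so relative paths are resolved against it.

## Hypothetical invocation; the directory names are illustrative.
submitJobs('entailment_model/fold_1',  ## where_train: holds seq_seq_predict_go_*.pickle, label_index_map.pickle, label_bio_type.pickle
           'test',                     ## set_type: prefix of <set_type>_gene_annot.pickle
           'uniprot_annot',            ## where_test
           'none',                     ## add_name ('none' is mapped to an empty suffix)
           do_split=1)                 ## 1 when annotations are ';' or ',' separated strings (metaGO-style)
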
def submitJobs(main_dir, data_dir, blast_result_dir, what_set, ontology_type, all_test_label, add_name='none'):

  if add_name=='none':
    add_name = ""

  #### blast and psi-blast will have the same format.
  ## @all_test_label is file of all labels to be tested, adding this so that we return a matrix num_ob x num_label
  os.chdir(main_dir)

  ## labels to be tested
  all_test_label = pd.read_csv(all_test_label,header=None)
  print ('\nsort labels to be tested, we do the same when using NN model.')
  all_test_label = sorted ( list(all_test_label[0]) )
  label_lookup = {value:index for index,value in enumerate(all_test_label)}

  ## prot annotation train set, will be used later to infer assignment in testset
  ## can only predict what is found in train set if we use blast
  print ('load go annotation for train data')
  ## we can convert text into dict on-the-fly
  # try:
  #   prot_annot = pickle.load ( open (data_dir+'train-'+ontology_type+'.TrueLabel.pickle','rb') )
  # except:
  # train-mf.tsv

  prot_annot, prot_name_train = MakeGroundTruthText2Dict(data_dir+'train-'+ontology_type+add_name+'.tsv')
  print ('\nnum of prots in train data {}\n'.format(len(prot_annot)))

  print ('load go annotation for test data')
  ## COMMENT get true labels
  ## COMMENT 'test-'+ontology_type+'.tsv' has different ordering than 'test-'+ontology_type+'-input.tsv'
  print ('test file name {}'.format(data_dir+'test-'+ontology_type+add_name+'.tsv')) ##!!##!!

  ground_truth, prot_name_test = load_true_data (data_dir+'test-'+ontology_type+add_name+'.tsv',label_lookup) ##!!##!!
  print ('\nnum of prots in test data {}\n'.format(len(prot_name_test)))

  print ('\nread psiblast result')
  df_psiblast = pd.read_csv ( blast_result_dir+what_set+"-"+ontology_type+".psiblast.txt" , header=None, skip_blank_lines=True )
  df_psiblast = df_psiblast.dropna()
  df_psiblast = df_psiblast.reset_index(drop=True)

  prot_name_in_psi = sorted ( list ( set (list ( df_psiblast[0] ) ) ) )
  print ('\nnum of prots from test found in psiblast {}, we may be unable to find a match for every test sequence\n'.format(len(prot_name_in_psi)))

  print ('\nread blast result')
  df_blast = pd.read_csv ( blast_result_dir+what_set+"-"+ontology_type+".blast.txt" , header=None,skip_blank_lines=True )

  ## should make prediction as a matrix
  # prediction = {}
  prediction = np.zeros([len(prot_name_test),len(label_lookup)])

  in_psi = set(df_psiblast[0])
  in_blast = set(df_blast[0])

  for index,this_prot in tqdm(enumerate(prot_name_test)) :

    if (this_prot not in in_psi) and (this_prot not in in_blast):
      print ('not found in either blast or psiblast {}'.format(this_prot))
      continue

    df_psiblast_g = df_psiblast[ df_psiblast[0] == this_prot ]
    df_psiblast_g = df_psiblast_g[ df_psiblast_g[1] != this_prot ] ## don't compare to self

    df_blast_g = df_blast[ df_blast[0] == this_prot ]
    df_blast_g = df_blast_g[ df_blast_g[1] != this_prot ] ## don't compare to self

    psiblast_go_score_array, w_psiblast = tally_over_n_template ( df_psiblast_g, prot_annot )
    blast_go_score_array, _ = tally_over_n_template ( df_blast_g, prot_annot )

    final_score = {}
    psiblast_go = list ( psiblast_go_score_array.keys() )
    blast_go = list ( blast_go_score_array.keys() )

    go_found = set ( psiblast_go + blast_go )
    if len(go_found) == 0: ## funky case ??
      print ('passed 1st screen in blast+psiblast but no GO term was found ?? {}'.format(this_prot))
      final_score[this_prot] = None
      continue

    for g in go_found: ## average between psiblast and blast
      if (g in psiblast_go_score_array) and (g in blast_go_score_array) :
        x1 = psiblast_go_score_array[g] * (1-w_psiblast) + blast_go_score_array[g] * (w_psiblast)
      if (g in psiblast_go_score_array) and (g not in blast_go_score_array) :
        x1 = psiblast_go_score_array[g]
      if (g not in psiblast_go_score_array) and (g in blast_go_score_array) :
        x1 = blast_go_score_array[g]
      final_score[g] = x1 ## each GO term has a score for this one protein

    ## done with this one protein
    prediction [index] = order_go_score (final_score,label_lookup)

    ## filter down original set so things run faster
    # df[~df.countries.isin(countries)]
    # df_psiblast = df_psiblast[ ~df_psiblast[0].isin([this_prot]) ]
    # df_blast = df_blast[ ~df_blast[0].isin([this_prot]) ]

    # if index > 10:
    #   print (prediction[0:10])
    #   exit()

  ## finish all proteins

  pickle.dump ( {'prediction':prediction, 'true_label':ground_truth}, open(blast_result_dir+what_set+"-"+ontology_type+"-prediction.pickle","wb") )

  result = evaluation_metric.all_metrics ( np.round(prediction) , ground_truth, yhat_raw=prediction, k=[5,10,15,20,25]) ## we can pass vector of P@k and R@k
  evaluation_metric.print_metrics( result )
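
A hedged invocation sketch for this BLAST/PSI-BLAST baseline; every path is illustrative, and the file-name patterns noted in the comments are the ones the function constructs above.

## Hypothetical invocation; all paths are illustrative.
submitJobs(main_dir='deepgo/data',
           data_dir='deepgo/data/train/fold_1/',          ## expects train-<onto><add_name>.tsv and test-<onto><add_name>.tsv
           blast_result_dir='deepgo/data/blast/fold_1/',  ## expects <what_set>-<onto>.blast.txt and .psiblast.txt
           what_set='test',
           ontology_type='mf',
           all_test_label='deepgo/data/mf_label_list.csv')  ## one GO label per line
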