def str_display(ls):
    """Format a number, or a sequence of numbers, with four decimal places.

    Args:
        ls: either a single value accepted by the ``'{0:.4f}'`` format spec,
            or a list / tuple / ``np.ndarray`` of such values.

    Returns:
        A string. A single value yields e.g. ``'0.5000'``; a sequence yields
        its formatted elements joined by single spaces (an empty sequence
        yields ``''``).
    """
    # A 0-d ndarray passes the isinstance check below but cannot be iterated,
    # so unwrap it to a plain Python scalar first.
    if isinstance(ls, np.ndarray) and ls.ndim == 0:
        ls = ls.item()

    if not isinstance(ls, (list, tuple, np.ndarray)):
        return '{0:.4f}'.format(ls)

    return ' '.join('{0:.4f}'.format(v) for v in ls)


if __name__ == "__main__":
    # Command-line driver: evaluate one predicted matrix against one
    # ground-truth matrix over a sweep of probability cutoffs.
    if len(sys.argv) != 4:
        # Single-argument print(...) behaves the same under Python 2 and 3.
        print('python CalcMCCF1.py pred_matrix_file distcb_matrix_file target')
        print(' Both matrix files are text format with L lines and each line has L columns where L is the protein sequence length')
        # sys.exit instead of the site-provided exit(), which is not
        # guaranteed to exist (e.g. under `python -S`).
        sys.exit(-1)

    predFile = sys.argv[1]
    distcbFile = sys.argv[2]
    target = sys.argv[3]

    # LoadContactMatrix and CalcMCCF1 are defined outside this block.
    pred = LoadContactMatrix(predFile)
    truth = LoadContactMatrix(distcbFile)

    # Integer percentages 20, 22, ..., 58 are passed on as cutoffs 0.20-0.58.
    for prob in np.arange(20, 60, 2):
        accs = CalcMCCF1(pred=pred, truth=truth, probCutoff=prob / 100.)
        resultStr = (target + ' ' + str(pred.shape[0]) + ' cutoff=' + str(prob)
                     + ' ' + str_display(accs))
        print(resultStr)
predInfo = cPickle.load(fh) pred = predInfo[3]['CbCb'] preds[protein] = pred nativeFile = os.path.join(truthDir, protein + '.native.pkl') with open(nativeFile, 'rb') as fh: nativeInfo = cPickle.load(fh) truth = nativeInfo['atomDistMatrix']['CbCb'] truths[protein] = truth for prob in np.arange(5, 60, 1): #print "prob=", prob accs = [] for protein in content: acc = CalcMCCF1(pred=preds[protein], truth=truths[protein], probCutoff=prob / 100.) accs.append(acc) avgacc = np.average(accs, axis=0) resultStr = 'per-target avgMCCF1 at cutoff=' + str( prob) + ': ' + str_display(avgacc) print resultStr lrMCC = Metrics.MCC(avgacc[1], avgacc[2], avgacc[3], avgacc[4]) lrF1, lrprecision, lrrecall = Metrics.F1(avgacc[1], avgacc[2], avgacc[3], avgacc[4]) mrMCC = Metrics.MCC(avgacc[9], avgacc[10], avgacc[11], avgacc[12]) mrF1, mrprecision, mrrecall = Metrics.F1(avgacc[9], avgacc[10], avgacc[11], avgacc[12])