import os
import sys

from ContactUtils import LoadContactMatrix
from ContactUtils import TopAccuracy

# Command-line driver: load a predicted and a native contact matrix from text
# files, score the prediction with TopAccuracy, and print one result line of
# the form "<target> <L> TopAcc <acc> <acc> ...".
if __name__ == "__main__":
    if len(sys.argv) < 3:
        # Each print takes a single parenthesised string, so the output is
        # the same under Python 2's print statement and Python 3's function.
        print('python EvaluateContactAccuracyTXT.py predContactMatrixFile_txt nativeContactMatrixFile_txt [targetName]')
        print(' This script evaluates the accuracy of a predicted contact matrix by comparing it with native contact matrix')
        print(' Both matrix files are in text format with L lines and each line has L columns where L is the protein sequence length')
        # sys.exit rather than the bare exit() helper: the latter is injected
        # by the site module and is absent when the interpreter runs without it.
        sys.exit(1)

    predFile = sys.argv[1]
    nativeFile = sys.argv[2]

    # The target name defaults to the native file's base name up to its first dot.
    if len(sys.argv) > 3:
        target = sys.argv[3]
    else:
        target = os.path.basename(nativeFile).split('.')[0]

    pred = LoadContactMatrix(predFile)
    truth = LoadContactMatrix(nativeFile)

    # NOTE(review): TopAccuracy is assumed to return an iterable of numeric
    # accuracies; each one is rendered with str() -- confirm against ContactUtils.
    accs = TopAccuracy(pred, truth)
    accsStr = [str(a) for a in accs]

    # pred.shape[0] is the number of rows of the predicted matrix (L in the
    # usage text above).
    resultStr = target + ' ' + str(pred.shape[0]) + ' TopAcc ' + ' '.join(accsStr)
    print(resultStr)
def str_display(ls):
    """Format a number, or a sequence of numbers, with four decimal places.

    If ``ls`` is not a list, tuple or numpy array it is formatted on its own.
    Otherwise each element is formatted and the pieces are joined with single
    spaces (an empty sequence yields an empty string).

    Returns the formatted string.
    """
    if not isinstance(ls, (list, tuple, np.ndarray)):
        return '{0:.4f}'.format(ls)
    return ' '.join('{0:.4f}'.format(v) for v in ls)


# Command-line driver: for probability cutoffs 20, 22, ..., 58 (percent) call
# CalcMCCF1 on the predicted and ground-truth matrices and print one result
# line per cutoff.
# NOTE(review): `np` and `CalcMCCF1` are used here but no import for them is
# visible in this part of the file -- confirm they are imported elsewhere.
if __name__ == "__main__":
    if len(sys.argv) != 4:
        # Each print takes a single parenthesised string, so the output is
        # the same under Python 2's print statement and Python 3's function.
        print('python CalcMCCF1.py pred_matrix_file distcb_matrix_file target')
        print(' Both matrix files are text format with L lines and each line has L columns where L is the protein sequence length')
        # sys.exit rather than the bare exit() helper, which only exists when
        # the site module has been loaded. The -1 status is kept unchanged.
        sys.exit(-1)

    predFile = sys.argv[1]
    distcbFile = sys.argv[2]
    target = sys.argv[3]

    pred = LoadContactMatrix(predFile)
    truth = LoadContactMatrix(distcbFile)

    for prob in np.arange(20, 60, 2):
        # The cutoff is passed to CalcMCCF1 as a fraction (prob / 100) but is
        # reported in the output line as the integer percentage.
        accs = CalcMCCF1(pred=pred, truth=truth, probCutoff=prob/100.)
        resultStr = target + ' ' + str(pred.shape[0]) + ' cutoff=' + str(prob) + ' ' + str_display(accs)
        print(resultStr)
print '\t\twhen the suffix is .txt or .ccmpred, it is a text matrix file with L rows and L columns where L is protein length and each entry shall be predicted confidence/probability of being a contact' print '\t\twhen the suffix is .predictedDistMatrix.pkl, it is a file in cPickle format containing a tuple of at least 6 items: name, sequence, predictedDistProbMatrix, predictedContactProbMatrix, labelWeight and labelDistribution' print '\tgroundTruthFile: the ground truth file in text or cPickle format, usually ending with .native.pkl' print'\t\twhen it is a text format, its content shall be distance matrix instead of contact matrix, i.e., a large value indicates a non-contact and -1 indicates an invalid entry' exit(1) predFile = sys.argv[1] nativeFile = sys.argv[2] if len(sys.argv)>3: target = sys.argv[3] else: target = os.path.basename(nativeFile).split('.')[0] if predFile.endswith('.txt') or predFile.endswith('.ccmpred'): predCbCbContactMatrix = LoadContactMatrix(predFile) elif predFile.endswith('.pkl'): with open(predFile, 'rb') as fh: pred = cPickle.load(fh) predCbCbContactMatrix = pred[3]['CbCb'] else: print 'ERROR: predFile shall end with .txt or .pkl' exit(1) if nativeFile.endswith('.txt'): nativeCbCbDistMatrix = LoadContactMatrix(nativeFile) elif nativeFile.endswith('.pkl'): with open(nativeFile, 'rb') as fh: truth = cPickle.load(fh) nativeCbCbDistMatrix = truth['atomDistMatrix']['CbCb'] else: