예제 #1
0
import sys
import os

from ContactUtils import TopAccuracy
from ContactUtils import LoadContactMatrix

if __name__ == "__main__":
    # Command-line entry point. Loads a predicted and a native contact matrix
    # from text files, passes them to TopAccuracy, and prints one result line.
    #
    # Arguments (positional, read from sys.argv):
    #   1. predicted contact matrix file (text)
    #   2. native contact matrix file (text)
    #   3. optional target name; when omitted it is derived from argument 2

    if len(sys.argv) < 3:
        # Single-argument print(...) produces the same output under Python 2
        # and also parses under Python 3.
        print('python EvaluateContactAccuracyTXT.py predContactMatrixFile_txt nativeContactMatrixFile_txt [targetName]')
        print('	This script evaluates the accuracy of a predicted contact matrix by comparing it with native contact matrix')
        print('	Both matrix files are in text format with L lines and each line has L columns where L is the protein sequence length')
        # Use sys.exit instead of the bare exit(): the latter is injected by
        # the `site` module and is missing when the interpreter runs without it.
        sys.exit(1)

    predFile = sys.argv[1]
    nativeFile = sys.argv[2]

    # Default target name: base name of the native file up to its first dot.
    if len(sys.argv) > 3:
        target = sys.argv[3]
    else:
        target = os.path.basename(nativeFile).split('.')[0]

    pred = LoadContactMatrix(predFile)
    truth = LoadContactMatrix(nativeFile)

    accs = TopAccuracy(pred, truth)

    # Output line: "<target> <pred.shape[0]> TopAcc <acc> <acc> ..."
    resultStr = target + ' ' + str(pred.shape[0]) + ' TopAcc '
    resultStr += ' '.join(str(a) for a in accs)
    print(resultStr)
예제 #2
0
def str_display(ls):
    """Format a number, or a sequence of numbers, with four decimal places.

    A list, tuple or numpy array is rendered element by element and the
    pieces are joined with single spaces. Any other value is formatted
    directly as one number.

    Returns the formatted string.
    """
    four_places = '{0:.4f}'.format

    if isinstance(ls, (list, tuple, np.ndarray)):
        pieces = [four_places(item) for item in ls]
        return ' '.join(pieces)

    return four_places(ls)


if __name__ == "__main__":

	if len(sys.argv) != 4:
    		print 'python CalcMCCF1.py pred_matrix_file distcb_matrix_file target'
		print '      Both matrix files are text format with L lines and each line has L columns where L is the protein sequence length'
    		exit(-1)

	predFile = sys.argv[1]
	distcbFile = sys.argv[2]
	target = sys.argv[3]

	pred = LoadContactMatrix(predFile)
	truth = LoadContactMatrix(distcbFile)

	for prob in np.arange(20, 60, 2):
		#print "prob=", prob
		accs = CalcMCCF1(pred=pred, truth=truth, probCutoff=prob/100.)
		resultStr = target + ' ' + str(pred.shape[0]) + ' cutoff=' + str(prob) + ' ' + str_display(accs)
		print resultStr

		print '\t\twhen the suffix is .txt or .ccmpred, it is a text matrix file with L rows and L columns where L is protein length and each entry shall be predicted confidence/probability of being a contact'
		print '\t\twhen the suffix is .predictedDistMatrix.pkl, it is a file in cPickle format containing a tuple of at least 6 items: name, sequence, predictedDistProbMatrix, predictedContactProbMatrix, labelWeight and labelDistribution'
		print '\tgroundTruthFile: the ground truth file in text or cPickle format, usually ending with .native.pkl'
		print'\t\twhen it is a text format, its content shall be distance matrix instead of contact matrix, i.e., a large value indicates a non-contact and -1 indicates an invalid entry'
    		exit(1)

	predFile = sys.argv[1]
	nativeFile = sys.argv[2]

	if len(sys.argv)>3:
		target = sys.argv[3]
	else:
		target = os.path.basename(nativeFile).split('.')[0]

	if predFile.endswith('.txt') or predFile.endswith('.ccmpred'):
		predCbCbContactMatrix = LoadContactMatrix(predFile)
	elif predFile.endswith('.pkl'):
		with open(predFile, 'rb') as fh:
			pred = cPickle.load(fh)
		predCbCbContactMatrix = pred[3]['CbCb']
	else:
		print 'ERROR: predFile shall end with .txt or .pkl'
		exit(1)

	if nativeFile.endswith('.txt'):
		nativeCbCbDistMatrix = LoadContactMatrix(nativeFile)
	elif nativeFile.endswith('.pkl'):
		with open(nativeFile, 'rb') as fh:
			truth = cPickle.load(fh)
		nativeCbCbDistMatrix = truth['atomDistMatrix']['CbCb']
	else: