Ejemplo n.º 1
0
def main(argv):
    """Estimate distance bounds from a raw distance-probability file.

    argv[0] is the input file, loaded with DistanceUtils.LoadRawDistProbFile.
    If at least one further argument is present (its value is not inspected),
    the tuple (bounds, targetName, sequence) is also pickled to
    '<targetName>.bound.pkl'.

    When the estimated bounds contain a 'CbCb' entry, that entry is written
    through SaveBoundInListFormat to '<targetName>.bound.txt'.

    Exits with status 1, after printing the usage or an error message, when
    no argument is given or the input file does not exist.
    """
    if len(argv) < 1:
        Usage()
        exit(1)
    inputFile = argv[0]

    ## the mere presence of a second argument switches on the PKL output
    printBoundPKL = len(argv) >= 2

    if inputFile is None:
        print 'ERROR: Please provide an input file'
        exit(1)
    if not os.path.isfile(inputFile):
        print 'ERROR: The input file does not exist: ', inputFile
        exit(1)

    content = DistanceUtils.LoadRawDistProbFile(inputFile)
    ## only the first four fields are used; the contact matrix is unpacked but not needed below
    targetName, sequence, predictedDistProbMatrix, predictedContactProbMatrix = content[:4]

    ## The label-weight based correction of the predicted probabilities (and the
    ## optional dump of the corrected matrix) that used to run here is skipped:
    ## since version 3 an unbiased deep model is used.

    bounds = EstimateDistanceBounds(predictedDistProbMatrix)

    ## output Cb-Cb bound in text format
    if 'CbCb' in bounds:
        boundFileName = targetName + '.bound.txt'
        boundMatrix = bounds['CbCb']
        SaveBoundInListFormat(targetName, sequence, boundMatrix, boundFileName)

    if not printBoundPKL:
        return

    boundFileName = targetName + '.bound.pkl'
    with open(boundFileName, 'wb') as fh:
        cPickle.dump((bounds, targetName, sequence),
                     fh,
                     protocol=cPickle.HIGHEST_PROTOCOL)
def main(argv):

    	inputFile = None
    	targetName = None
	labelNames = config.allAtomPairNames + config.allOrientationNames
	potentialFileSuffix = 'pkl'
	minPotential = -30.0
	maxPotential = 30.0

	UseWeight4Orientation = True
	UseWeight4Distance = True

	## the largest dist cutoff
	rc = 18

	alpha4DFIRE = 1.61
	alpha4DFIREstr = '1.61'

	rgScale4DOPE = 1.

	## reference 
	reference = 'DFIRE'

	##
	UseRef4Orientation = True

	## refFile for SimuRW
	refFile = None

	#savefolder = os.getcwd()
	savefile=""

	if len(argv) < 1:
		Usage()
		exit(1)

    	try:
        	opts, args = getopt.getopt(argv,"a:w:r:l:u:f:s:o",["labelNames=", "useWeight=", "refState=", "minPotential=", "maxPotential=", "refFile=", "savefile=", "noRef4Orientation="])
        	#print opts, args
    	except getopt.GetoptError:
        	Usage()
        	exit(1)

    	if len(args) != 1:
        	Usage()
        	exit(1)

	inputFile = args[0]

    	for opt, arg in opts:
		if opt in ("-a", "--labelNames"):
			labelNames = config.ParseLabelNames(arg)

		elif opt in ("-w", "--useWeight"):
			scheme = np.int32(arg)
			UseWeight4Orientation = (2 & scheme)>0
			UseWeight4Distance = (1 & scheme)>0

		elif opt in ("-r", "--refState"):
			fields = arg.split('+')
			reference = fields[0].upper()
			if reference not in allRefTypes:
				print 'ERROR: allowed reference types: ', allRefTypes
				exit(1)

			if len(fields) > 1:
				if fields[1].isdigit():
					rc = np.int32(fields[1])
				else:
					rc = np.float32(fields[1])

				if reference  == 'DFIRE':
					if len(fields) > 2:
						alpha4DFIREstr = fields[2]
						alpha4DFIRE = np.float32(fields[2])

				elif reference == 'DOPE':
					if len(fields) > 2:
						rgScale4DOPE = np.float32(fields[2])
				elif reference == 'SimuRW'.upper():
					#rc = np.float32(fields[1])
					print 'Using SimuRW potential'
				else:
					print 'ERROR: unsupported reference format: ', arg
					exit(1)
				

		elif opt in ("-f", "--refFile"):
			refFile = arg
			if not os.path.isfile(refFile):
				print 'the provided file for reference state is not valid: ', refFile
				exit(1)

		elif opt in ("-o", "--noRef4Orientation"):
			UseRef4Orientation = False

		elif opt in ("-s", "--savefile"):
			savefile = arg

		elif opt in ("-l", "--minPotential"):
			minPotential = np.float32(arg)
		elif opt in ("-u", "--maxPotential"):
			maxPotential = np.float32(arg)

		else:
	    		Usage()
	    		exit(1)

    	if inputFile is None:
		print 'ERROR: Please provide an input file'
		exit(1)
    	if not os.path.isfile(inputFile):
		print 'ERROR: The input file does not exist: ', inputFile
		exit(1)

	if reference in allRefTypesWithFiles and refFile is None:
		print 'ERROR: The file for user-sepcified reference state is empty'
		exit(1)

	if reference == 'DFIRE':
		if alpha4DFIRE > 10:
			## take a random value between 1.57 and 1.63
			alpha4DFIRE=random.uniform(1.57, 1.63)

		print 'alpha for DFIRE potential is ', alpha4DFIRE
		if alpha4DFIRE<1.55 or alpha4DFIRE>1.75:
			print 'ERROR: alpha4DFIRE shall be between 1.55 and 1.75'
			exit(1)

	if reference == 'DOPE':
		print 'rgScale for DOPE potential is', rgScale4DOPE
		if rgScale4DOPE > 1.2 or rgScale4DOPE <0.8:
			print 'ERROR: rgScale4DOPE shall be between 0.8 and 1.2'
			exit(1)

	if UseWeight4Distance:
		print 'Use weight for distance potential'
	if UseWeight4Orientation:
		print 'Use weight for orientation potential'
	if not UseRef4Orientation:
		print 'Do not use reference for orientation'


    	content = DistanceUtils.LoadRawDistProbFile(inputFile)
	assert len(content) >=6

    	name, sequence, predictedProb, predictedContactProb, labelWeight, labelDistribution = content[:6]
	assert labelWeight is not None, "labelWeight shall not be empty"
	predData = (predictedProb, labelWeight, labelDistribution)

        targetName = os.path.basename(inputFile).split('.')[0]
	print 'Generating potential for ', targetName, 'with the following labels: ', labelNames

	filenames = [ targetName, 'pairPotential']

	if reference == 'DFIRE':
		pairPotential, cutoffs, validProb, distPotential, oriPotential = CalcDistOriPotential(predData, labelNames, distPotType='DFIRE', param4Potential=alpha4DFIRE, largestDistance=rc, useWeight4Dist=UseWeight4Distance, useRef4Ori=UseRef4Orientation, useWeight4Ori=UseWeight4Orientation, minPotential=minPotential, maxPotential=maxPotential)
		filenames.extend([reference, str(rc), alpha4DFIREstr])
	elif reference == 'DOPE':
		pairPotential, cutoffs, validProb, distPotential, oriPotential = CalcDistOriPotential(predData, labelNames, distPotType='DOPE', param4Potential=rgScale4DOPE, largestDistance=rc, useWeight4Dist=UseWeight4Distance, useRef4Ori=UseRef4Orientation, useWeight4Ori=UseWeight4Orientation, minPotential=minPotential, maxPotential=maxPotential)
		filenames.extend([reference, str(rc), str(rgScale4DOPE)])
	else:
		print 'ERROR: unimplemented potential type: ', reference
		exit(1)

	if bool(oriPotential) and UseRef4Orientation:
		filenames.append('Ref4O')

	wStr=None
	if (bool(distPotential) and UseWeight4Distance) and (bool(oriPotential) and UseWeight4Orientation):
		wStr = 'Wt4OD'
	elif bool(oriPotential) and UseWeight4Orientation:
		wStr = 'Wt4O'
	elif bool(distPotential) and UseWeight4Distance:
		wStr = 'Wt4D'

	if wStr is not None:
		filenames.append(wStr)

	filenames.append('pkl')
	if savefile == "":
		savefile = '.'.join(filenames)

	## save the result
        with open(savefile, 'wb') as fh:
		cPickle.dump((name, sequence, pairPotential, cutoffs, validProb), fh, protocol=cPickle.HIGHEST_PROTOCOL)
Ejemplo n.º 3
0
def main(argv):

    	inputFile = None
    	targetName = None
	labelNames = ['CbCb']
	potentialFileSuffix = 'pkl'
	minPotential = -30.0
	maxPotential = 30.0
	minSeqSep = 3
	minSeqSepStr='3'

	## the largest dist cutoff
	rc = 18

	alpha4DFIRE = 1.61
	rgScale4DOPE = 1.

	## reference 
	reference = 'DFIRE'

	## refFile
	refFile = None

    	try:
        	opts, args = getopt.getopt(argv,"i:a:r:l:u:s:f:tn",["input=", "atomPairType=", "refState=", "minPotential=", "maxPotential=", "minSeqSep=", "refFile=", "textFormat=", "nonZero="])
        	print opts, args
    	except getopt.GetoptError:
        	Usage()
        	exit(1)


    	if len(opts) < 1:
        	Usage()
        	exit(1)

    	for opt, arg in opts:
		if opt in ("-i", "--input"):
	    		inputFile = arg

		elif opt in ("-a", "--atomPairType"):
			labelNames = config.ParseLabelNames(arg)

		elif opt in ("-r", "--refState"):
			fields = arg.split('+')
			reference = fields[0].upper()
			if reference not in allRefTypes:
				print 'allowed reference types: ', allRefTypes
				exit(1)

			if len(fields) > 1:
				if reference  == 'DFIRE':
					rc = np.float32(fields[1])
					if len(fields) > 2:
						alpha4DFIRE = np.float32(fields[2])

				elif reference == 'DOPE':
					rc = np.float32(fields[1])
					if len(fields) > 2:
						rgScale4DOPE = np.float32(fields[2])
				elif reference == 'SimuRW'.upper():
					rc = np.float32(fields[1])
				else:
					print 'WARNING: unsupported reference format: ', arg
				

		elif opt in ("-f", "--refFile"):
			refFile = arg
			if not os.path.isfile(refFile):
				print 'the provided file for reference state is not valid: ', refFile
				exit(1)

		elif opt in ("-l", "--minPotential"):
			minPotential = np.float32(arg)
		elif opt in ("-u", "--maxPotential"):
			maxPotential = np.float32(arg)

		elif opt in ("-s", "--minSeqSep"):
			minSeqSep = np.int32(arg)
			minSeqSepStr = arg
			if minSeqSep < 1:
				print 'ERROR: minSeqSep shall be at least 1'
				exit(1)

		elif opt in ("-t", "--textFormat"):
	    		potentialFileSuffix = '.txt'

		elif opt in ("-n", "--nonZero"):
			resetFlag = False	

		else:
	    		Usage()
	    		exit(1)

    	if inputFile is None:
		print 'Please provide an input file'
		exit(1)
    	if not os.path.isfile(inputFile):
		print 'The input file does not exist: ', inputFile
		exit(1)

	if reference in allRefTypesWithFiles and refFile is None:
		print 'The file for user-sepcified reference state is empty'
		exit(1)

        targetName = os.path.basename(inputFile).split('.')[0]

    	content = DistanceUtils.LoadRawDistProbFile(inputFile)
	assert len(content) >=6

    	name, sequence, predictedDistProb, predictedContactProb, labelWeight, labelDistribution = content[:6]
	assert labelWeight is not None, "labelWeight shall not be empty"

	## if needed, add code to here the predicted dist probability

	filenames = [ targetName, 'distPotential']
	if reference == 'DFIRE':
		potential = CalcPotentialByDFIRE(predictedDistProb, alpha=alpha4DFIRE, largestDistance=rc, minPotential=minPotential, maxPotential=maxPotential)
		filenames.extend([reference, str(rc), str(alpha4DFIRE), potentialFileSuffix])
	elif reference == 'DOPE':
		potential = CalcPotentialByDOPE(predictedDistProb, largestDistance=rc, rgScale=rgScale4DOPE, minPotential=minPotential, maxPotential=maxPotential)
		filenames.extend([reference, str(rc), str(rgScale4DOPE), potentialFileSuffix])
	elif reference == 'SimuRW'.upper():
		potential = CalcPotentialBySimuRW(predictedDistProb, refFile, largestDistance=rc, minPotential=minPotential, maxPotential=maxPotential)
		filenames.extend([reference, str(rc), potentialFileSuffix])
	else:
		print 'ERROR: unimplemented reference state: ', reference
		exit(1)

	potentialFileName = '.'.join(filenames)

	## save to PKL file
	if potentialFileName.endswith('.pkl'):
        	fh = open(potentialFileName, 'wb')
		potential_new = dict()
		distCutoffs = dict()
		for response, pot in potential.iteritems():
			labelName = config.Response2LabelName(response)
			if labelName not in set(labelNames):
				continue

			potential_new[response] = pot
			distCutoffs[response] = config.GetCutoffs(response)

		cPickle.dump((name, sequence, potential_new, distCutoffs), fh, protocol=cPickle.HIGHEST_PROTOCOL)
		fh.close()
		return

	## save to text file
	potentialFileName = targetName + '.distPotential.s' + minSeqSepStr + potentialFileSuffix
	fh = open(potentialFileName, 'w')
	fh.write('#TARGET\t' + targetName + '\n')
	fh.write('#SEQ\t' + sequence + '\n')
	fh.write('#DistanceBinBoundaries\t' + "Please check config.py" + '\n')

	for response, pot in potential.iteritems():
		labelName, labelType, subType = config.ParseResponse(response)
		if labelName not in set(labelNames):
			continue

		size = pot.shape
		for i in xrange(size[0]):
			rawPotStrs = []

			for j in xrange(i+ minSeqSep, size[1]):
				atom1, atom2 = config.SelectAtomPair(sequence, i, j, labelName)
				y = pot[i, j]

				rawPotStr = ' '.join(['AtomPair', atom1.upper(), str(i+1), atom2.upper(), str(j+1), subType] + [ "{:.4f}".format(e) for e in y ] )
				rawPotStrs.append(rawPotStr)

			if len(rawPotStrs) >0:
				fh.write('\n'.join(rawPotStrs) + '\n')

	fh.close()