Ejemplo n.º 1
0
def CalcPotentialByEmpSI(predDistMatrix, userRef, largestDistance=20, sequence=None, minPotential=-20., maxPotential=20.):
    """Calculate distance potentials against an empirical reference loaded from a file.

    For each response in predDistMatrix that names a supported atom pair and
    has a discrete label type, the potential is -log(predProb / refProb),
    where refProb is element [0] of the reference entry of that response.
    The potential is then shifted so that the label returned by
    DistanceUtils.LabelsOfOneDistance for largestDistance becomes zero, and
    that label and all later labels are set to 0.

    Args:
        predDistMatrix: dict mapping a response to its predicted probability
            array, indexed as [i, j, label].
        userRef: path of the pickled reference data (a mapping keyed by response).
        largestDistance: distance whose label is used as the zero point.
        sequence: not used by this function.
        minPotential: not used by this function.
        maxPotential: not used by this function.

    Returns:
        dict mapping each processed response to its potential array.
    """
    ## NOTE(review): unpickling can execute arbitrary code; userRef must be a trusted file
    with open(userRef, 'rb') as f:
        refData = cPickle.load(f)

    potentials = dict()
    for response, predProb in predDistMatrix.iteritems():
        labelName, labelType, _ = config.ParseResponse(response)
        if labelName not in config.allAtomPairNames:
            continue
        if not config.IsDiscreteLabel(labelType):
            continue

        ## assumes refProb broadcasts against predProb -- TODO confirm against the reference file format
        refProb = refData[response][0]
        potential = - np.log( predProb / refProb )

        rc = largestDistance
        cutoff = config.GetCutoffs(response)
        lastDistBin = DistanceUtils.LabelsOfOneDistance(rc, cutoff)

        ## keep the label axis (size 1) so that the subtraction broadcasts over all labels
        lastCol = potential[:, :, lastDistBin : lastDistBin+1]
        potential = potential - lastCol
        potential[:, :, lastDistBin: ] = 0

        CheckPotentialValues(potential)
        potentials[response] = potential

    return potentials
Ejemplo n.º 2
0
def EstimateDistanceBounds(predictedMatrix):
    """Estimate a distance bound for every supported atom pair.

    Each response of predictedMatrix is parsed with config.ParseResponse;
    responses whose label name is not in config.allAtomPairNames are ignored.
    For the others, EstimateDistanceBound(pred, response) is stored under the
    label name (not the full response).

    Returns:
        dict mapping label name -> result of EstimateDistanceBound.
    """
    boundsByLabel = {}
    for respName, probMatrix in predictedMatrix.iteritems():
        atomPairName, _, _ = config.ParseResponse(respName)
        if atomPairName in config.allAtomPairNames:
            boundsByLabel[atomPairName] = EstimateDistanceBound(probMatrix, respName)
    return boundsByLabel
def CalcDistOriPotential(predData, labelNames=['CaCa', 'CbCb', 'NO'] + ['Ca1Cb1Cb2Ca2','N1Ca1Cb1Cb2','Ca1Cb1Cb2'], distPotType='DFIRE', param4Potential=1.61, largestDistance=18, useWeight4Dist=True, useRef4Ori=True, useWeight4Ori=True, minPotential=-30, maxPotential=30):
	assert distPotType.upper() in ['DFIRE', 'DOPE']

	predProbMatrix, labelWeight, labelDistribution = predData

	validDistribution = dict()
   	validLabelWeight = dict()
        validLabelDistribution = dict()

        existingLabelNames = []
        for response, pred in predProbMatrix.iteritems():
                labelName,_, _ = config.ParseResponse(response)
                if labelName not in labelNames:
                        continue
                existingLabelNames.append(labelName)
                validDistribution[response] = pred
                validLabelWeight[response] = labelWeight[response]
                validLabelDistribution[response] = labelDistribution[response]

        missingLabelNames = list(set(labelNames) - set(existingLabelNames))
        if len(missingLabelNames)>0:
                print 'WARNING: the predicted probability file does not have information for the following label names: ', missingLabelNames

        pairPotential = dict()
        validProb = dict()

	if distPotType == 'DOPE':
		distPotential, distValidProb = CalcPotentialByDOPE(validDistribution, largestDistance=rc, rgScale=param4Potential, useWeight=useWeight4Dist, minPotential=minPotential, maxPotential=maxPotential)
	else:
        	distPotential, distValidProb = CalcPotentialByDFIRE(validDistribution, alpha=param4Potential, largestDistance=largestDistance, useWeight=useWeight4Dist, minPotential=minPotential, maxPotential=maxPotential)
        pairPotential.update(distPotential)
	validProb.update(distValidProb)

        oriPotential, oriValidProb = CalcOrientationPotential(validDistribution, useRef=useRef4Ori, useWeight=useWeight4Ori, labelWeight=validLabelWeight, labelDistribution=validLabelDistribution, minPotential=minPotential, maxPotential=maxPotential)
        pairPotential.update(oriPotential)
        validProb.update(oriValidProb)

	cutoffs = dict()
	for response in pairPotential.keys():
		cutoffs[response] = config.GetCutoffs(response)

	return pairPotential, cutoffs, validProb, distPotential, oriPotential
Ejemplo n.º 4
0
def CalcPotentialByEmpSD(predDistMatrix, userRef, largestDistance=20, sequence=None, minPotential=-20., maxPotential=20.):
	f=open(userRef, 'rb')
	refData = cPickle.load(f)
	f.close()

	potentials = dict()
        for response, predProb in predDistMatrix.iteritems():
		labelName, labelType, _ = config.ParseResponse(response)
		if labelName not in config.allAtomPairNames:
			continue
		if not conifg.IsDiscreteLabel(labelType):
			continue

                refProbList = refData[response][1]

		length = predProb.shape[0]
		if length < 400:
			refProbs = [ ref for sz, freq, ref in refProbList if sz<=1.3*length and sz>=length/1.3 ]
		else:
			refProbs = [ ref for sz, freq, ref in refProbList if sz>=350 ]

		print '#refProbMatrix: ', len(refProbs), ' for proteins with length= ', length

		refProb = np.average(refProbs, axis=0)
		potential = - np.log ( predProb / refProb )

		rc = largestDistance
		cutoff = config.GetCutoffs(response)
		lastDistBin = DistanceUtils.LabelsOfOneDistance(rc, cutoff)
		
		lastCol = potential[:, :, lastDistBin]
		potential = potential - lastCol
		potential{;, :, lastDistBin: ] = 0

		CheckPotentialValues(potential)
                potentials[response] = potential

        return potentials
def CalcOrientationPotential(predOriMatrix, useRef=True, useWeight=True, labelWeight=None, labelDistribution=None, minPotential=-30, maxPotential=30):
	potentials = dict()
	## validProbs save the Prob(d<20) for all atom/residue pairs
	validProbs = dict()
	for response, predProb in predOriMatrix.iteritems():
		labelName, labelType, subType = config.ParseResponse(response)
                if labelName not in config.allOrientationNames:
                        #print 'WARNING: unsupported response for orientation potential: ', response
                        continue
                if not config.IsDiscreteLabel(labelType):
                        print 'WARNING: the orientation label is not discrete: ', response
                        continue
		numLabels = config.GetResponseProbDims(response)
		if subType.endswith('Plus') or subType.endswith('Minus'):
			largestValidLabel = numLabels -2
		else:
			largestValidLabel = numLabels -1

		probOfValid = predProb[:, :, :largestValidLabel]
		#potential = np.zeros_like(probOfValid)
		potential = -np.log(probOfValid)

		if useRef:
			refOfValid = labelDistribution[response][:,:largestValidLabel ]
			refPot = -np.log(refOfValid)
			for i in range(predProb.shape[0]):
				for j in range(predProb.shape[1]):
					if i==j:
						continue
					offset = abs(i-j)
					rangeIndex = RangeNWeight.GetRangeIndex(offset, numRanges=refOfValid.shape[0])
                        		if rangeIndex < 0:
                        			continue
					potential[i, j] -= refPot[rangeIndex]

		## shift potential by its mean
		potential -= np.mean(potential, axis=2, keepdims=True)

		validProb = np.sum(probOfValid, axis=2)

		## multiply the potential by the probability of distance<20A
		if useWeight:
			potential *= validProb[:, :, np.newaxis]
			
		"""
		x = np.sum(probOfValid, axis=2)
		np.fill_diagonal(x, 0)
		np.fill_diagonal(x[1:], 0)
		np.fill_diagonal(x[:,1:], 0)
		ind = np.unravel_index(np.argsort(-x, axis=None), x.shape)

		ratio = min(topRatio, predProb.shape[0]-3)
		topK = np.int32(ratio * predProb.shape[0])
		"""
		"""
		for i, j in zip(ind[0][:topK], ind[1][:topK]):
			offset = abs(i-j)
			rangeIndex = RangeNWeight.GetRangeIndex(offset, numRanges=refOfValid.shape[0])
                        if rangeIndex < 0:
                        	continue
			potential[i, j] = -np.log(probOfValid[i, j])
			if useRef:
				potential[i, j] -= refPot[rangeIndex]
			potential[i, j] -= np.mean(potential[i, j])
			if weighted:
				potential[i, j] *= x[i, j]
		"""

		CheckPotentialValues(m=potential)
		potentials[response] = potential.astype(np.float32)
		validProbs[response] = validProb.astype(np.float32)

	return potentials, validProbs
def CalcPotentialBySimuRW(predDistMatrix, refFile, largestDistance=20, sequence=None, useWeight=False, minPotential=-30., maxPotential=30.):
	f=open(refFile, 'rb')
	refData = cPickle.load(f)
	f.close()

	potentials = dict()
        for response in predDistMatrix.keys():
		labelName, labelType, _ = config.ParseResponse(response)
		if labelName not in config.allAtomPairNames:
                        #print 'WARNING: unsupported response for SimuRW potential: ', response
			continue
		if not conifg.IsDiscreteLabel(labelType):
			continue

                predProb = predDistMatrix[response]

		## the first row of refProb corresponds to offset=1
                refProb = refData[response]
		if labelName != 'CbCb':
			print 'distance label name not supported yet: ', labelName
			exit(1)

		if not subType.endswith('34C'):
			print 'distance label type not supported yet: ', subType
			exit(1)

		cutoff = config.GetCutoffs(response)

		length = predProb.shape[0]
		numLabels = predProb.shape[2]
		assert numLabels == refProb.shape[1]

		## maxAllowedDist[offset] is the maximum physically feasible distance between two Cb atoms when their sequence separation is equal to offset
		maxAllowedDist = [ (offset * 3.8 + 3.06) for offset in range(length) ]
		maxAllowedDist[0] = 0
		eps = 0.00001
		maxAllowedDist[2] = 10.5 - eps
		maxAllowedDist[3] = 13.0 - eps
		maxAllowedDist[4] = 15.5 - eps
		maxAllowedDist[5] = 17.5 - eps
		maxAllowedDist[6] = 19.5 - eps

		potential = np.zeros_like(predProb)

		for i in range(0, length):
			for j in range(i+2, length):
				offset = j-i
				## find the distance bin into which the maxAllowedDist falls
				lastDistBin = DistanceUtils.LabelsOfOneDistance(maxAllowedDist[offset], cutoff)
				if lastDistBin < (numLabels - 1):
					## merge the pred prob and ref prob in the bins from lastDistBin to the end
					pred = predProb[i, j,  : lastDistBin+1]
					ref = refProb[offset-1][:lastDistBin+1]

					potential[i, j, :lastDistBin+1] = -np.log( pred / ref )
					potential[i, j, lastDistBin+1: ] = maxPotential
				else:
					## determine the last distance bin
                			rc = min(cutoff[-1], largestDistance) - 0.001
                			if (rc<10.0):
                        			print 'ERROR: the largest distance cutoff for SimuRW is too small: ', rc
                        			exit(1)
                			rc_index = DistanceUtils.LabelsOfOneDistance(rc, cutoff)

					refProbLen = refProb.shape[0]
					#idx4rc = numLabels - 2
					potential[i, j] = -np.log( predProb[i, j] / refProb[min(offset, refProbLen) -1 ] )
					potential[i, j] -= potential[i, j, rc_index]
					potential[i, j, rc_index + 1: ] = 0

				## only valid for symmetric atom pairs
				potential[j, i] = potential[i, j]

		if useWeigt and subType.endswith('Plus'):
			potential *= (1-predProb[:, :, -1])

                CheckPotentialValues(potential)

		potentials[response] = potential

        return potentials
def CalcPotentialByDOPE(predDistMatrix, largestDistance=20, rgScale=1., useWeight=False, minPotential=-30., maxPotential=30.):
	potentials = dict()
	validProbs = dict()
	for response in predDistMatrix.keys():
		labelName, labelType, subType = config.ParseResponse(response)
                if labelName not in config.allAtomPairNames:
                        #print 'WARNING: unsupported response for DOPE potential: ', response
                        continue
		if not conifg.IsDiscreteLabel(labelType):
			continue

                cutoff = config.GetCutoffs(response)

		## determine the last distance bin
                rc = min(cutoff[-1], largestDistance) - 0.001
                if (rc<10.0):
                        print 'ERROR: the largest distance cutoff for DOPE is too small: ', rc
                        exit(1)
                rc_index = DistanceUtils.LabelsOfOneDistance(rc, cutoff)

		binwidths = [ d2 - d1 for d1, d2 in zip(cutoff[:-1], cutoff[1:]) ]
		bincenters = [ (d2 + d1)/2. for d1, d2 in zip(cutoff[:-1], cutoff[1:]) ]

		## a is the radius of reference sphere and rg is the estimated radius of gyration
		length = predDistMatrix[response].shape[0]
		rg = 0.395*length**(3./5)+7.257	
		a = np.sqrt(5./3) * rg * rgScale

		""" calculate n(r,a) defined in the DOPE paper. Below is the original formulation.
		## rc is the upper bound of distance between two atoms
		rc = bincenters[-1]
		if rc <= 2*a:
			#nra = 6. * np.square(bincenters * (bincenters - 2*a)) * (bincenters + 4*a) / np.power(rc,3) /(np.power(rc, 3) - 18 * np.square(a)*rc + 32 * np.power(a, 3))
		else:
			#nra = 3* np.square(bincenters * (bincenters - 2*a)) * (bincenters + 4*a) / 16. / np.power(a, 6)
		"""
		## calculate n(r,a) described in the DOPE paper. Ignore the constant factor and the denominator since they are same for all distance bins
		nra = np.square(bincenters * (bincenters - 2*a)) * (bincenters + 4*a) 

		def CalcApproxRefPot(idx=0):
			points = np.arange(cutoff[idx] + 0.5/2, cutoff[idx+1], 0.5)
			values = np.square(points * (points - 2*a)) * (points + 4*a) 
			tmpNra = np.average(values)	
			return tmpNra

		## get a more accurate estimation of nra for the first several bins if their binwidth is > 0.5
		for i in range(len(binwidths)):
			if binwidths[i] >= 1:
				nra[i] = CalcApproxRefPot(i) 

		## calculate reference potential defined as log (nra(r)/nra(rc)) + log(\delta r/ \delta rc)
		## \delta(r) is equal to binwidths
		refPot = np.log( nra / nra[rc_index] * binwidths / binwidths[rc_index] )
		
	  	## calculate the observed potential defined as log( p(r) /p(rc) ) where p(r) is the predicted distance probability
                predProb = predDistMatrix[response]
                predProbRC = predProb[:, :, rc_index : rc_index+1]
                obsPot = np.log(predProb / predProbRC)

                ## calculate the final potential, which is the difference between reference and observed potential
                potential = np.zeros_like(predDistMatrix[response])
                potential[:, :, :rc_index ] = refPot[: rc_index] - obsPot[:, :, :rc_index]

		if subType.endswith('Plus'):
			validProb = 1 - predProb[:, :, -1]
		else:
			validProb = np.ones((predProb.shape[0], predProb.shape[1]), dtype=np.float32)

		##if useWeight and the prob of disroder exists, adjust potential by prob of not beining in disorder status
		if useWeight and subType.endswith('Plus'):
			potential *= validProb[:, :, np.newaxis]

		## remove the potential for the last distance bin, which corresponds to disorder status
		if subType.endswith('Plus'):
			potential = potential[:, :, :-1]

		CheckPotentialValues(m=potential)

		potentials[response] = potential.astype(np.float32)
		validProbs[response] = validProb.astype(np.float32)

	return potentials, validProbs
def CalcPotentialByDFIRE(predDistMatrix, alpha=1.61, largestDistance=18, useWeight=False, minPotential=-30, maxPotential=30):
	potentials = dict()

	## validProbs saves the prob of one atom/residue pair likely have valid coordinates
	validProbs = dict()
	for response in predDistMatrix.keys():
		labelName, labelType, subType = config.ParseResponse(response)
		if labelName not in config.allAtomPairNames:
			#print 'WARNING: unsupported response for DFIRE potential: ', response
			continue
		if not config.IsDiscreteLabel(labelType):
                        print 'WARNING: the distance label is not discrete: ', response
			continue

		cutoff = config.GetCutoffs(response)

		## determine the last distance bin
		rc = min(cutoff[-1], largestDistance) - 0.001
		if (rc<10.0):
			print 'ERROR: the largest distance cutoff for DFIRE is too small: ', rc
			exit(1)
		rc_index = DistanceUtils.LabelsOfOneDistance(rc, cutoff)

		binwidths = [ d2 - d1 for d1, d2 in zip(cutoff[:-1], cutoff[1:]) ]
		bincenters = [ (d2 + d1)/2. for d1, d2 in zip(cutoff[:-1], cutoff[1:]) ]

		## calculate reference potential defined as alpha*log (r/rc) + log(\delta r/ \delta rc)
		## \delta(r) is binwidths and r is the bincenters
		refPot = alpha * np.log( bincenters / bincenters[rc_index]) + np.log( binwidths / binwidths[rc_index] )

		## idx is the index for a bin
		def CalcApproxRefPot(idx=0):
                        points = np.arange(cutoff[idx] + 0.5/2, cutoff[idx+1], 0.5)
                        values = np.power(points / bincenters[rc_index], alpha)
			avg = np.average(values)
                        tmpRefPot = np.log(avg) + np.log( binwidths[idx] / binwidths[rc_index] )
			return tmpRefPot

		## get a more accurate estimation of reference for the bin with a large width
		for i in range(len(binwidths)):
			if binwidths[i] >= 1:
				refPot[i] = CalcApproxRefPot(i)
		
		## calculate the observed potential defined as log( p(r) /p(rc) ) where p(r) is the predicted distance probability
		predProb = predDistMatrix[response]
		predProbRC = predProb[:, :, rc_index : rc_index+1]
		#obsPot = np.log(predProb / (sys.float_info.min + predProbRC))
		obsPot = np.log(predProb / predProbRC)

		## calculate the final potential, which is the difference between reference potential and observed potential
		potential = np.zeros_like(predDistMatrix[response])
		potential[:, :, :rc_index ] = refPot[: rc_index] - obsPot[:, :, :rc_index]

		if subType.endswith('Plus'):
			validProb = 1 - predProb[:, :, -1]
		else:
			validProb = np.ones((predProb.shape[0], predProb.shape[1]), dtype=np.float32)

		##if useWeight=True and the prob of being disorder exists, adjust potential by the prob of not being in disorder status
		if useWeight and subType.endswith('Plus'):
			potential *= validProb[:, :, np.newaxis]

		## remove the potential for the last distance bin, which corresponds to disorder status
		if subType.endswith('Plus'):
			potential = potential[:, :, :-1]

		CheckPotentialValues(m=potential)

		potentials[response] = potential.astype(np.float32)
		validProbs[response] = validProb.astype(np.float32)

	return potentials, validProbs
Ejemplo n.º 9
0
def CalcPotentialByDFIRE(predDistMatrix, alpha=1.61, largestDistance=15, minPotential=-20, maxPotential=20):
	potentials = dict()
	for response in predDistMatrix.keys():
		labelName, labelType, subType = config.ParseResponse(response)
		if labelName not in config.allAtomPairNames:
			print 'WARNING: unsupported response for DFIRE potential: ', response
			continue
		if not conifg.IsDiscreteLabel(labelType):
			continue

		cutoff = config.GetCutoffs(response)

		## determine the last distance bin
		rc = min(cutoff[-1], largestDistance) - 0.001
		if (rc<10.0):
			print 'ERROR: the largest distance cutoff for DFIRE is too small: ', rc
			exit(1)
		rc_index = DistanceUtils.LabelsOfOneDistance(rc, cutoff)

		binwidths = [ d2 - d1 for d1, d2 in zip(cutoff[:-1], cutoff[1:]) ]
		bincenters = [ (d2 + d1)/2. for d1, d2 in zip(cutoff[:-1], cutoff[1:]) ]

		## calculate reference potential defined as alpha*log (r/rc) + log(\delta r/ \delta rc)
		## \delta(r) is binwidths and r is the bincenters
		refPot = alpha * np.log( bincenters / bincenters[rc_index]) + np.log( binwidths / binwidths[rc_index] )

		## idx is the index for binwidth
		def CalcApproxRefPot(idx=0):
                        points = np.arange(cutoff[idx] + 0.5/2, cutoff[idx+1], 0.5)
                        values = np.power(points / bincenters[rc_index], alpha)
			avg = np.average(values)
                        tmpRefPot = np.log(avg) + np.log( binwidths[idx] / binwidths[rc_index] )
			return tmpRefPot

		## get a more accurate estimation of reference for the first bin
		[ refPot[i] = CalcApproxRefPot(i) for i in range(len(binwidths)) if binwdiths[i] >= 1 ]
		
		## calculate the observed potential defined as log( p(r) /p(rc) ) where p(r) is the predicted distance probability
		predProb = predDistMatrix[response]
		predProbRC = predProb[:, :, rc_index : rc_index+1]
		obsPot = np.log(predProb / predProbRC)

		## calculate the final potential, which is the difference between reference potential and observed potential
		potential = np.zeros_like(predDistMatrix[response])
		potential[:, :, :rc_index ] = refPot[: rc_index] - obsPot[:, :, :rc_index]

		CheckPotentialValues(m=potential)

		potentials[response] = potential

	return potentials

def CalcPotentialByDOPE(predDistMatrix, largestDistance=20, rgScale=1., minPotential=-20., maxPotential=20.):
	potentials = dict()
	for response in predDistMatrix.keys():
		labelName, labelType, subType = config.ParseResponse(response)
                if labelName not in config.allAtomPairNames:
                        print 'WARNING: unsupported response for DOPE potential: ', response
                        continue
		if not conifg.IsDiscreteLabel(labelType):
			continue

                cutoff = config.GetCutoffs(response)

		## determine the last distance bin
                rc = min(cutoff[-1], largestDistance) - 0.001
                if (rc<10.0):
                        print 'ERROR: the largest distance cutoff for DOPE is too small: ', rc
                        exit(1)
                rc_index = DistanceUtils.LabelsOfOneDistance(rc, cutoff)

		binwidths = [ d2 - d1 for d1, d2 in zip(cutoff[:-1], cutoff[1:]) ]
		bincenters = [ (d2 + d1)/2. for d1, d2 in zip(cutoff[:-1], cutoff[1:]) ]

		## a is the radius of reference sphere and rg is the estimated radius of gyration
		length = predDistMatrix[response].shape[0]
		rg = 0.395*length**(3./5)+7.257	
		a = np.sqrt(5./3) * rg * rgScale

		""" calculate n(r,a) defined in the DOPE paper. Below is the original formulation.
		## rc is the upper bound of distance between two atoms
		rc = bincenters[-1]
		if rc <= 2*a:
			#nra = 6. * np.square(bincenters * (bincenters - 2*a)) * (bincenters + 4*a) / np.power(rc,3) /(np.power(rc, 3) - 18 * np.square(a)*rc + 32 * np.power(a, 3))
		else:
			#nra = 3* np.square(bincenters * (bincenters - 2*a)) * (bincenters + 4*a) / 16. / np.power(a, 6)
		"""
		## calculate n(r,a) described in the DOPE paper. Ignore the constant factor and the denominator since they are same for all distance bins
		nra = np.square(bincenters * (bincenters - 2*a)) * (bincenters + 4*a) 

		def CalcApproxRefPot(idx=0):
			points = np.arange(cutoff[idx] + 0.5/2, cutoff[idx+1], 0.5)
			values = np.square(points * (points - 2*a)) * (points + 4*a) 
			tmpNra = np.average(values)	
			return tmpNra

		## get a more accurate estimation of nra for the first several bins if their binwidth is > 0.5
		[ nra[i] = CalcApproxRefPot(i) for i in range(len(binwidths)) if binwidths[i] >= 1 ]

		## calculate reference potential defined as log (nra(r)/nra(rc)) + log(\delta r/ \delta rc)
		## \delta(r) is equal to binwidths
		refPot = np.log( nra / nra[rc_index] * binwidths / binwidths[rc_index] )
		
	  	## calculate the observed potential defined as log( p(r) /p(rc) ) where p(r) is the predicted distance probability
                predProb = predDistMatrix[response]
                predProbRC = predProb[:, :, rc_index : rc_index+1]
                obsPot = np.log(predProb / predProbRC)

                ## calculate the final potential, which is the difference between reference and observed potential
                potential = np.zeros_like(predDistMatrix[response])
                potential[:, :, :rc_index ] = refPot[: rc_index] - obsPot[:, :, :rc_index]

		CheckPotentialValues(m=potential)

		potentials[response] = potential

	return potentials
		

def CalcPotentialBySimuRW(predDistMatrix, userRef, largestDistance=20, sequence=None, minPotential=-20., maxPotential=20.):
	f=open(userRef, 'rb')
	refData = cPickle.load(f)
	f.close()

	potentials = dict()
        for response in predDistMatrix.keys():
		labelName, labelType, _ = config.ParseResponse(response)
		if labelName not in config.allAtomPairNames:
                        print 'WARNING: unsupported response for SimuRW potential: ', response
			continue
		if not conifg.IsDiscreteLabel(labelType):
			continue

                predProb = predDistMatrix[response]

		## the first row of refProb corresponds to offset=1
                refProb = refData[response]
		if labelName != 'CbCb':
			print 'distance label name not supported yet: ', labelName
			exit(1)

		if not subType.endswith('34C'):
			print 'distance label type not supported yet: ', subType
			exit(1)

		cutoff = config.GetCutoffs(response)

		length = predProb.shape[0]
		numLabels = predProb.shape[2]
		assert numLabels == refProb.shape[1]

		## maxAllowedDist[offset] is the maximum physically feasible distance between two Cb atoms when their sequence separation is equal to offset
		maxAllowedDist = [ (offset * 3.8 + 3.06) for offset in range(length) ]
		maxAllowedDist[0] = 0
		eps = 0.00001
		maxAllowedDist[2] = 10.5 - eps
		maxAllowedDist[3] = 13.0 - eps
		maxAllowedDist[4] = 15.5 - eps
		maxAllowedDist[5] = 17.5 - eps
		maxAllowedDist[6] = 19.5 - eps

		potential = np.zeros_like(predProb)

		for i in range(0, length):
			for j in range(i+2, length):
				offset = j-i
				## find the distance bin into which the maxAllowedDist falls
				lastDistBin = DistanceUtils.LabelsOfOneDistance(maxAllowedDist[offset], cutoff)
				if lastDistBin < (numLabels - 1):
					## merge the pred prob and ref prob in the bins from lastDistBin to the end
					pred = predProb[i, j,  : lastDistBin+1]
					ref = refProb[offset-1][:lastDistBin+1]

					potential[i, j, :lastDistBin+1] = -np.log( pred / ref )
					potential[i, j, lastDistBin+1: ] = maxPotential
				else:
					## determine the last distance bin
                			rc = min(cutoff[-1], largestDistance) - 0.001
                			if (rc<10.0):
                        			print 'ERROR: the largest distance cutoff for SimuRW is too small: ', rc
                        			exit(1)
                			rc_index = DistanceUtils.LabelsOfOneDistance(rc, cutoff)

					refProbLen = refProb.shape[0]
					#idx4rc = numLabels - 2
					potential[i, j] = -np.log( predProb[i, j] / refProb[min(offset, refProbLen) -1 ] )
					potential[i, j] -= potential[i, j, rc_index]
					potential[i, j, rc_index + 1: ] = 0

				## only valid for symmetric atom pairs
				potential[j, i] = potential[i, j]

                CheckPotentialValues(potential)

		potentials[response] = potential

        return potentials

def CalcPotentialByEmpSD(predDistMatrix, userRef, largestDistance=20, sequence=None, minPotential=-20., maxPotential=20.):
	f=open(userRef, 'rb')
	refData = cPickle.load(f)
	f.close()

	potentials = dict()
        for response, predProb in predDistMatrix.iteritems():
		labelName, labelType, _ = config.ParseResponse(response)
		if labelName not in config.allAtomPairNames:
			continue
		if not conifg.IsDiscreteLabel(labelType):
			continue

                refProbList = refData[response][1]

		length = predProb.shape[0]
		if length < 400:
			refProbs = [ ref for sz, freq, ref in refProbList if sz<=1.3*length and sz>=length/1.3 ]
		else:
			refProbs = [ ref for sz, freq, ref in refProbList if sz>=350 ]

		print '#refProbMatrix: ', len(refProbs), ' for proteins with length= ', length

		refProb = np.average(refProbs, axis=0)
		potential = - np.log ( predProb / refProb )

		rc = largestDistance
		cutoff = config.GetCutoffs(response)
		lastDistBin = DistanceUtils.LabelsOfOneDistance(rc, cutoff)
		
		lastCol = potential[:, :, lastDistBin]
		potential = potential - lastCol
		potential{;, :, lastDistBin: ] = 0

		CheckPotentialValues(potential)
                potentials[response] = potential

        return potentials

def CalcPotentialByEmpSI(predDistMatrix, userRef, largestDistance=20, sequence=None, minPotential=-20., maxPotential=20.):
    """Calculate distance potentials against an empirical reference loaded from a file.

    For each response in predDistMatrix that names a supported atom pair and
    has a discrete label type, the potential is -log(predProb / refProb),
    where refProb is element [0] of the reference entry of that response.
    The potential is then shifted so that the label returned by
    DistanceUtils.LabelsOfOneDistance for largestDistance becomes zero, and
    that label and all later labels are set to 0.

    Args:
        predDistMatrix: dict mapping a response to its predicted probability
            array, indexed as [i, j, label].
        userRef: path of the pickled reference data (a mapping keyed by response).
        largestDistance: distance whose label is used as the zero point.
        sequence: not used by this function.
        minPotential: not used by this function.
        maxPotential: not used by this function.

    Returns:
        dict mapping each processed response to its potential array.
    """
    ## NOTE(review): unpickling can execute arbitrary code; userRef must be a trusted file
    with open(userRef, 'rb') as f:
        refData = cPickle.load(f)

    potentials = dict()
    for response, predProb in predDistMatrix.iteritems():
        labelName, labelType, _ = config.ParseResponse(response)
        if labelName not in config.allAtomPairNames:
            continue
        if not config.IsDiscreteLabel(labelType):
            continue

        ## assumes refProb broadcasts against predProb -- TODO confirm against the reference file format
        refProb = refData[response][0]
        potential = - np.log( predProb / refProb )

        rc = largestDistance
        cutoff = config.GetCutoffs(response)
        lastDistBin = DistanceUtils.LabelsOfOneDistance(rc, cutoff)

        ## keep the label axis (size 1) so that the subtraction broadcasts over all labels
        lastCol = potential[:, :, lastDistBin : lastDistBin+1]
        potential = potential - lastCol
        potential[:, :, lastDistBin: ] = 0

        CheckPotentialValues(potential)
        potentials[response] = potential

    return potentials

	

## reference states that need a reference file, stored in upper case
allRefTypesWithFiles = list(map(str.upper, ('SimuRW', 'EmpSI', 'EmpSD')))

## every accepted reference state: DFIRE and DOPE first, then the file-based ones
allRefTypes = ['DFIRE', 'DOPE']
allRefTypes.extend(allRefTypesWithFiles)

def main(argv):

    	inputFile = None
    	targetName = None
	labelNames = ['CbCb']
	potentialFileSuffix = 'pkl'
	minPotential = -30.0
	maxPotential = 30.0
	minSeqSep = 3
	minSeqSepStr='3'

	## the largest dist cutoff
	rc = 18

	alpha4DFIRE = 1.61
	rgScale4DOPE = 1.

	## reference 
	reference = 'DFIRE'

	## refFile
	refFile = None

    	try:
        	opts, args = getopt.getopt(argv,"i:a:r:l:u:s:f:tn",["input=", "atomPairType=", "refState=", "minPotential=", "maxPotential=", "minSeqSep=", "refFile=", "textFormat=", "nonZero="])
        	print opts, args
    	except getopt.GetoptError:
        	Usage()
        	exit(1)


    	if len(opts) < 1:
        	Usage()
        	exit(1)

    	for opt, arg in opts:
		if opt in ("-i", "--input"):
	    		inputFile = arg

		elif opt in ("-a", "--atomPairType"):
			labelNames = config.ParseLabelNames(arg)

		elif opt in ("-r", "--refState"):
			fields = arg.split('+')
			reference = fields[0].upper()
			if reference not in allRefTypes:
				print 'allowed reference types: ', allRefTypes
				exit(1)

			if len(fields) > 1:
				if reference  == 'DFIRE':
					rc = np.float32(fields[1])
					if len(fields) > 2:
						alpha4DFIRE = np.float32(fields[2])

				elif reference == 'DOPE':
					rc = np.float32(fields[1])
					if len(fields) > 2:
						rgScale4DOPE = np.float32(fields[2])
				elif reference == 'SimuRW'.upper():
					rc = np.float32(fields[1])
				else:
					print 'WARNING: unsupported reference format: ', arg
				

		elif opt in ("-f", "--refFile"):
			refFile = arg
			if not os.path.isfile(refFile):
				print 'the provided file for reference state is not valid: ', refFile
				exit(1)

		elif opt in ("-l", "--minPotential"):
			minPotential = np.float32(arg)
		elif opt in ("-u", "--maxPotential"):
			maxPotential = np.float32(arg)

		elif opt in ("-s", "--minSeqSep"):
			minSeqSep = np.int32(arg)
			minSeqSepStr = arg
			if minSeqSep < 1:
				print 'ERROR: minSeqSep shall be at least 1'
				exit(1)

		elif opt in ("-t", "--textFormat"):
	    		potentialFileSuffix = '.txt'

		elif opt in ("-n", "--nonZero"):
			resetFlag = False	

		else:
	    		Usage()
	    		exit(1)

    	if inputFile is None:
		print 'Please provide an input file'
		exit(1)
    	if not os.path.isfile(inputFile):
		print 'The input file does not exist: ', inputFile
		exit(1)

	if reference in allRefTypesWithFiles and refFile is None:
		print 'The file for user-sepcified reference state is empty'
		exit(1)

        targetName = os.path.basename(inputFile).split('.')[0]

    	content = DistanceUtils.LoadRawDistProbFile(inputFile)
	assert len(content) >=6

    	name, sequence, predictedDistProb, predictedContactProb, labelWeight, labelDistribution = content[:6]
	assert labelWeight is not None, "labelWeight shall not be empty"

	## if needed, add code to here the predicted dist probability

	filenames = [ targetName, 'distPotential']
	if reference == 'DFIRE':
		potential = CalcPotentialByDFIRE(predictedDistProb, alpha=alpha4DFIRE, largestDistance=rc, minPotential=minPotential, maxPotential=maxPotential)
		filenames.extend([reference, str(rc), str(alpha4DFIRE), potentialFileSuffix])
	elif reference == 'DOPE':
		potential = CalcPotentialByDOPE(predictedDistProb, largestDistance=rc, rgScale=rgScale4DOPE, minPotential=minPotential, maxPotential=maxPotential)
		filenames.extend([reference, str(rc), str(rgScale4DOPE), potentialFileSuffix])
	elif reference == 'SimuRW'.upper():
		potential = CalcPotentialBySimuRW(predictedDistProb, refFile, largestDistance=rc, minPotential=minPotential, maxPotential=maxPotential)
		filenames.extend([reference, str(rc), potentialFileSuffix])
	else:
		print 'ERROR: unimplemented reference state: ', reference
		exit(1)

	potentialFileName = '.'.join(filenames)

	## save to PKL file
	if potentialFileName.endswith('.pkl'):
        	fh = open(potentialFileName, 'wb')
		potential_new = dict()
		distCutoffs = dict()
		for response, pot in potential.iteritems():
			labelName = config.Response2LabelName(response)
			if labelName not in set(labelNames):
				continue

			potential_new[response] = pot
			distCutoffs[response] = config.GetCutoffs(response)

		cPickle.dump((name, sequence, potential_new, distCutoffs), fh, protocol=cPickle.HIGHEST_PROTOCOL)
		fh.close()
		return

	## save to text file
	potentialFileName = targetName + '.distPotential.s' + minSeqSepStr + potentialFileSuffix
	fh = open(potentialFileName, 'w')
	fh.write('#TARGET\t' + targetName + '\n')
	fh.write('#SEQ\t' + sequence + '\n')
	fh.write('#DistanceBinBoundaries\t' + "Please check config.py" + '\n')

	for response, pot in potential.iteritems():
		labelName, labelType, subType = config.ParseResponse(response)
		if labelName not in set(labelNames):
			continue

		size = pot.shape
		for i in xrange(size[0]):
			rawPotStrs = []

			for j in xrange(i+ minSeqSep, size[1]):
				atom1, atom2 = config.SelectAtomPair(sequence, i, j, labelName)
				y = pot[i, j]

				rawPotStr = ' '.join(['AtomPair', atom1.upper(), str(i+1), atom2.upper(), str(j+1), subType] + [ "{:.4f}".format(e) for e in y ] )
				rawPotStrs.append(rawPotStr)

			if len(rawPotStrs) >0:
				fh.write('\n'.join(rawPotStrs) + '\n')

	fh.close()


## run the command-line driver only when this file is executed as a script
if __name__ == '__main__':
    main(sys.argv[1:])
Ejemplo n.º 10
0
def main(argv):
    """Convert a predicted distance-probability file into a distance potential file.

    Parses the command-line options, loads the prediction with
    DistanceUtils.LoadRawDistProbFile, derives a potential with the selected
    reference state (DFIRE, DOPE or SIMURW) and writes the result, using a
    file name relative to the current directory, either as a pickle (default)
    or as a text file (-t).

    Options handled here:
        -i/--input          input file (required, must exist)
        -a/--atomPairType   label names, parsed by config.ParseLabelNames
        -r/--refState       TYPE[+rc[+param]]; param is alpha for DFIRE and
                            rgScale for DOPE
        -f/--refFile        file needed by the reference states listed in
                            allRefTypesWithFiles (must exist)
        -l/--minPotential   lower bound passed to the potential calculator
        -u/--maxPotential   upper bound passed to the potential calculator
        -s/--minSeqSep      minimum sequence separation (>= 1) used for the
                            text output
        -t/--textFormat     write a text file instead of a pickle
        -n/--nonZero        accepted, but has no effect in this function

    Args:
        argv: command-line arguments without the program name.

    Returns:
        None. Calls exit(1) after printing a message on invalid input.
    """
    # Messages are printed through single-string print(...) calls: under
    # Python 2 they produce exactly the same output as the print statement
    # and they keep this function parseable by Python 3 tooling.
    inputFile = None
    labelNames = ['CbCb']
    potentialFileSuffix = 'pkl'
    minPotential = -30.0
    maxPotential = 30.0
    minSeqSep = 3
    minSeqSepStr = '3'

    ## the largest dist cutoff
    rc = 18

    alpha4DFIRE = 1.61
    rgScale4DOPE = 1.

    ## reference state and the optional file describing it
    reference = 'DFIRE'
    refFile = None

    # NOTE(review): the long options "textFormat=" and "nonZero=" are declared
    # as taking an argument while the short forms -t/-n are plain flags, so
    # "--textFormat" consumes the following token. Left unchanged to keep the
    # command line backward-compatible; confirm the intended form.
    try:
        opts, args = getopt.getopt(argv, "i:a:r:l:u:s:f:tn", ["input=", "atomPairType=", "refState=", "minPotential=", "maxPotential=", "minSeqSep=", "refFile=", "textFormat=", "nonZero="])
    except getopt.GetoptError:
        Usage()
        exit(1)
    print('%s %s' % (opts, args))

    if len(opts) < 1:
        Usage()
        exit(1)

    for opt, arg in opts:
        if opt in ("-i", "--input"):
            inputFile = arg

        elif opt in ("-a", "--atomPairType"):
            labelNames = config.ParseLabelNames(arg)

        elif opt in ("-r", "--refState"):
            ## format: TYPE[+rc[+param]]
            fields = arg.split('+')
            reference = fields[0].upper()
            if reference not in allRefTypes:
                print('%s %s' % ('allowed reference types: ', allRefTypes))
                exit(1)

            if len(fields) > 1:
                if reference == 'DFIRE':
                    rc = np.float32(fields[1])
                    if len(fields) > 2:
                        alpha4DFIRE = np.float32(fields[2])

                elif reference == 'DOPE':
                    rc = np.float32(fields[1])
                    if len(fields) > 2:
                        rgScale4DOPE = np.float32(fields[2])

                elif reference == 'SimuRW'.upper():
                    rc = np.float32(fields[1])

                else:
                    print('%s %s' % ('WARNING: unsupported reference format: ', arg))

        elif opt in ("-f", "--refFile"):
            refFile = arg
            if not os.path.isfile(refFile):
                print('%s %s' % ('the provided file for reference state is not valid: ', refFile))
                exit(1)

        elif opt in ("-l", "--minPotential"):
            minPotential = np.float32(arg)

        elif opt in ("-u", "--maxPotential"):
            maxPotential = np.float32(arg)

        elif opt in ("-s", "--minSeqSep"):
            minSeqSep = np.int32(arg)
            minSeqSepStr = arg
            if minSeqSep < 1:
                print('ERROR: minSeqSep shall be at least 1')
                exit(1)

        elif opt in ("-t", "--textFormat"):
            potentialFileSuffix = '.txt'

        elif opt in ("-n", "--nonZero"):
            ## the option is still accepted, but the flag it used to set was
            ## never read anywhere in this function
            pass

        else:
            Usage()
            exit(1)

    if inputFile is None:
        print('Please provide an input file')
        exit(1)
    if not os.path.isfile(inputFile):
        print('%s %s' % ('The input file does not exist: ', inputFile))
        exit(1)

    if reference in allRefTypesWithFiles and refFile is None:
        print('The file for user-sepcified reference state is empty')
        exit(1)

    ## the target name is the input file name up to its first dot
    targetName = os.path.basename(inputFile).split('.')[0]

    content = DistanceUtils.LoadRawDistProbFile(inputFile)
    assert len(content) >= 6

    name, sequence, predictedDistProb, predictedContactProb, labelWeight, labelDistribution = content[:6]
    assert labelWeight is not None, "labelWeight shall not be empty"

    ## if needed, add code to here the predicted dist probability

    filenames = [targetName, 'distPotential']
    if reference == 'DFIRE':
        potential = CalcPotentialByDFIRE(predictedDistProb, alpha=alpha4DFIRE, largestDistance=rc, minPotential=minPotential, maxPotential=maxPotential)
        filenames.extend([reference, str(rc), str(alpha4DFIRE), potentialFileSuffix])
    elif reference == 'DOPE':
        potential = CalcPotentialByDOPE(predictedDistProb, largestDistance=rc, rgScale=rgScale4DOPE, minPotential=minPotential, maxPotential=maxPotential)
        filenames.extend([reference, str(rc), str(rgScale4DOPE), potentialFileSuffix])
    elif reference == 'SimuRW'.upper():
        potential = CalcPotentialBySimuRW(predictedDistProb, refFile, largestDistance=rc, minPotential=minPotential, maxPotential=maxPotential)
        filenames.extend([reference, str(rc), potentialFileSuffix])
    else:
        print('%s %s' % ('ERROR: unimplemented reference state: ', reference))
        exit(1)

    ## with the default suffix 'pkl' the joined name ends with '.pkl'; with
    ## the text suffix '.txt' it does not, which selects the text branch below
    potentialFileName = '.'.join(filenames)

    ## built once; only the requested labels are written out
    wantedLabels = set(labelNames)

    ## save to PKL file
    if potentialFileName.endswith('.pkl'):
        potential_new = dict()
        distCutoffs = dict()
        for response, pot in potential.iteritems():
            labelName = config.Response2LabelName(response)
            if labelName not in wantedLabels:
                continue

            potential_new[response] = pot
            distCutoffs[response] = config.GetCutoffs(response)

        ## the file is opened only once the data is ready and is closed even
        ## when pickling raises
        with open(potentialFileName, 'wb') as fh:
            cPickle.dump((name, sequence, potential_new, distCutoffs), fh, protocol=cPickle.HIGHEST_PROTOCOL)
        return

    ## save to text file
    potentialFileName = targetName + '.distPotential.s' + minSeqSepStr + potentialFileSuffix
    with open(potentialFileName, 'w') as fh:
        fh.write('#TARGET\t' + targetName + '\n')
        fh.write('#SEQ\t' + sequence + '\n')
        fh.write('#DistanceBinBoundaries\t' + "Please check config.py" + '\n')

        for response, pot in potential.iteritems():
            labelName, labelType, subType = config.ParseResponse(response)
            if labelName not in wantedLabels:
                continue

            size = pot.shape
            for i in xrange(size[0]):
                rawPotStrs = []

                ## only pairs (i, j) with j - i >= minSeqSep are written
                for j in xrange(i + minSeqSep, size[1]):
                    atom1, atom2 = config.SelectAtomPair(sequence, i, j, labelName)
                    y = pot[i, j]

                    ## residue numbers in the output are 1-based
                    rawPotStr = ' '.join(['AtomPair', atom1.upper(), str(i + 1), atom2.upper(), str(j + 1), subType] + ["{:.4f}".format(e) for e in y])
                    rawPotStrs.append(rawPotStr)

                if rawPotStrs:
                    fh.write('\n'.join(rawPotStrs) + '\n')
Ejemplo n.º 11
0
def Score(pairwiseMatrix,
          potential,
          labelNames,
          outputDetails=False,
          minSeqSep=6,
          maxCstDist=None):
    """Score pairwise matrices against discrete potentials.

    For every response in `potential` whose label name is requested and is
    present in `pairwiseMatrix`, the matrix is discretized into labels, the
    potential value of the observed label is looked up for every pair (i, j)
    and the values with j - i >= minSeqSep are summed.

    Args:
        pairwiseMatrix: dict mapping a label name to a 2D matrix.
        potential: dict mapping a response to an array indexed as
            [i, j, label].
        labelNames: label names to be scored; other responses are skipped.
        outputDetails: if True, print one line per scored pair with the
            1-based indices, the label name, the observed label, its score
            and the whole potential vector of that pair.
        minSeqSep: diagonal offset passed to np.triu / np.triu_indices.
        maxCstDist: if not None, atom-pair entries whose label is larger than
            the label of this distance contribute 0.

    Returns:
        (totalScore, scores) where scores maps each scored response to its
        own sum and totalScore is the sum over all scored responses.

    Exits with status 1 on a label type that does not start with 'Discrete'
    or on a label name that is neither an atom pair nor an orientation.
    """
    # Messages are printed through single-string print(...) calls: under
    # Python 2 they produce exactly the same output as the print statement
    # and they keep this function parseable by Python 3 tooling.
    totalScore = 0.0
    scores = dict()
    wantedLabels = set(labelNames)

    for response, pot in potential.items():
        labelName, labelType, subType = config.ParseResponse(response)
        if labelName not in wantedLabels:
            continue
        if labelName not in pairwiseMatrix:
            print('%s %s' % ('WARNING: the atomDistMatrix does not have distance information for atom pair:', labelName))
            continue
        if not labelType.startswith('Discrete'):
            print('%s %s' % ('ERROR: unsupported labelType: ', labelType))
            exit(1)

        pm = pairwiseMatrix[labelName]
        assert pm.shape == (
            pot.shape[0], pot.shape[1]
        ), "the size of the pairwise potential not compatible with the matrix"

        if labelName in config.allAtomPairNames:
            ## discretize the distance matrix, an invalid entry -1 will have the largest label number
            labelMatrix, _, _ = DistanceUtils.DiscretizeDistMatrix(
                pm,
                config.distCutoffs[subType],
                invalidDistanceSeparated=False)
        elif labelName in config.allOrientationNames:
            ## NOTE(review): orientation labels are discretized with
            ## config.distCutoffs as well; confirm that this table also holds
            ## the orientation cutoffs
            labelMatrix, _, _ = OrientationUtils.DiscretizeOrientationMatrix(
                pm,
                config.distCutoffs[subType],
                distMatrix=pairwiseMatrix['CbCb'],
                invalidEntrySeparated=False)
        else:
            ## without this branch labelMatrix would be undefined, or silently
            ## reused from the previous response
            print('%s %s' % ('ERROR: unsupported label name: ', labelName))
            exit(1)

        numRows, numCols = pot.shape[0], pot.shape[1]
        rows, cols = np.mgrid[0:numRows, 0:numCols]
        ## integer-array indexing returns a new array, so the masking below
        ## does not modify the potential itself
        scoreMatrix = pot[rows, cols, labelMatrix]

        if labelName in config.allAtomPairNames and maxCstDist is not None:
            label4maxDist = DistanceUtils.LabelsOfOneDistance(
                maxCstDist, config.distCutoffs[subType])
            np.putmask(scoreMatrix, labelMatrix > label4maxDist, 0)

        scores[response] = np.sum(np.triu(scoreMatrix, minSeqSep))
        totalScore += scores[response]

        if outputDetails:
            ## note that if the potential matrix is not symmetric, we have to do something more here
            indices = np.triu_indices(numRows, k=minSeqSep, m=numCols)
            ## kept in separate names so that the result dict `scores` is not
            ## overwritten by the per-pair values
            pairScores = scoreMatrix[indices]
            pairLabels = labelMatrix[indices]
            for i, j, s, label in zip(indices[0], indices[1], pairScores, pairLabels):
                ## the third field was an undefined name; the label name of
                ## the response is reported there
                outinfo = [
                    str(i + 1),
                    str(j + 1), labelName,
                    str(label), "{:.4f}".format(s)
                ] + ["{:.3f}".format(v) for v in pot[i, j]]
                print(' '.join(outinfo))

    return totalScore, scores
def WriteSplineConstraints(constraints,
                           savefile=None,
                           savefolder4histfile=None):
    """Write spline constraints and their histogram files.

    For every constraint whose 'y' values are all finite, a two-line
    histogram file (x_axis / y_axis, tab separated) is written into
    `savefolder4histfile` and one constraint line referring to that file is
    generated. All constraint lines are then written to `savefile`.

    Args:
        constraints: iterable of dicts with the keys 'response', 'x', 'y',
            'atomNums', 'atoms', 'type' and 'binWidth'.
        savefile: path of the constraint file. It is only written when at
            least one constraint line was generated.
        savefolder4histfile: folder for the histogram files; it is created,
            including missing parent folders, when it does not exist.

    Returns:
        The list of generated constraint lines.

    Exits with status 1 when savefile or savefolder4histfile is None, or when
    more than 100 constraints were ignored because of non-finite values.
    """
    # Messages are printed through single-string print(...) calls: under
    # Python 2 they produce exactly the same output as the print statement
    # and they keep this function parseable by Python 3 tooling.
    if savefile is None:
        print('ERROR: please specify the save file for constaints!')
        exit(1)

    if savefolder4histfile is None:
        print('ERROR: please specify the save folder for histogram files!')
        exit(1)
    histfileDir = savefolder4histfile
    if not os.path.isdir(histfileDir):
        ## makedirs also creates missing parent folders
        os.makedirs(histfileDir)

    numIgnored = 0

    potStrs = []
    for constraint in constraints:
        response = constraint['response']
        labelName, _, _ = config.ParseResponse(response)
        x = constraint['x']
        y = constraint['y']
        if not np.isfinite(y).all():
            print('%s %s' % ('WARNING: ignore one constraint since it may have an NaN or infinite value:', constraint))
            numIgnored += 1
            continue

        atomNums = [str(i) for i in constraint['atomNums']]
        atomNumStr = '-'.join(atomNums)

        ## write histogram to histfile
        histfile = os.path.join(histfileDir,
                                response + '-' + atomNumStr + '.potential.txt')
        xStr = '\t'.join(['x_axis'] + ["{:.4f}".format(e) for e in x])
        yStr = '\t'.join(['y_axis'] + ["{:.4f}".format(e) for e in y])
        with open(histfile, 'w') as fh:
            fh.write('\n'.join([xStr, yStr]) + '\n')

        ## line layout: type, (ATOM number) pairs, SPLINE, label name,
        ## histogram file, expected value 0, weight 1, bin width
        potStrList = [constraint['type']]
        for atomName, number in zip(constraint['atoms'], atomNums):
            potStrList.extend([atomName.upper(), number])
        potStrList.append('SPLINE')
        potStrList.append(labelName)
        potStrList.append(histfile)
        potStrList.extend(['0', '1', "{:.6f}".format(constraint['binWidth'])])

        potStrs.append(' '.join(potStrList))

    if numIgnored > 100:
        print('%s %s' % ('ERROR: too many constraints are ignored:', numIgnored))
        exit(1)

    if len(potStrs) > 0:
        with open(savefile, 'w') as fh:
            fh.write('\n'.join(potStrs) + '\n')

    return potStrs
Ejemplo n.º 13
0
def EstimateDistanceBound(fixedProb0, response):
    """Estimate per-residue-pair distance statistics from predicted probabilities.

    For every pair (i, j) with |i - j| >= 2 whose summed probability from
    label `labelOf15` onwards does not exceed the threshold of its
    sequence-separation range, the distribution over the first numDistBins
    labels is renormalized and used to compute a mean distance, a standard
    deviation and several one-sided (lower/upper) deviation measures.

    Args:
        fixedProb0: predicted probability array indexed as [i, j, label].
        response: response name; it is parsed with config.ParseResponse and
            its label type selects the cutoffs from config.distCutoffs.

    Returns:
        A float32 array of shape (size[0], size[1], 10). Pairs that are
        skipped keep the initial value -1 in all 10 slots; the meaning of the
        slots is listed in the comment above `estimates`.

    Exits with status 1 when config.GetResponseProbDims(response) < 12.
    """

    labelName, distLabelType, subType = config.ParseResponse(response)
    if 'Plus' in distLabelType:
        ## merge the last interval (which represents invalid distance) to the last second one
        size = fixedProb0.shape
        fixedProb = np.zeros((size[0], size[1], size[2] - 1),
                             dtype=fixedProb0.dtype)
        fixedProb[:, :, :-1] = fixedProb0[:, :, :-2]
        fixedProb[:, :, -1] = np.sum(fixedProb0[:, :, -2:], axis=2)
    else:
        fixedProb = fixedProb0

    if config.GetResponseProbDims(response) < 12:
        print 'ERROR: it is not meaningful to estimate inter-resdiue distance when the number of labels is < 12'
        exit(1)

    ## the sub type is re-derived here by dropping the leading 'Discrete' from the label type
    subType = distLabelType[len('Discrete'):]
    distCutoffs_original = config.distCutoffs[subType]
    ## the first cutoff is dropped; the remaining values are used below as the upper boundaries of the bins
    ## assumes distCutoffs_original is a numpy array (a scalar is subtracted from it below) -- TODO confirm
    distCutoffs = distCutoffs_original[1:]

    ## probability thresholds for 15 Angstrom, from long-range, to medium-range, short-range and near-range
    ## if the predicted probability for 15A is larger than the threshold, we think the distance of one residue pair shall be larger than 15A
    thresholds_4_15A = [0.75, 0.65, 0.5, 1.]
    """
	if distCutoffs[-1] > 17.0:
    		thresholds_4_15A = [0.85, 0.75, 0.6, 1. ]
	"""

    ## ratio threshods for 15A. we take at most ratio*L long-, medium- and short-range distance restraints for a protein of L residues
    ratios_4_15A = [9.5, 2.2, 2.2, 4.]

    ## determine the real probability thresholds for 15A by ratio
    cutoffs = DetermineProbThresholds(fixedProb, ratios_4_15A,
                                      distCutoffs_original)

    #print 'prob cutoffs determined by ratio are: ', cutoffs

    ## per range, the smaller of the fixed threshold and the ratio-derived one is used
    prob_thresholds_4_15A = [
        min(x, y) for x, y in zip(thresholds_4_15A, cutoffs)
    ]

    ## label of config.InteractionLimit (presumably 15 Angstrom, judging from the names used here -- confirm in config)
    labelOf15 = DistanceUtils.LabelsOfOneDistance(config.InteractionLimit,
                                                  distCutoffs_original)

    #print 'the final prob cutoffs are: ', thresholds_4_15A

    ## half of the average width between consecutive cutoffs
    halfBinWidth = np.average([
        distCutoffs[i] - distCutoffs[i - 1]
        for i in range(1, len(distCutoffs))
    ]) / 2.

    ## each bin is represented by its upper boundary minus half of the average bin width
    mid_distance = np.array(distCutoffs - halfBinWidth).astype(np.float32)
    upper_boundary_distance = np.array(distCutoffs).astype(np.float32)

    numDistBins = mid_distance.shape[0]
    mid_distance_sq = np.square(mid_distance)

    size = fixedProb.shape
    #print size
    #print numDistBins
    ## there must be exactly one more label than bins with a mid distance
    assert size[2] == numDistBins + 1

    ## the 10 values stored for each residue pair, in this order:
    ## 0: dist_mean, 1: dist_std,
    ## 2: dist_std_lower, 3: dist_std_upper (unnormalized one-sided deviations),
    ## 4: dist_std_lower2, 5: dist_std_upper2 (normalized by the one-sided probability mass),
    ## 6: dist_std_lower3, 7: dist_std_upper3 (mean absolute one-sided deviations),
    ## 8: dist_std_lower4, 9: dist_std_upper4 (one-sided mass times the normalized deviation)
    ## not sure why initilize to -1
    estimates = np.full((size[0], size[1], 10), -1, dtype=np.float32)

    for i in range(size[0]):
        for j in range(size[1]):
            ## select the threshold by sequence separation; pairs closer than 2 are never estimated
            offset = abs(i - j)
            if offset < 2:
                continue
            elif offset < 6:
                rangeIndex = 3
            elif offset < 12:
                rangeIndex = 2
            elif offset < 24:
                rangeIndex = 1
            else:
                rangeIndex = 0

            ## if the prob of this residue pair suggest that the estimated distance is likely to be >15A, then do nothing
            if np.sum(
                    fixedProb[i, j,
                              labelOf15:]) > prob_thresholds_4_15A[rangeIndex]:
                continue

            ## renormalize the distance prob distribution by setting the prob of the largest distance bin to 0
            newProb = fixedProb[i, j, :numDistBins] / np.sum(
                fixedProb[i, j, :numDistBins])

            dist_mean = np.average(mid_distance, weights=newProb)
            dist_sq_mean = np.average(mid_distance_sq, weights=newProb)
            ## halfBinWidth^2 / 3 is the variance of a uniform distribution over an interval of
            ## half-width halfBinWidth; presumably it accounts for the spread inside a bin
            dist_std = np.sqrt(dist_sq_mean - np.square(dist_mean) +
                               np.square(halfBinWidth) * 1. / 3)

            ## find the bin into which dist_mean falls into
            binIndex = 0
            while dist_mean > upper_boundary_distance[binIndex]:
                binIndex = binIndex + 1

            ## now dist_mean <= upper_boundary_distance[binIndex] and dist_mean > upper_boundary_distance[binIndex - 1]
            ## upperProb[0] is the share of bin binIndex lying above dist_mean (proportional to the
            ## length of that part of the bin); the other entries are the bins above binIndex
            upperProb = np.zeros(numDistBins - binIndex, dtype=np.float32)
            upperProb[0] = (upper_boundary_distance[binIndex] -
                            dist_mean) / (2 * halfBinWidth) * newProb[binIndex]
            upperProb[1:] = newProb[binIndex + 1:]

            ## lowerProb holds the bins below binIndex plus the remaining share of bin binIndex
            lowerProb = np.zeros(binIndex + 1, dtype=np.float32)
            lowerProb[0:binIndex] = newProb[0:binIndex]
            lowerProb[binIndex] = newProb[binIndex] - upperProb[0]

            ## calculate the upper distance std
            dist_var_upper = np.dot(
                np.square(mid_distance[binIndex + 1:] - dist_mean) +
                np.square(halfBinWidth) * 1. / 3,
                upperProb[1:]) + upperProb[0] * np.square(
                    upper_boundary_distance[binIndex] - dist_mean) * 1. / 3

            ## the unnormalized distance deviation
            dist_std_upper = np.sqrt(dist_var_upper)

            ## the normalized distance deviation
            dist_std_upper2 = np.sqrt(dist_var_upper / np.sum(upperProb))
            ## the probability-weighted mean deviation above dist_mean, normalized by the upper mass
            dist_std_upper3 = (np.dot(mid_distance[binIndex + 1:] - dist_mean,
                                      upperProb[1:]) + upperProb[0] *
                               (upper_boundary_distance[binIndex] - dist_mean)
                               / 2) / np.sum(upperProb)

            ## the expected distance deviation
            dist_std_upper4 = np.sum(upperProb) * dist_std_upper2

            ## calculate the lower distance std
            ## left_boundary is the lower edge of the bin that contains dist_mean
            if binIndex == 0:
                left_boundary = distCutoffs[0] - 2 * halfBinWidth
            else:
                left_boundary = upper_boundary_distance[binIndex - 1]
            dist_var_lower = np.dot(
                np.square(mid_distance[:binIndex] - dist_mean) +
                np.square(halfBinWidth) * 1. / 3,
                lowerProb[:binIndex]) + lowerProb[binIndex] * np.square(
                    dist_mean - left_boundary) * 1. / 3

            ## the unnormalized distance deviation
            dist_std_lower = np.sqrt(dist_var_lower)

            ## the normalized distance deviation
            dist_std_lower2 = np.sqrt(dist_var_lower / np.sum(lowerProb))
            ## the probability-weighted mean deviation below dist_mean, normalized by the lower mass
            dist_std_lower3 = (
                np.dot(dist_mean - mid_distance[:binIndex],
                       lowerProb[:binIndex]) + lowerProb[binIndex] *
                (dist_mean - left_boundary) / 2) / np.sum(lowerProb)

            ## the expected distance deviation
            dist_std_lower4 = np.sum(lowerProb) * dist_std_lower2
            """
			##only keep those residue pairs with estimated distance < 15 Angstrom
	    		if dist_mean >= 15.0:
				estimates[i, j] = np.array( [ -1. ] * 10 ).astype(np.float32)
	    		else:
	        		estimates[i, j] = np.array([dist_mean, dist_std, dist_std_lower, dist_std_upper, dist_std_lower2, dist_std_upper2, dist_std_lower3, dist_std_upper3, dist_std_lower4, dist_std_upper4]).astype(np.float32)
			"""
            ## store all 10 statistics of this pair (no filtering by dist_mean is applied here)
            estimates[i, j] = np.array([
                dist_mean, dist_std, dist_std_lower, dist_std_upper,
                dist_std_lower2, dist_std_upper2, dist_std_lower3,
                dist_std_upper3, dist_std_lower4, dist_std_upper4
            ]).astype(np.float32)

    return estimates