os.mkdir(savefolder)

        else:
            Usage()
            exit(1)

    if len(args) < 2:
        Usage()
        exit(1)

    seqFile = args[0]

    if not os.path.isfile(seqFile):
        print "ERROR, invalid sequence file: ", seqFile
        exit(1)
    mainSeq = LoadFASTAFile(seqFile)
    newSeq = copy.deepcopy(mainSeq)

    domainModels = args[1:]
    alignments = []
    for dmodel in domainModels:
        alignment, names = ReadAlignment(dmodel, returnNames=True)
        tplSeq, tgtSeq = ExtractSeqFromAlignment(alignment)
        index = newSeq.find(tgtSeq)
        if index < 0:
            print 'ERROR: cannot map the domain alignment sequence to the whole-chain alignment sequence'
            print 'domain seq: ', tgtSeq
            print 'whole  seq: ', newSeq
            exit(1)

        ## alignment[0] and alignment[1] are the template and query sequences in the alignment, respectively
def CalcOrientationMatrix(pdbfile, sequence=None, seq_file=None):
    """Build pairwise inter-residue orientation matrices for one PDB file.

    Args:
        pdbfile: path of the PDB file the residues are read from.
        sequence: optional sequence that indexes the matrices. When None it
            is loaded from seq_file, or falls back to the PDB sequence when
            seq_file is None as well.
        seq_file: optional FASTA file; only read when sequence is None.

    Returns:
        A dict with the keys 'pdbseq', 'seq4matrix' (str of the sequence
        used), one (seqLen, seqLen) np.float16 matrix per name in
        AllLabelNames, and 'numInvalidAtoms' (counters for 'TwoROri' and
        'FourCaOri').

    The process exits with status 1 when MapSeq2PDB returns None, i.e. the
    sequence cannot be mapped onto the residues of the PDB file.
    """
    ## get pdb residues and pdb sequence
    structure_id = os.path.splitext(os.path.basename(pdbfile))[0]
    pdbseq, residueList = ExtractSeqFromPDBFile(pdbfile, name=structure_id)

    ## decide which sequence indexes the matrices
    if sequence is None:
        sequence = pdbseq if seq_file is None else LoadFASTAFile(seq_file)

    ## get sequence-pdbseq map
    # The mapping is always produced by MapSeq2PDB, including the case where
    # the PDB sequence itself is used. This is what the original mixed
    # tab/space layout executed; its identity shortcut was overwritten here.
    mapping_seq2pdb = MapSeq2PDB(sequence, pdbseq, residueList)
    if mapping_seq2pdb is None:
        print('ERROR: cannot map the sequence information to a pdb file: ' + pdbfile)
        exit(1)

    ## calculate the orientation matrices
    OrientationMatrix = {}
    OrientationMatrix['pdbseq'] = pdbseq
    OrientationMatrix['seq4matrix'] = str(sequence)

    seqLen = len(sequence)
    for apt in AllLabelNames:
        # every entry starts as "no valid coordinates" and is overwritten
        # below for the residue pairs that exist in the structure
        OrientationMatrix[apt] = np.ones((seqLen, seqLen), np.float16) * NoValidCoordinates

    numInvalidAtoms = {'TwoROri': 0, 'FourCaOri': 0}

    # (sequence index, residueList index) for every sequence position that
    # maps to a residue; a negative mapping value marks an unmapped position
    mapped = [(i, j) for i, j in zip(range(seqLen), mapping_seq2pdb) if j >= 0]

    for i, pdb_i in mapped:
        residue_i = residueList[pdb_i]
        for k, pdb_k in mapped:
            if i == k:
                for apt in AllLabelNames:
                    OrientationMatrix[apt][i, k] = ValueOfSelf
                continue

            residue_k = residueList[pdb_k]
            # missing atoms are only counted; the pair orientation is still
            # requested from CalcOrientationOf2Residues
            if not HasAtoms4TwoROri(residue_i, residue_k):
                numInvalidAtoms['TwoROri'] += 1

            pairOri = CalcOrientationOf2Residues(residue_i, residue_k)
            for apt, v in pairOri.items():
                OrientationMatrix[apt][i, k] = v

            # the four-CA orientation also needs residues i+1 and k+1
            if i == seqLen - 1 or k == seqLen - 1:
                continue

            j1 = mapping_seq2pdb[i + 1]
            l1 = mapping_seq2pdb[k + 1]
            if j1 < 0 or l1 < 0:
                continue
            residue_i1 = residueList[j1]
            residue_k1 = residueList[l1]

            fourResidues = (residue_i, residue_i1, residue_k, residue_k1)
            if not all(res.has_id('CA') for res in fourResidues):
                numInvalidAtoms['FourCaOri'] += 1
                continue

            CaOri = CalcOrientationOf4CAs(residue_i['CA'], residue_i1['CA'],
                                          residue_k['CA'], residue_k1['CA'])
            for apt, v in CaOri.items():
                OrientationMatrix[apt][i, k] = v

    for apt in AllLabelNames:
        OrientationMatrix[apt] = OrientationMatrix[apt].astype(np.float16)

    OrientationMatrix['numInvalidAtoms'] = numInvalidAtoms
    print('numInvalidAtoms: ' + str(numInvalidAtoms))

    # only reported; the matrices are still returned to the caller
    if any(count > 5 for count in numInvalidAtoms.values()):
        print('ERROR: there are too many invalid atoms in ' + pdbfile)

    return OrientationMatrix

# Parse the command line. Short options: -l (flag), -c <arg>, -s <arg>.
# NOTE(review): the long form "modelList=" requires an argument while the
# short form -l takes none -- confirm which one is intended.
try:
	opts, args = getopt.getopt(sys.argv[1:],"lc:s:",["modelList=", "cutoff=", "savefolder="])
except getopt.GetoptError:
        Usage()
        exit(1)

# Exactly two positional arguments are required: the sequence file and a
# model list file or model folder.
if len(args) != 2:
        Usage()
        exit(1)

seqFile = args[0]
# target name = sequence file name up to its first '.'
target = os.path.basename(seqFile).split('.')[0]
sequence = LoadFASTAFile(seqFile)

modelListOrFolder = args[1]
if not (os.path.isdir(modelListOrFolder) or os.path.isfile(modelListOrFolder) ):
	print 'ERROR: invalid model list file or folder:', modelListOrFolder
	exit(1)

# Defaults that the option loop below may override.
InputIsModelList = False

savefolder = os.getcwd()
cutoff=-1
cutoffs=[1, -1, -2]

for opt, arg in opts:
	if opt in ("-l", "--modelList"):
		InputIsModelList = True
Beispiel #4
0
        else:
            Usage()
            exit(1)

    if len(args) < 2:
        Usage()
        exit(1)

    seqFile = args[0]
    domainModels = args[1:]

    if not os.path.isfile(seqFile):
        print "ERROR, invalid sequence file: ", seqFile
        exit(1)
    mainSeq = LoadFASTAFile(seqFile)

    domainSeqs = []
    chainIDs = []
    localQualitys = []
    for dmodel in domainModels:
        pdbseqs, _, chains = PDBUtils.ExtractSeqFromPDBFile(dmodel)
        assert len(pdbseqs) == 1
        domainSeqs.append(pdbseqs[0])
        #print 'chain id: ', chains[0].get_id()
        chainIDs.append(chains[0].get_id())
        locQuality = ExtractLocalQuality(dmodel)
        assert len(locQuality) == len(pdbseqs[0])
        localQualitys.append(locQuality)

    print mainSeq
def main(argv):
    """Command-line entry point: write Phi/Psi constraints for one target.

    Args:
        argv: command-line arguments without the program name. Exactly one
            positional argument (the predicted-property file) is required.

    Recognised options:
        -a/--propertyType  property type; only 'PHIPSI' is accepted
        -f/--funcType      CHARMM, AMBERPERIODIC, CIRCULARHARMONIC or HARMONIC
        -w/--weight        energy weight, must be >= 0
        -s/--savefolder    output folder (default: current working directory)
        -q/--querySeqFile  FASTA file checked against the property file
        -o/--noDisorder    do not pass predicted disorder to the generator

    The constraints are written, one per line, to
    <savefolder>/<target>.PhiPsi4<funcType>.w<weight>.txt, where <target> is
    the input file name up to its first '.'. Every validation failure prints
    a message and exits with status 1.
    """
    propertyType = 'PhiPsi'.upper()
    funcType = 'AMBERPERIODIC'

    allPropertyTypes = [propertyType]
    allFuncTypes = ['CHARMM', 'AMBERPERIODIC', 'CIRCULARHARMONIC', 'HARMONIC']

    weight = 1.0
    wStr = 'w1'

    querySeqFile = None
    querySeq = None

    UseDisorderInfo = True
    savefolder = os.getcwd()

    if len(argv) < 1:
        Usage()
        exit(1)
    try:
        # NOTE(review): the short flag -o takes no argument but the long
        # form "noDisorder=" requires one -- confirm which is intended.
        opts, args = getopt.getopt(argv, "a:f:w:s:q:o", [
            "propertyType=", "funcType=", "weight=", "savefolder=",
            "querySeqFile=", "noDisorder="
        ])
    except getopt.GetoptError:
        Usage()
        exit(1)

    if len(args) != 1:
        Usage()
        exit(1)

    # args holds exactly one entry here, so inputFile is always a string
    inputFile = args[0]

    for opt, arg in opts:
        if opt in ("-a", "--propertyType"):
            if arg.upper() not in allPropertyTypes:
                print('ERROR: currently only support the following property types:  ' + str(allPropertyTypes))
                exit(1)
            propertyType = arg.upper()

        elif opt in ("-f", "--funcType"):
            if arg.upper() not in allFuncTypes:
                print('ERROR: currently only support the following func types:  ' + str(allFuncTypes))
                exit(1)
            funcType = arg.upper()

        elif opt in ("-w", "--weight"):
            weight = np.float32(arg)
            # the raw option text becomes part of the output file name
            wStr = 'w' + arg
            if weight < 0:
                print('ERROR: the energy weight shall be >=0')
                exit(1)

        elif opt in ("-q", "--querySeqFile"):
            querySeqFile = arg

        elif opt in ("-s", "--savefolder"):
            savefolder = arg

        elif opt in ("-o", "--noDisorder"):
            UseDisorderInfo = False

        else:
            Usage()
            exit(1)

    assert propertyType == 'PhiPsi'.upper()

    if not os.path.isfile(inputFile):
        print('ERROR: the input file does not exist:  ' + inputFile)
        exit(1)

    # a query sequence file that does not exist is ignored, as before
    if querySeqFile is not None and os.path.isfile(querySeqFile):
        querySeq = LoadFASTAFile(querySeqFile)

    targetName = os.path.basename(inputFile).split('.')[0]

    content = PropertyUtils.LoadPredictedProperties(inputFile)
    assert len(content) >= 3
    name, sequence, predProperty = content[:3]

    if querySeq is not None and querySeq != sequence:
        print(' '.join(['ERROR: inconsistent sequences in the two files:', querySeqFile, inputFile]))
        exit(1)

    if 'PhiPsi_vonMise2d4' not in predProperty:
        print('ERROR: the property file does not have predicted Phi/Psi:  ' + inputFile)
        exit(1)

    PhiPsiList = predProperty['PhiPsi_vonMise2d4']
    predDisorder = None
    if UseDisorderInfo and 'disorder' in predProperty:
        predDisorder = predProperty['disorder']

    constraints = GeneratePhiPsiPotential(sequence,
                                          PhiPsiList,
                                          funcType=funcType,
                                          weight0=weight,
                                          predDisorder=predDisorder)

    if len(constraints) < 1:
        print('ERROR: cannot generate any constraints for Phi and Psi from  ' + inputFile)
        exit(1)

    # create the output folder on demand so that a fresh --savefolder works
    if not os.path.isdir(savefolder):
        os.makedirs(savefolder)

    PhiPsiFile = os.path.join(
        savefolder, targetName + '.PhiPsi4' + funcType + '.' + wStr + '.txt')
    with open(PhiPsiFile, 'w') as f:
        f.write('\n'.join(constraints))
def main(argv):
    """Command-line entry point: write spline pair-potential constraints.

    Args:
        argv: command-line arguments without the program name. Exactly one
            positional argument (the pickled potential file) is required.

    Recognised options:
        -a/--labelNames       label names, parsed by config.ParseLabelNames
        -f/--funcType         potential function type; only SPLINE
        -s/--minSeqSep        "dist[+ori]" minimum sequence separations
        -c/--potentialCutoff  "ori[+dist]" potential thresholds
        -b/--barrier          barrier value, must be >= 0
        -t/--topRatio         "ori[+dist]" top ratios
        -q/--querySeqFile     FASTA file checked against the potential file
        -d/--savefolder       output folder (default: current directory)

    The constraints go to <savefolder>/<target>.pairPotential4Rosetta.SPLINE.txt
    and the histogram folder <savefolder>/SplinePotential4<target>/ is passed
    to WriteSplineConstraints; <target> is the input file name up to its
    first '.'. Every validation failure prints a message and exits with
    status 1.
    """
    labelNames = ['CbCb'] + config.TwoROriNames
    seqSep4Dist = 1
    seqSep4Ori = 2
    distPotThreshold = np.finfo(np.float32).max
    oriPotThreshold = 0.04
    # NOTE(review): barrier is parsed and validated below but is not passed
    # to GenerateSplinePotential in this function -- confirm the intent.
    barrier = 1.0

    funcType = 'SPLINE'
    allFuncTypes = set(['SPLINE'])

    topRatio4Ori = 25
    topRatio4Dist = np.iinfo(np.int32).max

    savefolder = os.getcwd()

    if len(argv) < 1:
        Usage()
        exit(1)
    try:
        opts, args = getopt.getopt(argv, "a:f:s:c:b:t:q:d:", [
            "labelNames=", "funcType=", "minSeqSep=", "potentialCutoff=",
            "barrier=", "topRatio=", "querySeqFile=", "savefolder="
        ])
    except getopt.GetoptError:
        Usage()
        exit(1)

    if len(args) != 1:
        Usage()
        exit(1)

    # args holds exactly one entry here, so inputFile is always a string
    inputFile = args[0]
    querySeqFile = None
    querySeq = None

    for opt, arg in opts:
        if opt in ("-a", "--labelNames"):
            labelNames = config.ParseLabelNames(arg)

        elif opt in ("-s", "--minSeqSep"):
            fields = arg.split('+')
            seqSep4Dist = np.int32(fields[0])
            # explicit checks instead of assert, which python -O strips
            if seqSep4Dist <= 0:
                print('ERROR: The sequence separation for a valid distance potential shall be at least 1')
                exit(1)

            if len(fields) > 1:
                seqSep4Ori = np.int32(fields[1])
                if seqSep4Ori <= 1:
                    print('ERROR: The sequence separation for a valid orientation shall be at least 2')
                    exit(1)

        elif opt in ("-c", "--potentialCutoff"):
            # the orientation threshold comes first, the distance one second
            fields = arg.split('+')
            oriPotThreshold = np.float32(fields[0])
            if len(fields) > 1:
                distPotThreshold = np.float32(fields[1])

        elif opt in ("-f", "--funcType"):
            funcType = arg.upper()
            if funcType not in allFuncTypes:
                print('ERROR: unsupported potential func type: ' + funcType)
                exit(1)

        elif opt in ("-b", "--barrier"):
            barrier = np.float32(arg)
            if not barrier >= 0:
                print('ERROR: the barrier shall be >=0')
                exit(1)

        elif opt in ("-t", "--topRatio"):
            fields = arg.split('+')
            topRatio4Ori = np.float32(fields[0])
            if len(fields) > 1:
                topRatio4Dist = np.float32(fields[1])

        elif opt in ("-q", "--querySeqFile"):
            querySeqFile = arg

        elif opt in ("-d", "--savefolder"):
            savefolder = arg
        else:
            Usage()
            exit(1)

    if not os.path.isfile(inputFile):
        print('ERROR: the input potential file does not exist:  ' + inputFile)
        exit(1)

    # a query sequence file that does not exist is ignored, as before
    if querySeqFile is not None and os.path.isfile(querySeqFile):
        querySeq = LoadFASTAFile(querySeqFile)

    # makedirs also creates missing parent folders, which os.mkdir cannot
    if not os.path.isdir(savefolder):
        os.makedirs(savefolder)

    ## load up the potential file
    # Pickles are binary data, hence 'rb'.
    # SECURITY: unpickling can execute arbitrary code; only load potential
    # files that come from a trusted source.
    with open(inputFile, 'rb') as fh:
        potData = cPickle.load(fh)

    # potData[1] is compared with the query sequence
    if querySeq is not None and querySeq != potData[1]:
        print(' '.join(['ERROR: inconsistent sequences in', querySeqFile, inputFile]))
        exit(1)

    allConstraints = GenerateSplinePotential(potData,
                                             labelNames=labelNames,
                                             topRatio4Dist=topRatio4Dist,
                                             topRatio4Ori=topRatio4Ori,
                                             minSeqSep4Dist=seqSep4Dist,
                                             minSeqSep4Ori=seqSep4Ori,
                                             distPotThreshold=distPotThreshold,
                                             oriPotThreshold=oriPotThreshold)

    ## save the Rosetta constraints into files
    target = os.path.basename(inputFile).split('.')[0]
    rosettaPotentialFileName = os.path.join(
        savefolder, target + '.pairPotential4Rosetta.SPLINE.txt')
    savefolder4histfile = os.path.join(savefolder,
                                       'SplinePotential4' + target + '/')
    WriteSplineConstraints(allConstraints,
                           savefile=rosettaPotentialFileName,
                           savefolder4histfile=savefolder4histfile)