os.mkdir(savefolder) else: Usage() exit(1) if len(args) < 2: Usage() exit(1) seqFile = args[0] if not os.path.isfile(seqFile): print "ERROR, invalid sequence file: ", seqFile exit(1) mainSeq = LoadFASTAFile(seqFile) newSeq = copy.deepcopy(mainSeq) domainModels = args[1:] alignments = [] for dmodel in domainModels: alignment, names = ReadAlignment(dmodel, returnNames=True) tplSeq, tgtSeq = ExtractSeqFromAlignment(alignment) index = newSeq.find(tgtSeq) if index < 0: print 'ERROR: cannot map the domain alignment sequence to the whole-chain alignment sequence' print 'domain seq: ', tgtSeq print 'whole seq: ', newSeq exit(1) ## alignment[0] and alignmentp1[1] are the template and query seq in alignment, respectively
def CalcOrientationMatrix(pdbfile, sequence=None, seq_file=None):
    """Build pairwise inter-residue orientation matrices for one PDB file.

    Args:
        pdbfile: path of the PDB file; its basename without extension is
            passed to ExtractSeqFromPDBFile as the structure name.
        sequence: optional sequence the matrices are indexed by. When given,
            it takes precedence over seq_file.
        seq_file: optional FASTA file that is loaded (LoadFASTAFile) when
            `sequence` is not given.

        When neither is supplied, the sequence extracted from the PDB file
        is used with an identity mapping.

    Returns:
        A dict with the keys
          'pdbseq'          : the sequence extracted from the PDB file,
          'seq4matrix'      : str(sequence), the sequence indexing the matrices,
          <label>           : one (seqLen, seqLen) np.float16 matrix for every
                              label in AllLabelNames,
          'numInvalidAtoms' : {'TwoROri': n, 'FourCaOri': m} counters.

    Side effects:
        Prints an error and calls exit(1) when the sequence cannot be mapped
        to the PDB residues. Prints (but does not fail) when either counter
        exceeds 5.
    """
    ## get pdb residues and pdb sequence
    structure_id = os.path.splitext(os.path.basename(pdbfile))[0]
    pdbseq, residueList = ExtractSeqFromPDBFile(pdbfile, name=structure_id)

    ## get sequence-pdbseq map
    if sequence is None and seq_file is None:
        # No external sequence: the PDB sequence maps onto itself.
        sequence = pdbseq
        mapping_seq2pdb = np.arange(len(pdbseq))
    else:
        if sequence is None:
            sequence = LoadFASTAFile(seq_file)
        # Map for BOTH an explicitly passed sequence and one read from
        # seq_file; otherwise an explicit `sequence` could never be mapped
        # and the function would always bail out below.
        mapping_seq2pdb = MapSeq2PDB(sequence, pdbseq, residueList)

    if mapping_seq2pdb is None:
        print('ERROR: cannot map the sequence information to a pdb file: ' + pdbfile)
        exit(1)

    ## calculate the orientation matrices
    seqLen = len(sequence)
    OrientationMatrix = {}
    OrientationMatrix['pdbseq'] = pdbseq
    OrientationMatrix['seq4matrix'] = str(sequence)

    # Every entry starts as "no valid coordinates" and is overwritten below
    # for the residue pairs that can actually be evaluated.
    for apt in AllLabelNames:
        OrientationMatrix[apt] = np.ones((seqLen, seqLen), np.float16) * NoValidCoordinates

    numInvalidAtoms = dict()
    numInvalidAtoms['TwoROri'] = 0
    numInvalidAtoms['FourCaOri'] = 0

    # (sequence index, pdb residue index) for the positions whose mapping
    # entry is non-negative; negative entries are skipped (presumably
    # residues without coordinates in the PDB file -- confirm in MapSeq2PDB).
    # Built once instead of being re-filtered for every row.
    mappedPositions = [(pos, pdbPos)
                       for pos, pdbPos in zip(range(seqLen), mapping_seq2pdb)
                       if pdbPos >= 0]
    lastPos = seqLen - 1

    for i, j in mappedPositions:
        residue_i = residueList[j]

        for k, l in mappedPositions:
            if i == k:
                for apt in AllLabelNames:
                    OrientationMatrix[apt][i, k] = ValueOfSelf
                continue

            residue_k = residueList[l]

            # The pair is only counted as invalid here; the orientation is
            # still computed for it, exactly as before.
            # NOTE(review): the counters run over ordered pairs, so a bad
            # unordered pair is counted twice.
            if not HasAtoms4TwoROri(residue_i, residue_k):
                numInvalidAtoms['TwoROri'] += 1

            pairOri = CalcOrientationOf2Residues(residue_i, residue_k)
            for apt, v in pairOri.items():
                OrientationMatrix[apt][i, k] = v

            # The four-CA orientation needs the successors i+1 and k+1, so it
            # is undefined for the last sequence position.
            if i == lastPos or k == lastPos:
                continue

            j1 = mapping_seq2pdb[i + 1]
            l1 = mapping_seq2pdb[k + 1]
            if j1 < 0 or l1 < 0:
                continue

            residue_i1 = residueList[j1]
            residue_k1 = residueList[l1]

            fourResidues = (residue_i, residue_i1, residue_k, residue_k1)
            if not all(res.has_id('CA') for res in fourResidues):
                numInvalidAtoms['FourCaOri'] += 1
                continue

            CaOri = CalcOrientationOf4CAs(residue_i['CA'], residue_i1['CA'],
                                          residue_k['CA'], residue_k1['CA'])
            for apt, v in CaOri.items():
                OrientationMatrix[apt][i, k] = v

    for apt in AllLabelNames:
        OrientationMatrix[apt] = (OrientationMatrix[apt]).astype(np.float16)

    OrientationMatrix['numInvalidAtoms'] = numInvalidAtoms
    print('numInvalidAtoms: ' + str(numInvalidAtoms))

    # Only a warning: the (possibly unreliable) matrices are still returned.
    if any(count > 5 for count in numInvalidAtoms.values()):
        print('ERROR: there are too many invalid atoms in ' + pdbfile)

    return OrientationMatrix
Usage() exit(1) try: opts, args = getopt.getopt(sys.argv[1:],"lc:s:",["modelList=", "cutoff=", "savefolder="]) except getopt.GetoptError: Usage() exit(1) if len(args) != 2: Usage() exit(1) seqFile = args[0] target = os.path.basename(seqFile).split('.')[0] sequence = LoadFASTAFile(seqFile) modelListOrFolder = args[1] if not (os.path.isdir(modelListOrFolder) or os.path.isfile(modelListOrFolder) ): print 'ERROR: invalid model list file or folder:', modelListOrFolder exit(1) InputIsModelList = False savefolder = os.getcwd() cutoff=-1 cutoffs=[1, -1, -2] for opt, arg in opts: if opt in ("-l", "--modelList"): InputIsModelList = True
else: Usage() exit(1) if len(args) < 2: Usage() exit(1) seqFile = args[0] domainModels = args[1:] if not os.path.isfile(seqFile): print "ERROR, invalid sequence file: ", seqFile exit(1) mainSeq = LoadFASTAFile(seqFile) domainSeqs = [] chainIDs = [] localQualitys = [] for dmodel in domainModels: pdbseqs, _, chains = PDBUtils.ExtractSeqFromPDBFile(dmodel) assert len(pdbseqs) == 1 domainSeqs.append(pdbseqs[0]) #print 'chain id: ', chains[0].get_id() chainIDs.append(chains[0].get_id()) locQuality = ExtractLocalQuality(dmodel) assert len(locQuality) == len(pdbseqs[0]) localQualitys.append(locQuality) print mainSeq
def main(argv):
    """Convert predicted Phi/Psi angles into a constraint file.

    Command line (argv excludes the program name):
        [-a propertyType] [-f funcType] [-w weight] [-s savefolder]
        [-q querySeqFile] [-o] inputFile

    inputFile is loaded with PropertyUtils.LoadPredictedProperties and must
    yield at least (name, sequence, predProperty), where predProperty has the
    key 'PhiPsi_vonMise2d4'. The constraints produced by
    GeneratePhiPsiPotential are written, one per line, to
        <savefolder>/<target>.PhiPsi4<funcType>.<wStr>.txt
    where <target> is the input file's basename up to its first '.'.

    Any invalid option or input prints a message (or the usage text) and
    terminates through exit(1).
    """

    def _fail(*parts):
        # Produces the same text as the Python 2 statement `print a, b`
        # (items joined by one space) while also compiling on Python 3.
        print(' '.join(str(part) for part in parts))
        exit(1)

    propertyType = 'PhiPsi'.upper()
    funcType = 'AMBERPERIODIC'

    allPropertyTypes = [propertyType]
    allFuncTypes = ['CHARMM', 'AMBERPERIODIC', 'CIRCULARHARMONIC', 'HARMONIC']

    weight = 1.0
    wStr = 'w1'

    querySeqFile = None
    querySeq = None

    UseDisorderInfo = True
    savefolder = os.getcwd()

    if len(argv) < 1:
        Usage()
        exit(1)

    # NOTE(review): the short flag -o takes no value while the long form is
    # declared as "noDisorder=" and therefore requires one. Left unchanged so
    # existing invocations keep working.
    try:
        opts, args = getopt.getopt(argv, "a:f:w:s:q:o", [
            "propertyType=", "funcType=", "weight=", "savefolder=",
            "querySeqFile=", "noDisorder="
        ])
    except getopt.GetoptError:
        Usage()
        exit(1)

    if len(args) != 1:
        Usage()
        exit(1)

    inputFile = args[0]

    for opt, arg in opts:
        if opt in ("-a", "--propertyType"):
            if arg.upper() not in allPropertyTypes:
                _fail('ERROR: currently only support the following property types: ', allPropertyTypes)
            propertyType = arg.upper()

        elif opt in ("-f", "--funcType"):
            if arg.upper() not in allFuncTypes:
                _fail('ERROR: currently only support the following func types: ', allFuncTypes)
            funcType = arg.upper()

        elif opt in ("-w", "--weight"):
            weight = np.float32(arg)
            # The weight string, exactly as typed, becomes part of the output
            # file name.
            wStr = 'w' + arg
            if weight < 0:
                _fail('ERROR: the energy weight shall be >=0')

        elif opt in ("-q", "--querySeqFile"):
            querySeqFile = arg

        elif opt in ("-s", "--savefolder"):
            savefolder = arg

        elif opt in ("-o", "--noDisorder"):
            UseDisorderInfo = False

        else:
            Usage()
            exit(1)

    assert propertyType == 'PhiPsi'.upper()

    if not os.path.isfile(inputFile):
        _fail('ERROR: the input file does not exist: ', inputFile)

    # A query sequence file that does not exist is silently ignored.
    if querySeqFile is not None and os.path.isfile(querySeqFile):
        querySeq = LoadFASTAFile(querySeqFile)

    targetName = os.path.basename(inputFile).split('.')[0]

    content = PropertyUtils.LoadPredictedProperties(inputFile)
    assert len(content) >= 3
    _, sequence, predProperty = content[:3]

    if querySeq is not None and querySeq != sequence:
        _fail('ERROR: inconsistent sequences in the two files:', querySeqFile, inputFile)

    if 'PhiPsi_vonMise2d4' not in predProperty:
        _fail('ERROR: the property file does not have predicted Phi/Psi: ', inputFile)

    PhiPsiList = predProperty['PhiPsi_vonMise2d4']

    predDisorder = None
    if UseDisorderInfo and 'disorder' in predProperty:
        predDisorder = predProperty['disorder']

    constraints = GeneratePhiPsiPotential(sequence, PhiPsiList, funcType=funcType,
                                          weight0=weight, predDisorder=predDisorder)

    if len(constraints) < 1:
        _fail('ERROR: cannot generate any constraints for Phi and Psi from ', inputFile)

    # Create the output folder on demand so that a fresh -s target does not
    # make the final write fail.
    if not os.path.isdir(savefolder):
        os.mkdir(savefolder)

    PhiPsiFile = os.path.join(
        savefolder, targetName + '.PhiPsi4' + funcType + '.' + wStr + '.txt')
    with open(PhiPsiFile, 'w') as f:
        f.write('\n'.join(constraints))
def main(argv):
    """Convert a pickled distance/orientation potential into Rosetta SPLINE constraints.

    Command line (argv excludes the program name):
        [-a labelNames] [-f funcType] [-s minSeqSep4Dist[+minSeqSep4Ori]]
        [-c oriCutoff[+distCutoff]] [-b barrier]
        [-t topRatio4Ori[+topRatio4Dist]] [-q querySeqFile] [-d savefolder]
        inputFile

    inputFile is a pickle whose element [1] is compared against the optional
    query sequence. The constraints produced by GenerateSplinePotential are
    written through WriteSplineConstraints to
        <savefolder>/<target>.pairPotential4Rosetta.SPLINE.txt
    with the histogram files going to <savefolder>/SplinePotential4<target>/,
    where <target> is the input file's basename up to its first '.'.

    Any invalid option or input prints a message (or the usage text) and
    terminates through exit(1).
    """

    def _fail(*parts):
        # Produces the same text as the Python 2 statement `print a, b`
        # (items joined by one space) while also compiling on Python 3.
        print(' '.join(str(part) for part in parts))
        exit(1)

    labelNames = ['CbCb'] + config.TwoROriNames

    # Defaults: minimum sequence separation, potential cutoffs and top
    # ratios, separately for distance and orientation.
    seqSep4Dist = 1
    seqSep4Ori = 2
    distPotThreshold = np.finfo(np.float32).max
    oriPotThreshold = 0.04
    topRatio4Ori = 25
    topRatio4Dist = np.iinfo(np.int32).max

    # NOTE(review): barrier is parsed and validated but is not passed to
    # GenerateSplinePotential or WriteSplineConstraints below.
    barrier = 1.0

    funcType = 'SPLINE'
    allFuncTypes = set(['SPLINE'])

    savefolder = os.getcwd()

    if len(argv) < 1:
        Usage()
        exit(1)

    try:
        opts, args = getopt.getopt(argv, "a:f:s:c:b:t:q:d:", [
            "labelNames=", "funcType=", "minSeqSep=", "potentialCutoff=",
            "barrier=", "topRatio=", "querySeqFile=", "savefolder="
        ])
    except getopt.GetoptError:
        Usage()
        exit(1)

    if len(args) != 1:
        Usage()
        exit(1)

    inputFile = args[0]
    querySeqFile = None
    querySeq = None

    for opt, arg in opts:
        if opt in ("-a", "--labelNames"):
            labelNames = config.ParseLabelNames(arg)

        elif opt in ("-s", "--minSeqSep"):
            # "D" or "D+O": distance separation, optionally followed by the
            # orientation separation.
            fields = arg.split('+')
            seqSep4Dist = np.int32(fields[0])
            # Explicit checks instead of assert so that the validation
            # survives `python -O`.
            if not seqSep4Dist > 0:
                _fail('ERROR: The sequence separation for a valid distance potential shall be at least 1')
            if len(fields) > 1:
                seqSep4Ori = np.int32(fields[1])
                if not seqSep4Ori > 1:
                    _fail('ERROR: The sequence separation for a valid orientation shall be at least 2')

        elif opt in ("-c", "--potentialCutoff"):
            # "O" or "O+D": note that orientation comes first here.
            fields = arg.split('+')
            oriPotThreshold = np.float32(fields[0])
            if len(fields) > 1:
                distPotThreshold = np.float32(fields[1])

        elif opt in ("-f", "--funcType"):
            funcType = arg.upper()
            if funcType not in allFuncTypes:
                _fail('ERROR: unsupported potential func type:', funcType)

        elif opt in ("-b", "--barrier"):
            barrier = np.float32(arg)
            # `not >=` also rejects NaN, as the former assert did.
            if not barrier >= 0:
                _fail('ERROR: the barrier shall be >=0')

        elif opt in ("-t", "--topRatio"):
            # "O" or "O+D": orientation ratio, optionally followed by the
            # distance ratio.
            fields = arg.split('+')
            topRatio4Ori = np.float32(fields[0])
            if len(fields) > 1:
                topRatio4Dist = np.float32(fields[1])

        elif opt in ("-q", "--querySeqFile"):
            querySeqFile = arg

        elif opt in ("-d", "--savefolder"):
            savefolder = arg

        else:
            Usage()
            exit(1)

    if not os.path.isfile(inputFile):
        _fail('ERROR: the input potential file does not exist: ', inputFile)

    # A query sequence file that does not exist is silently ignored.
    if querySeqFile is not None and os.path.isfile(querySeqFile):
        querySeq = LoadFASTAFile(querySeqFile)

    if not os.path.isdir(savefolder):
        os.mkdir(savefolder)

    ## load up the potential file
    # Pickles are binary data: open in 'rb' so the bytes are never altered by
    # text-mode newline translation.
    # SECURITY: unpickling executes arbitrary code; only feed this script
    # potential files from a trusted source.
    with open(inputFile, 'rb') as fh:
        potData = cPickle.load(fh)

    if querySeq is not None and querySeq != potData[1]:
        _fail('ERROR: inconsistent sequences in', querySeqFile, inputFile)

    allConstraints = GenerateSplinePotential(
        potData,
        labelNames=labelNames,
        topRatio4Dist=topRatio4Dist,
        topRatio4Ori=topRatio4Ori,
        minSeqSep4Dist=seqSep4Dist,
        minSeqSep4Ori=seqSep4Ori,
        distPotThreshold=distPotThreshold,
        oriPotThreshold=oriPotThreshold)

    ## save the Rosetta constraints into files
    target = os.path.basename(inputFile).split('.')[0]
    rosettaPotentialFileName = os.path.join(
        savefolder, target + '.pairPotential4Rosetta.SPLINE.txt')
    savefolder4histfile = os.path.join(savefolder, 'SplinePotential4' + target + '/')
    WriteSplineConstraints(allConstraints, savefile=rosettaPotentialFileName,
                           savefolder4histfile=savefolder4histfile)