def __init__(self, out_dir=None):
    """
    Build the PDB parser / writer pair and record the output directory.

    @param out_dir: str or None. Directory stored in self.out_dir. When None,
                    "chain_PDBs" under the current working directory is used.
                    The directory is only recorded here, not created.
    """
    self.parser = PDBParser(QUIET=True)
    self.writer = PDB.PDBIO()
    self.out_dir = (os.path.join(os.getcwd(), "chain_PDBs")
                    if out_dir is None else out_dir)
def computeOneFile(self, pdbFName):
    '''
    Computes DSSP for a given pdb file.

    Runs the DSSP executable (self.dsspBinPath) on pdbFName, asking it to
    write its raw output to "<extended prefix>.dssp.tab" inside
    self.outPathRaw, and then passes that raw file to self.processDSSP.

    @param pdbFName: str. fname to pdb file
    @return None
    '''
    struct = PDBParser(QUIET=True).get_structure("pdbStruct", pdbFName)
    prefixAndChainTypeId = self.getExtendedPrefix(pdbFName)
    rawDsspOutName = os.path.join(self.outPathRaw,
                                  prefixAndChainTypeId + ".dssp.tab")

    proc = Popen([self.dsspBinPath, '-i', pdbFName, '-o', rawDsspOutName],
                 stdin=PIPE, stdout=PIPE, stderr=PIPE)
    output = proc.communicate()

    # Decode stderr once; any text on stderr is treated as a DSSP error.
    errMsg = "" if output is None else decodeFun(output[1])
    if output is None or errMsg != "":
        print("Error when computing DSSP: %s" % pdbFName)
        print(output)
        # A message starting with 'no atoms read before TER record' is the
        # one error for which no error file is created (presumably DSSP
        # still writes usable output in that case -- TODO confirm).
        if not errMsg.startswith('no atoms read before TER record'):
            self.createFileForError(struct, rawDsspOutName)

    # The raw output is processed whether or not an error was reported.
    self.processDSSP(prefixAndChainTypeId, struct, rawDsspOutName)
    return None
def __init__(self, rFname, lFname, computedFeatsRootDir=None,
             boundAvailable=True, res2res_dist=6.0, isForPrediction=False,
             statusManager=None):
    '''
    Sets up a contact map computer for one receptor/ligand pair.

    @param rFname: str. path to receptor pdb file
    @param lFname: str. path to ligand pdb file
    @param computedFeatsRootDir: str. path where features will be stored
    @param boundAvailable: bool. True if bound structures are available.
                           False otherwise. Bound structures must be located
                           at the same path as unbound structures and need
                           to be named as in the following example:
                             1A2K_l_u.pdb  1A2K_r_b.pdb
    @param res2res_dist: float. max distance between any heavy atoms of 2
                         amino acids to be considered as interacting
                         (Angstroms)
    @param isForPrediction: bool. False to compute contacts between amino
                            acids (positive contacts tagged as 1, negative
                            as -1). If True, all amino acid pairs are tagged
                            as np.nan
    @param statusManager: class that implements .setStatus(msg) to
                          communicate. Accepted by this constructor but not
                          used or forwarded by it.
    @raise FeatureComputerException: if receptor and ligand prefixes differ
                                     and the ligand prefix contains '<' or
                                     the receptor prefix contains '>'
    '''
    FeaturesComputer.__init__(self, rFname, lFname, computedFeatsRootDir)

    # Short prefix: file basename cut at the first "." and then at the
    # first "_".
    self.prefixR = os.path.basename(rFname).split(".")[0].split("_")[0]
    self.prefixL = os.path.basename(lFname).split(".")[0].split("_")[0]

    if self.prefixR != self.prefixL:
        # The combined prefix is ligand + "<->" + receptor. A ligand part
        # without '<' and a receptor part without '>' presumably keeps
        # that separator unambiguous.
        if "<" in self.prefixL:
            raise FeatureComputerException(
                "Error. Ligand pdbFile name %s must not contain '<' or '>' character"
                % lFname)
        if ">" in self.prefixR:
            raise FeatureComputerException(
                "Error. Receptor pdbFile name %s must not contain '<' or'>' character"
                % rFname)
        self.prefixR = self.getExtendedPrefix(rFname)
        self.prefixL = self.getExtendedPrefix(lFname)
        self.prefix = self.prefixL + "<->" + self.prefixR
    else:
        # Both partners share the same short prefix: use it directly.
        self.prefix = self.prefixR

    self.isForPrediction = isForPrediction
    self.res2res_dist = res2res_dist
    self.boundAvailable = boundAvailable

    self.outPath = myMakeDir(self.computedFeatsRootDir, "common/contactMaps")
    self.outName = os.path.join(self.outPath, self.prefix + ".cMap.tab")

    self.parser = PDBParser(QUIET=True)
    self.ppb = CaPPBuilder()
    self.computeFun = self.contactMapOneComplex
def __init__(self, rFname, lFname, computedFeatsRootDir=None,
             statusManager=None):
    '''
    Initialises the base StructFeatComputer and prepares the
    "halfSphereExpos" output directory and a PDB parser.

    @param rFname: forwarded unchanged to StructFeatComputer.__init__
                   (presumably the receptor pdb file path)
    @param lFname: forwarded unchanged to StructFeatComputer.__init__
                   (presumably the ligand pdb file path)
    @param computedFeatsRootDir: forwarded unchanged to
                                 StructFeatComputer.__init__
    @param statusManager: class that implements .setStatus(msg) to
                          communicate
    '''
    StructFeatComputer.__init__(self, rFname, lFname, computedFeatsRootDir,
                                statusManager=statusManager)
    # self.computedFeatsRootDir is expected to be set by the base
    # constructor call above.
    featsRootDir = self.computedFeatsRootDir
    self.outPath = myMakeDir(featsRootDir, "halfSphereExpos")
    self.parser = PDBParser(QUIET=True)
def __init__(self, rFname, lFname, computedFeatsRootDir=None):
    '''
    Initialises the base SeqFeatComputer, prepares the output directories
    used for extracted sequences and creates empty lookup tables.

    @param rFname: str. path to receptor pdb or fasta file
    @param lFname: str. path to ligand pdb or fasta file
    @param computedFeatsRootDir: str. path where features will be stored.
                                 If None, the base class is expected to fall
                                 back to the default path (assigned in
                                 ../Config.py)
    '''
    SeqFeatComputer.__init__(self, rFname, lFname, computedFeatsRootDir)

    # Output layout: <root>/extractedSeqs/{seqsData, seqToStructMap}
    extractedSeqsDir = myMakeDir(self.computedFeatsRootDir, "extractedSeqs")
    self.outPath = extractedSeqsDir
    self.fastaOutDir = myMakeDir(extractedSeqsDir, "seqsData")
    self.seqToStructDir = myMakeDir(extractedSeqsDir, "seqToStructMap")

    self.parser = PDBParser(QUIET=True)

    # Lookup tables, all empty at construction time.
    self.seqsDict, self.seqToStruct = {}, {}
    self.structToSeq, self.seqToStructFnames = {}, {}
def __init__(self, rFname, lFname, computedFeatsRootDir=None,
             statusManager=None):
    '''
    Initialises the base StructFeatComputer and prepares the
    "distanceMatricesData" output directory and a PDB parser.

    @param rFname: str. path to receptor pdb file
    @param lFname: str. path to ligand pdb file
    @param computedFeatsRootDir: str. path where features will be stored
    @param statusManager: class that implements .setStatus(msg) to
                          communicate
    '''
    StructFeatComputer.__init__(self, rFname, lFname, computedFeatsRootDir,
                                statusManager=statusManager)
    # self.computedFeatsRootDir is expected to be set by the base
    # constructor call above.
    featsRootDir = self.computedFeatsRootDir
    self.outPath = myMakeDir(featsRootDir, "distanceMatricesData")
    self.parser = PDBParser(QUIET=True)
def insertPredsInBfactor(pdbFnameIn, scoresFnameIn, pdfFnameOut):
    '''
    Writes a copy of a pdb file in which the B-factor of every atom is
    replaced by the prediction score of the residue it belongs to.

    Scores are read from a whitespace-separated table (lines starting at
    "#" are comments) that must provide the columns "chainId", "resId" and
    "prediction". Residues are matched on (chain id, residue number +
    insertion code); a blank chain id in the structure is looked up as "*".
    Residues without a score get 0.0. Only the first model of the structure
    is modified.

    @param pdbFnameIn: str. path to the input pdb file
    @param scoresFnameIn: str. path to the scores table
    @param pdfFnameOut: str. path where the modified pdb file is written
    '''
    parser = PDBParser(QUIET=True)
    struct = parser.get_structure(os.path.basename(pdbFnameIn), pdbFnameIn)

    # Raw string: '\s' is not a valid escape in a normal string literal.
    # Identifier columns are forced to str so that numeric looking ids
    # still compare equal to the string ids built from the structure.
    scores = pd.read_table(scoresFnameIn, sep=r'\s+', header='infer',
                           comment="#",
                           dtype={
                               "chainIdL": str,
                               "chainIdR": str,
                               "structResIdL": str,
                               "structResIdR": str,
                               "chainId": str,
                               "structResId": str,
                               "resId": str
                           })

    # (chainId, resId) -> prediction. For repeated keys the last row wins.
    scoresDict = {
        (chainId, resId): prediction
        for chainId, resId, prediction in zip(
            scores["chainId"], scores["resId"], scores["prediction"])
    }

    for chain in struct[0]:
        chainId = chain.get_id()
        if chainId == " ":
            chainId = "*"
        for res in chain:
            resId = res.get_id()
            # Residue number followed by the insertion code, if any.
            strResId = (str(resId[1]) + resId[2]).strip()
            key = (chainId, strResId)
            print(strResId, key in scoresDict)
            predVal = scoresDict.get(key, 0.0)
            for atom in res:
                atom.set_bfactor(predVal)

    writer = PDB.PDBIO()
    writer.set_structure(struct)
    writer.save(pdfFnameOut)
def moveAndWriteAsPDBIfMmcif(fnameIn, fnameOut, removeInput=False):
    '''
    Parses fnameIn with PDBParser and, when the number of residues in its
    first model lies strictly between the configured limits, saves the
    structure to fnameOut with PDBIO.

    @param fnameIn: str. path to the input structure file
    @param fnameOut: str. path where the structure is saved
    @param removeInput: bool. If True, fnameIn is deleted after a
                        successful save
    @return bool. True if the structure was saved. False if any Exception
            was raised on the way (it is printed, not propagated); the
            BadNumberOfResidues raised for an out-of-range residue count is
            handled the same way provided it derives from Exception.
    '''
    conf = Configuration()
    minNumResidues = conf.minNumResiduesPartner
    maxNumResidues = conf.maxNumResiduesPartner
    try:
        struct = PDBParser(QUIET=True).get_structure("pdbStruct", fnameIn)
        # Residues are counted over every chain of the first model only.
        totalNumRes = sum(len(chain.get_list()) for chain in struct[0])
        if not (minNumResidues < totalNumRes < maxNumResidues):
            raise BadNumberOfResidues(totalNumRes)

        pdbWriter = PDBIO()
        pdbWriter.set_structure(struct)
        pdbWriter.save(fnameOut)
        if removeInput:
            os.remove(fnameIn)
        return True
    except Exception as e:
        print("Error in moveAndWriteAsPDBIfMmcif !!!", e)
        return False
def getBoundResList(fname_bound, fname_unbound, listOfDictsChainToResId):
    '''
    Translates residue ids given for the unbound structure into the
    matching ids of the bound structure.

    Peptides are built for both structures and matched with a
    BoundUnboundMapper. Every unbound residue is then looked up through
    mapper.mapUnboundToBoundUsingId; residues for which it returns None are
    skipped.

    @param fname_bound: str. path to the bound pdb file
    @param fname_unbound: str. path to the unbound pdb file
    @param listOfDictsChainToResId: list of dicts {unbound chain id:
                                    iterable of unbound residue ids}. The
                                    chain id "*" is looked up as " "
    @return list of dicts {bound chain id: [bound residue ids]}, one dict
            per input dict and in the same order
    '''
    parser = PDBParser(QUIET=True)
    structureUnbound = parser.get_structure(fname_unbound, fname_unbound)
    structureBound = parser.get_structure(fname_bound, fname_bound)

    peptideBuilder = PPBuilder()
    mapper = BoundUnboundMapper(
        peptideBuilder.build_peptides(structureUnbound, aa_only=False),
        peptideBuilder.build_peptides(structureBound, aa_only=False))
    mapper.build_correspondence()

    newDictsList = []
    for chainToRes_u in listOfDictsChainToResId:
        chainToRes_b = {}
        for chainId_u, resIds_u in chainToRes_u.items():
            # "*" stands for the blank chain id.
            queryChainId = " " if chainId_u == "*" else chainId_u
            for resId_u in sorted(resIds_u):
                mapped = mapper.mapUnboundToBoundUsingId(queryChainId, resId_u)
                if mapped is None:
                    continue
                chainId_b, resId_b = mapped
                chainToRes_b.setdefault(chainId_b, []).append(resId_b)
        newDictsList.append(chainToRes_b)
    return newDictsList
def computeOneFile(self, fileName):
    '''
    Computes VORONOI neighbours for a given pdb file.

    One point per standard amino acid of the first model (residues with an
    insertion code are skipped) is sent to qhull. For every output line of
    qhull, each pair of distinct residues listed on it that lies closer
    than self.maxDist is written to "<file prefix>.voro" in self.outPath,
    in both orders, as tab-separated "<resNum>_<chainId>" ids. If that file
    already exists nothing is computed.

    @param fileName: str. fname to pdb file
    @return 0
    @raise FeatureComputerException: if qhull writes anything to stderr or
                                     its output does not start with a digit
    '''
    voro_file = os.path.split(fileName)[-1].split(".")[0] + ".voro"
    outName = os.path.join(self.outPath, voro_file)
    if os.path.isfile(outName):
        print('Already computed VORONOI')
        return 0

    struct = PDBParser(QUIET=True).get_structure("oneStruct", fileName)
    ids = []
    coords = []
    for res in struct[0].get_residues():
        if not is_aa(res, standard=True):
            continue
        _, _, chainId, resIdTuple = res.get_full_id()
        if resIdTuple[2] != " ":
            # Residues carrying an insertion code are ignored.
            continue
        if chainId == " ":
            chainId = "*"
        # Representative atom: CA, else CB, else the first atom listed.
        try:
            atom = res["CA"]
        except KeyError:
            try:
                atom = res["CB"]
            except KeyError:
                atom = res.get_list()[0]
        coords.append(atom.get_coord())
        ids.append(str(resIdTuple[1]) + "_" + chainId)

    # qhull input: dimension line, number of points, one point per line.
    inStream = "\n".join(
        " ".join(str(elem) for elem in coord_line) for coord_line in coords)
    inStream = "3 #sample 3-d input\n" + str(len(coords)) + "\n" + inStream

    # NOTE: shell=True is needed for the pipe into cut; self.qhullExeDir is
    # interpolated into the command and must come from trusted
    # configuration.
    # universal_newlines=True makes the pipes text based, so str can be
    # sent and received on Python 3 as well as on Python 2.
    proc = Popen("%s Fv Qt | cut -d\" \" -f2-5" % (self.qhullExeDir),
                 shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE,
                 universal_newlines=True)
    out = proc.communicate(inStream)
    # [:1] instead of [0] so that an empty stdout is reported as a qhull
    # error rather than as an IndexError.
    if not out[0][:1].isdigit() or len(out[1]) > 0:
        raise FeatureComputerException(
            "Error in qhull execution for %s: %s" % (fileName, out))

    lines = out[0].split("\n")
    try:
        with open(outName, "w") as f:
            # The first output line is skipped; every other line is read as
            # a list of indices into ids / coords.
            for line in lines[1:]:
                splitLine = [int(elem) for elem in line.split()]
                if not splitLine:
                    continue
                # NOTE(review): pairs are de-duplicated per qhull line only,
                # so the same pair can be written again for another line --
                # confirm that readers of the .voro file tolerate this.
                alreadyPrinted = set()
                for resAndChainInd1 in splitLine:
                    for resAndChainInd2 in splitLine:
                        if resAndChainInd1 == resAndChainInd2:
                            continue
                        dist = distance.euclidean(coords[resAndChainInd1],
                                                  coords[resAndChainInd2])
                        if dist < self.maxDist:
                            id1 = ids[resAndChainInd1]
                            id2 = ids[resAndChainInd2]
                            for pair in ((id1, id2), (id2, id1)):
                                if pair not in alreadyPrinted:
                                    f.write("%s\t%s\n" % pair)
                                    alreadyPrinted.add(pair)
    except (KeyboardInterrupt, Exception):
        # Do not leave a partially written file behind: it would be taken
        # for a finished computation by the isfile check on the next run.
        print("Exception happend computing %s" % outName)
        tryToRemove(outName)
        raise
    return 0