Ejemplos de myPDBParser en Python

Lenguaje de programación: Python

Namespace/Package Name: computeFeatures.structStep.myPDBParser

Clase / Tipo: myPDBParser

Ejemplos en hotexamples.com: 6

Python myPDBParser - 6 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de computeFeatures.structStep.myPDBParser.myPDBParser extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar Ocultar

get_structure (9)

Métodos usados con frecuencia

get_structure (9)

Ejemplo n.º 1

Mostrar archivo

    def computeOneFile(self, pdbFName):
        '''
        Runs the DSSP binary over one pdb file and hands the raw result to self.processDSSP.

        The raw DSSP output is written to self.outPathRaw/<extendedPrefix>.dssp.tab, where
        extendedPrefix is whatever self.getExtendedPrefix(pdbFName) returns.

        @param pdbFName: str. fname to pdb file
        @return None
        '''
        structure = PDBParser(QUIET=True).get_structure("pdbStruct", pdbFName)
        extendedPrefix = self.getExtendedPrefix(pdbFName)
        rawDsspPath = os.path.join(self.outPathRaw, extendedPrefix + ".dssp.tab")

        dsspCommand = [self.dsspBinPath, '-i', pdbFName, '-o', rawDsspPath]
        dsspProcess = Popen(dsspCommand, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        output = dsspProcess.communicate()  # (stdout, stderr)

        # Any text on stderr is reported as a problem
        if output is None or decodeFun(output[1]) != "":
            print("Error when computing DSSP: %s" % pdbFName)
            print(output)
            # The 'no atoms read before TER record' message is tolerated; for any other
            # message self.createFileForError is invoked for the raw output path
            toleratedMessage = decodeFun(
                output[1]).startswith('no atoms read before TER record')
            if not toleratedMessage:
                self.createFileForError(structure, rawDsspPath)

        self.processDSSP(extendedPrefix, structure, rawDsspPath)
        return None

Ejemplo n.º 2

Mostrar archivo

def moveAndWriteAsPDBIfMmcif(fnameIn, fnameOut, removeInput=False):
  '''
  Parses fnameIn and, when its number of residues is within the configured limits, saves the
  structure to fnameOut.

  Residues are counted over all chains of the first model. The count has to be strictly
  between Configuration().minNumResiduesPartner and Configuration().maxNumResiduesPartner.
  Any exception raised while parsing, checking or writing is printed and reported as False.

  @param fnameIn: str. path to the input structure file
  @param fnameOut: str. path where the structure will be saved
  @param removeInput: bool. If True, fnameIn is deleted after a successful save
  @return True if the structure was written, False otherwise
  '''
  settings = Configuration()
  minNumResidues = settings.minNumResiduesPartner
  maxNumResidues = settings.maxNumResiduesPartner
  try:
    structure = PDBParser(QUIET=True).get_structure("pdbStruct", fnameIn)
    totalNumRes = sum(len(chain.get_list()) for chain in structure[0])
    if not (minNumResidues < totalNumRes < maxNumResidues):
      # Caught right below, so the caller only sees a False return value
      raise BadNumberOfResidues(totalNumRes)
    pdbWriter = PDBIO()
    pdbWriter.set_structure(structure)
    pdbWriter.save(fnameOut)
    if removeInput:
      os.remove(fnameIn)
    return True
  except Exception as err:
    print("Error in moveAndWriteAsPDBIfMmcif !!!", err)
    return False

Ejemplo n.º 3

Mostrar archivo

    def __init__(self,
                 rFname,
                 lFname,
                 computedFeatsRootDir=None,
                 boundAvailable=True,
                 res2res_dist=6.0,
                 isForPrediction=False,
                 statusManager=None):
        '''
        Stores the options and derives the prefix used to name the contact map file.

        @param rFname: str. path to receptor pdb file
        @param lFname: str. path to ligand pdb file
        @param computedFeatsRootDir: str. path where features will be stored
        @param boundAvailable: bool. True if bound structures are available. False otherwise. Bound structures must be located
                                     at the same path that unbound structures and need to be named as in the following example:
                                      1A2K_l_u.pdb  1A2K_r_b.pdb
        @param res2res_dist: float. max distance between any heavy atoms of 2 amino acids to be considered as interacting
                                    (Angstroms)
        @param isForPrediction: bool. False to compute contacts between amino acids, True otherwise. Positive contacts will
                                      be tagged as 1, negative as -1. If True, all amino acids will have as tag np.nan
        @param statusManager: class that implements .setStatus(msg) to communicate. It is not used inside this constructor
        @raise FeatureComputerException: if receptor and ligand prefixes differ and any of them contains '<' or '>'
        '''
        FeaturesComputer.__init__(self, rFname, lFname, computedFeatsRootDir)

        # Prefix = file name up to the first "." and then up to the first "_"
        self.prefixR = os.path.split(rFname)[1].split(".")[0].split("_")[0]
        self.prefixL = os.path.split(lFname)[1].split(".")[0].split("_")[0]
        if self.prefixR == self.prefixL:
            self.prefix = self.prefixR
        else:
            # Both prefixes get joined with "<->", so neither of them may contain '<' or '>'.
            # Both characters are checked on both names, as the error messages state.
            if "<" in self.prefixL or ">" in self.prefixL:
                raise FeatureComputerException(
                    "Error. Ligand pdbFile name %s must not contain '<' or '>' character"
                    % lFname)
            if "<" in self.prefixR or ">" in self.prefixR:
                raise FeatureComputerException(
                    "Error. Receptor pdbFile name %s must not contain '<' or'>' character"
                    % rFname)
            self.prefixR = self.getExtendedPrefix(rFname)
            self.prefixL = self.getExtendedPrefix(lFname)

            self.prefix = self.prefixL + "<->" + self.prefixR

        self.isForPrediction = isForPrediction
        self.res2res_dist = res2res_dist
        self.boundAvailable = boundAvailable
        self.outPath = myMakeDir(self.computedFeatsRootDir,
                                 "common/contactMaps")
        self.outName = os.path.join(self.outPath, self.prefix + ".cMap.tab")
        self.parser = PDBParser(QUIET=True)
        # CaPPBuilder is used to build the polypeptides of each structure
        self.ppb = CaPPBuilder()
        self.computeFun = self.contactMapOneComplex

Ejemplo n.º 4

Mostrar archivo

Archivo: seqsManager.py Proyecto: minghao2016/BIPSPI

    def __init__(self, rFname, lFname, computedFeatsRootDir=None):
        '''
        Prepares the output folders, the pdb parser and the (initially empty) lookup tables.

        @param rFname: str. path to receptor pdb or fasta file
        @param lFname: str. path to ligand pdb or fasta file
        @param computedFeatsRootDir: str. path where features will be stored. If None they will be stored
                                          at default path (assigned in ../Config.py)
        '''
        SeqFeatComputer.__init__(self, rFname, lFname, computedFeatsRootDir)

        # Folders: "extractedSeqs" with "seqsData" and "seqToStructMap" created from it
        # (presumably myMakeDir returns the path it creates -- confirm against its definition)
        extractedSeqsDir = myMakeDir(self.computedFeatsRootDir, "extractedSeqs")
        self.outPath = extractedSeqsDir
        self.fastaOutDir = myMakeDir(extractedSeqsDir, "seqsData")
        self.seqToStructDir = myMakeDir(extractedSeqsDir, "seqToStructMap")

        self.parser = PDBParser(QUIET=True)

        # Lookup tables, filled in by the computeOneFile* methods
        self.seqsDict = dict()
        self.seqToStruct = dict()
        self.structToSeq = dict()
        self.seqToStructFnames = dict()

Ejemplo n.º 5

Mostrar archivo

class ContactMapper(FeaturesComputer):
    '''
    Extends FeaturesComputer class. Extracts residue and chain ids for training and predicting, and
    computes the contact map of a given complex (the contact labels are used for training).
    '''
    def __init__(self,
                 rFname,
                 lFname,
                 computedFeatsRootDir=None,
                 boundAvailable=True,
                 res2res_dist=6.0,
                 isForPrediction=False,
                 statusManager=None):
        '''
        Stores the options and derives the prefix used to name the contact map file.

        @param rFname: str. path to receptor pdb file
        @param lFname: str. path to ligand pdb file
        @param computedFeatsRootDir: str. path where features will be stored
        @param boundAvailable: bool. True if bound structures are available. False otherwise. Bound structures must be located
                                     at the same path that unbound structures and need to be named as in the following example:
                                      1A2K_l_u.pdb  1A2K_r_b.pdb
        @param res2res_dist: float. max distance between any heavy atoms of 2 amino acids to be considered as interacting
                                    (Angstroms)
        @param isForPrediction: bool. False to compute contacts between amino acids, True otherwise. Positive contacts will
                                      be tagged as 1, negative as -1. If True, all amino acids will have as tag np.nan
        @param statusManager: class that implements .setStatus(msg) to communicate. It is not used inside this constructor
        @raise FeatureComputerException: if receptor and ligand prefixes differ and any of them contains '<' or '>'
        '''
        FeaturesComputer.__init__(self, rFname, lFname, computedFeatsRootDir)

        # Prefix = file name up to the first "." and then up to the first "_"
        self.prefixR = os.path.split(rFname)[1].split(".")[0].split("_")[0]
        self.prefixL = os.path.split(lFname)[1].split(".")[0].split("_")[0]
        if self.prefixR == self.prefixL:
            self.prefix = self.prefixR
        else:
            # Both prefixes get joined with "<->", so neither of them may contain '<' or '>'.
            # Both characters are checked on both names, as the error messages state.
            if "<" in self.prefixL or ">" in self.prefixL:
                raise FeatureComputerException(
                    "Error. Ligand pdbFile name %s must not contain '<' or '>' character"
                    % lFname)
            if "<" in self.prefixR or ">" in self.prefixR:
                raise FeatureComputerException(
                    "Error. Receptor pdbFile name %s must not contain '<' or'>' character"
                    % rFname)
            self.prefixR = self.getExtendedPrefix(rFname)
            self.prefixL = self.getExtendedPrefix(lFname)

            self.prefix = self.prefixL + "<->" + self.prefixR

        self.isForPrediction = isForPrediction
        self.res2res_dist = res2res_dist
        self.boundAvailable = boundAvailable
        self.outPath = myMakeDir(self.computedFeatsRootDir,
                                 "common/contactMaps")
        self.outName = os.path.join(self.outPath, self.prefix + ".cMap.tab")
        self.parser = PDBParser(QUIET=True)
        # CaPPBuilder is used to build the polypeptides of each structure
        self.ppb = CaPPBuilder()
        self.computeFun = self.contactMapOneComplex

    def mapBoundToUnbound(self,
                          structureUnbound,
                          structureBound,
                          skipBoundChainsIds=None):
        '''
        Obtains correspondence between unbound structure and bound structure when available. Returns a dictionary
        that maps bound_residue --> equivalent unbound_residue

        @param structureUnbound: Bio.PDB.Structure. Structure in unbound state
        @param structureBound:   Bio.PDB.Structure. Structure in bound state, or None when it is not available. In
                                 that case each residue of the unbound structure is mapped to itself
        @param skipBoundChainsIds:   Set of Chars. Set of chain ids that will be skipped for calculations. None (default)
                                     means that no chain is skipped
        @return bound2UnboundMapDict: Dict {Bio.PDB.Residue (from bound structure): Bio.PDB.Residue (from unbound structure)}
        '''
        # None instead of a mutable default argument; an empty tuple skips nothing
        chainsToSkip = () if skipBoundChainsIds is None else skipBoundChainsIds

        pp_list_unbound = self.ppb.build_peptides(structureUnbound,
                                                  aa_only=False)
        if structureBound is None:
            # No bound structure: iterate the unbound residues and map each one to itself
            pp_list_bound = pp_list_unbound
            boundToUnboundMap = None
        else:
            pp_list_bound = self.ppb.build_peptides(structureBound,
                                                    aa_only=False)
            mapper = BoundUnboundMapper(pp_list_unbound, pp_list_bound)
            mapper.build_correspondence()
            # For a given bound residue returns its unbound equivalent (None if there is none)
            boundToUnboundMap = mapper.mapBoundToUnbound

        bound2UnboundMapDict = {}
        for pp in pp_list_bound:
            for resBound in pp:
                chainBound = resBound.get_full_id()[2]  # str chainId
                if chainBound in chainsToSkip:
                    continue
                if boundToUnboundMap is None:
                    resUnbound = resBound
                else:
                    resUnbound = boundToUnboundMap(resBound)
                # Bound residues without unbound equivalent are left out
                if resUnbound is not None:
                    bound2UnboundMapDict[resBound] = resUnbound
        return bound2UnboundMapDict

    def fixHomooligomers(self, structureL, structureR, positiveContacts,
                         chainsInContactL, chainsInContactR):
        '''
        Extends the positive contacts with those of equivalent residues in homooligomers. For each interacting
        pair (resL_1, resR_2) it may add (resL_1', resR_2) and/or (resL_1, resR_2'), where the primed residue is
        the equivalent residue in another copy of the chain. The ligand is processed first and the receptor is
        processed over the contacts already updated for the ligand.

        @param structureL: Bio.PDB.Structure. Structure of ligand
        @param structureR: Bio.PDB.Structure. Structure of receptor
        @param positiveContacts:  [(ligandResId, receptorResId)]: full_ids of Bio.PDB.Residue
        @param chainsInContactL:  chains of the ligand in contact. The received value is not read here; it is replaced
                                  by the value that HomoOligomerFinder.update_interactions() returns
        @param chainsInContactR:  chains of the receptor in contact. Replaced in the same way
        @return positiveContacts, chainsInContactL, chainsInContactR. Updated with equivalent residues interactions added
        '''
        chainsInContact = {}
        for chainType, structure in (("l", structureL), ("r", structureR)):
            polypeptides = self.ppb.build_peptides(structure, aa_only=False)
            finder = HomoOligomerFinder(polypeptides,
                                        positiveContacts,
                                        chainType=chainType)
            positiveContacts, chainsInContact[chainType] = finder.update_interactions()
        return positiveContacts, chainsInContact["l"], chainsInContact["r"]

    def getPairsOfResiduesInContact(self, structureL, structureR):
        '''
        Computes which amino acids of ligand are in contact with which amino acids of receptor. Two residues are
        in contact if NeighborSearch finds them within self.res2res_dist, considering only atoms whose name does
        not start with "H". Only the first model of each structure is used.

        @param structureL: Bio.PDB.Structure. Structure of ligand (bound state if available)
        @param structureR: Bio.PDB.Structure. Structure of receptor (bound state if available).
        @return positiveContacts:  Set {(full id of ligand residue, full id of receptor residue)}
        @return chainsNotContactL: Set { str(chainId structureL)}. Ligand chains with no residue in contact
        @return chainsNotContactR: Set { str(chainId structureR)}. Receptor chains with no residue in contact
        @raise NoValidPDBFile: if a structure has no model to read atoms from
        '''
        def nonHydrogenAtoms(structure, errorMsg):
            # IndexError shows up when the structure has no models (empty child_list)
            try:
                return [
                    atom for atom in structure.child_list[0].get_atoms()
                    if not atom.name.startswith("H")
                ]
            except IndexError:
                raise NoValidPDBFile(errorMsg)

        atomListL = nonHydrogenAtoms(structureL, "Problems parsing pdbFile 1")
        atomListR = nonHydrogenAtoms(structureR, "Problems parsing pdbFile 2")

        # A single search over the atoms of both partners, reported at residue level
        neighbourPairs = NeighborSearch(atomListL + atomListR).search_all(
            self.res2res_dist, level="R")
        lStructId = structureL.get_id()
        rStructId = structureR.get_id()

        positiveContacts = set()
        chainsInContactL = set()
        chainsInContactR = set()
        for res1, res2 in neighbourPairs:
            fullResId1 = res1.get_full_id()
            fullResId2 = res2.get_full_id()
            # First field of a full id is the id of the structure the residue belongs to
            owners = (fullResId1[0], fullResId2[0])
            if owners == (lStructId, rStructId):
                ligandId, receptorId = fullResId1, fullResId2
            elif owners == (rStructId, lStructId):
                ligandId, receptorId = fullResId2, fullResId1
            else:
                continue  # both residues belong to the same partner
            positiveContacts.add((ligandId, receptorId))
            chainsInContactL.add(ligandId[2])
            chainsInContactR.add(receptorId[2])

        if CONSIDER_HOMOOLIG_AS_POS:
            positiveContacts, chainsInContactL, chainsInContactR = self.fixHomooligomers(
                structureL, structureR, positiveContacts, chainsInContactL,
                chainsInContactR)

        allChainsL = set(chain.get_id() for chain in structureL[0].get_list())
        allChainsR = set(chain.get_id() for chain in structureR[0].get_list())
        return (positiveContacts,
                allChainsL.difference(chainsInContactL),
                allChainsR.difference(chainsInContactR))

    def contactMapOneComplex(self):
        '''
        Computes the contact map of a complex. Initial input for complex codification. Contact map is a file written at
        self.outName (self.computedFeatsRootDir/common/contactMaps/prefix.cMap.tab) where prefix is either the common
        name of ligand and receptor pdb files or the concatenation of ligand and receptor prefixes joined by "<->".
        1A2K_l_u.pdb and 1A2K_r_u.pdb  --> 1A2K.cMap.tab

        Each line of the file describes one (ligand residue, receptor residue) pair using the ids of the unbound
        structures; categ is 1 for contacts, -1 for non contacts and nan if self.isForPrediction.
        Contacts are computed on the bound structures when they can be loaded and on the unbound ones otherwise.
        Residues whose name cannot be translated to one letter code, or whose bound and unbound letters differ,
        are left out.

        @return 0 if the contact map file already existed, None otherwise
        @raise BadNumberOfResidues: if the number of mapped residues of a partner is not strictly between
                                    self.minNumResiduesPartner and self.maxNumResiduesPartner
        '''
        outName = self.outName
        print(outName)
        if os.path.isfile(outName):
            print('Already computed contact map')
            return 0
        lStructId = self.prefixL + "_l_u.pdb"
        rStructId = self.prefixR + "_r_u.pdb"
        structureL_u = self.parser.get_structure(lStructId, self.lFname)
        structureR_u = self.parser.get_structure(rStructId, self.rFname)

        structureL_b = None
        structureR_b = None
        if not (self.boundAvailable == False or self.isForPrediction):
            try:
                lStructId_b = self.prefix + "_l_b.pdb"
                rStructId_b = self.prefix + "_r_b.pdb"
                lFname_b = os.path.join(
                    os.path.split(self.lFname)[0], lStructId_b)
                rFname_b = os.path.join(
                    os.path.split(self.rFname)[0], rStructId_b)
                structureL_b = self.parser.get_structure(lStructId_b, lFname_b)
                structureR_b = self.parser.get_structure(rStructId_b, rFname_b)
            except IOError:  # in this case there are just unbound pdbs available
                structureL_b = None
                structureR_b = None

        if self.isForPrediction:
            positiveContacts = None
            chainsNotContactR = set([])
            chainsNotContactL = set([])
        elif structureL_b is None or structureR_b is None:  # Compute contacts in unbound structures
            positiveContacts, chainsNotContactL, chainsNotContactR = self.getPairsOfResiduesInContact(
                structureL_u, structureR_u)
        else:  # Compute contacts in bound structures
            positiveContacts, chainsNotContactL, chainsNotContactR = self.getPairsOfResiduesInContact(
                structureL_b, structureR_b)

        if JUST_INTERACTING_CHAINS == False:
            chainsNotContactR = set([])
            chainsNotContactL = set([])

        rResDict = self.mapBoundToUnbound(structureR_u,
                                          structureR_b,
                                          skipBoundChainsIds=chainsNotContactR)
        lResDict = self.mapBoundToUnbound(structureL_u,
                                          structureL_b,
                                          skipBoundChainsIds=chainsNotContactL)
        nResiduesL = len(lResDict)
        nResiduesR = len(rResDict)
        if not (self.minNumResiduesPartner < nResiduesL <
                self.maxNumResiduesPartner):
            raise BadNumberOfResidues(nResiduesL, "1")
        if not (self.minNumResiduesPartner < nResiduesR <
                self.maxNumResiduesPartner):
            raise BadNumberOfResidues(nResiduesR, "2")

        outFile = open(outName, "w")
        try:
            # "with" closes the file before the except clause tries to remove it
            with outFile:
                outFile.write(
                    "chainIdL structResIdL resNameL chainIdR structResIdR resNameR categ\n"
                )
                # Receptor columns do not depend on the ligand residue: sort, filter and
                # translate them just once instead of once per ligand residue
                receptorRows = []
                for resR_bound in sorted(rResDict,
                                         key=lambda x: x.get_full_id()):
                    resR_unbound = rResDict[resR_bound]
                    pdbIdR, modelR, chainIdR, resIdR = resR_unbound.get_full_id()
                    try:
                        letraR = three_to_one(resR_unbound.resname)
                        if letraR != three_to_one(resR_bound.resname):
                            continue
                    except KeyError:
                        continue
                    if chainIdR == " ":
                        chainIdR = "*"
                    receptorRows.append(
                        (resR_bound.get_full_id(), chainIdR,
                         self.makeStrResId(resIdR), letraR))

                for resL_bound in sorted(lResDict,
                                         key=lambda x: x.get_full_id()):
                    resL_unbound = lResDict[resL_bound]
                    pdbIdL, modelL, chainIdL, resIdL = resL_unbound.get_full_id()
                    resIdL = self.makeStrResId(resIdL)
                    try:
                        letraL = three_to_one(resL_unbound.resname)
                        if letraL != three_to_one(resL_bound.resname):
                            continue
                    except KeyError:
                        continue
                    if chainIdL == " ":
                        chainIdL = "*"
                    fullIdL_bound = resL_bound.get_full_id()
                    for fullIdR_bound, chainIdR, resIdR, letraR in receptorRows:
                        if self.isForPrediction:
                            categ = np.nan
                        elif (fullIdL_bound, fullIdR_bound) in positiveContacts:
                            # positiveContacts holds full ids of the residues used to compute contacts
                            categ = 1
                        else:
                            categ = -1
                        outFile.write("%s %s %s %s %s %s %s\n" %
                                      (chainIdL, resIdL, letraL, chainIdR,
                                       resIdR, letraR, categ))
        except (KeyboardInterrupt, Exception):
            # Do not leave a partially written contact map behind
            print("Exception happend computing %s" % outName)
            tryToRemove(outName)
            raise

    def makeStrResId(self, resId):
        '''
        Builds the string version of a residue id: all its fields but the first one are converted to str,
        concatenated and stripped, e.g. (' ', 10, 'A') --> "10A"

        @param resId: tuple. Residue id as provided by Bio.PDB.Residue.get_full_id()[3]
        @return finalId: str
        '''
        return "".join(str(field) for field in resId[1:]).strip()

Ejemplo n.º 6

Mostrar archivo

Archivo: seqsManager.py Proyecto: minghao2016/BIPSPI

class SeqsManager(SeqFeatComputer):
    '''
    Extends SeqFeatComputer class. Extracts sequences from pdb files to fasta files. It then allows to easily
    enumerate the sequences (all letters) and fasta files, and to map between sequence indices and
    structure indices and vice versa.
    '''
    def __init__(self, rFname, lFname, computedFeatsRootDir=None):
        '''
        Prepares the output folders, the pdb parser and the (initially empty) lookup tables.

        @param rFname: str. path to receptor pdb or fasta file
        @param lFname: str. path to ligand pdb or fasta file
        @param computedFeatsRootDir: str. path where features will be stored. If None they will be stored
                                          at default path (assigned in ../Config.py)
        '''
        SeqFeatComputer.__init__(self, rFname, lFname, computedFeatsRootDir)

        # Folders: "extractedSeqs" with "seqsData" and "seqToStructMap" created from it
        # (presumably myMakeDir returns the path it creates -- confirm against its definition)
        extractedSeqsDir = myMakeDir(self.computedFeatsRootDir, "extractedSeqs")
        self.outPath = extractedSeqsDir
        self.fastaOutDir = myMakeDir(extractedSeqsDir, "seqsData")
        self.seqToStructDir = myMakeDir(extractedSeqsDir, "seqToStructMap")

        self.parser = PDBParser(QUIET=True)

        # Lookup tables, filled in by the computeOneFile* methods
        self.seqsDict = dict()
        self.seqToStruct = dict()
        self.structToSeq = dict()
        self.seqToStructFnames = dict()

    def computeOneFile(self, fileName, chainType):
        '''
        Gets the seq to struct mapping for a given file. Files recognised by self.checkIfIsFasta are
        processed with computeOneFileFromFasta and all the others with computeOneFileFromPDB

        @param fileName: str. fname to pdb or fasta file
        @param chainType: str. "l" for ligand and "r" for receptor
        '''
        if self.checkIfIsFasta(fileName):
            processFile = self.computeOneFileFromFasta
        else:
            processFile = self.computeOneFileFromPDB
        processFile(fileName, chainType)

    def addResiduesToSeqToStructMap(self, chainType, chainId, seqStr, resIds):
        '''
        Replaces the sequence and the seq to struct mapping of one chain, rewriting its fasta file and its
        seq to struct map file.

        The previous self.seqToStruct entries of the chain are removed and new ones are created from resIds.
        self.structToSeq is only updated (and a line is only written to the map file) for residue ids that
        were already present in self.structToSeq.

        @param chainType: str. "l" for ligand and "r" for receptor
        @param chainId: str. chain id of the sequence. It must be already present in self.seqsDict[chainType]
                        and in self.seqToStructFnames
        @param seqStr: str. New sequence of the chain
        @param resIds: list of str, one per letter of seqStr. Each element is either "-" (position with no
                       residue in the structure), a residue number ("12") or a residue number followed by
                       a one character insertion code ("12A")
        '''
        assert len(seqStr) == len(resIds)
        fastaFname = self.seqsDict[chainType][chainId][1]
        self.seqsDict[chainType][chainId] = (seqStr, fastaFname)
        with open(fastaFname, "w") as f:
            f.write(">" + os.path.split(fastaFname)[-1] + "\n" + seqStr)

        # Forget the previous seq --> struct entries of this chain (keys are collected first,
        # the dict cannot be modified while iterating over it)
        staleKeys = [
            key for key in self.seqToStruct if key[:2] == (chainType, chainId)
        ]
        for key in staleKeys:
            del self.seqToStruct[key]

        listForFile = []
        for i, resId in enumerate(resIds):
            if resId == "-":
                continue
            if resId[-1].isdigit():
                flag = " "
                resNumber = int(resId)
            else:
                # Last character is the insertion code. The number is converted to int as well, so
                # that the tuple is comparable to the residue ids stored in self.structToSeq
                flag = resId[-1]
                resNumber = int(resId[:-1])
            structId = (" ", resNumber, flag)
            self.seqToStruct[(chainType, chainId, i)] = structId
            key_structSeq = (chainType, chainId, structId)
            if key_structSeq in self.structToSeq:
                self.structToSeq[key_structSeq] = i
                # Same "index;letter;resId" line format that computeOneFileFromPDB writes
                listForFile.append("%d;%s;%s" % (i, seqStr[i], str(structId)))

        outName, prefixAndChainType = self.seqToStructFnames[(chainType,
                                                              chainId)]
        with open(outName, "w") as f:
            f.write(">" + prefixAndChainType + "_" + chainId + "\n" +
                    "\n".join(listForFile))

#    print(self.seqToStruct)
#    raw_input("press enter to continue")

    def computeOneFileFromPDB(self, fileName, chainType):
        '''
        Gets the seq to struct mapping for a given pdb file. For each accepted chain of the first model it
        writes a fasta file (at self.fastaOutDir) and a seq to struct map file (at self.seqToStructDir) and
        fills self.seqsDict, self.seqToStruct, self.structToSeq and self.seqToStructFnames.

        A chain is skipped when its number of standard amino acids is smaller than int(0.5 * number of amino
        acids) or when it has SMALL_CHAINS_LIMIT amino acids or fewer.

        @param fileName: str. fname to pdb file
        @param chainType: str. "l" for ligand and "r" for receptor
        '''
        self.seqsDict[chainType] = {}

        baseName = os.path.split(fileName)[-1]
        if fileName.endswith("_r_u.pdb") or fileName.endswith("_l_u.pdb"):
            prefixAndChainType = baseName.split("_u.pdb")[0]
        else:
            prefixAndChainType = baseName.split(".pdb")[0] + "_" + chainType

        struct = self.parser.get_structure(prefixAndChainType, fileName)
        for chain in struct[0]:
            chainId = chain.get_id()
            if chainId == " ":
                chainId = "*"
            nResStandard = sum(
                1 for res in chain if is_aa(res, standard=True))
            # Amino acids (standard or not) sorted by residue number and insertion code
            resList = [
                res for res in sorted(chain.child_list,
                                      key=lambda x: x.get_id()[1:])
                if is_aa(res, standard=False)
            ]
            nResAll = len(resList)
            if nResStandard < int(0.5 * nResAll):
                continue  # skip if most residues are not standard
            if nResAll <= SMALL_CHAINS_LIMIT:
                continue  # Too small chains will not be considered

            sequence = []
            resIds = []
            for i, res in enumerate(resList):
                try:
                    letter = three_to_one(res.resname)
                except KeyError:
                    print("Exception", res)
                    letter = "X"
                    if i == (nResAll - 1):
                        break  # This case is for TCGR....TLRX where X is GDP or other molecule
                resId = res.get_full_id()[3]
                sequence.append(letter)
                resIds.append("%d;%s;%s" % (i, letter, resId))
                self.seqToStruct[(chainType, chainId, i)] = resId
                self.structToSeq[(chainType, chainId, resId)] = i
            sequence = "".join(sequence)

            chainTitle = prefixAndChainType + "_" + chainId
            outNameFasta = os.path.join(self.fastaOutDir,
                                        chainTitle + "_u.fasta")
            # "with" guarantees that the files are closed even if writing fails
            with open(outNameFasta, "w") as f:
                f.write(">" + chainTitle + "\n" + sequence)

            outName = os.path.join(self.seqToStructDir,
                                   chainTitle + "_u.seqStruMap")
            self.seqToStructFnames[(chainType,
                                    chainId)] = (outName, prefixAndChainType)
            with open(outName, "w") as f:
                f.write(">" + chainTitle + "\n" + "\n".join(resIds))

            self.seqsDict[chainType][chainId] = (sequence, outNameFasta)

    def computeOneFileFromFasta(self, fileName, chainType):
        '''
        Gets the seq to struct mapping for a given fasta file (dummy, used for compatibility). The sequence
        is registered as a single chain with id "L" (ligand) or "R" (receptor), and position i of the
        sequence is mapped to the residue id (' ', i, ' '). The fasta and seq to struct map files are only
        written if they do not exist yet.

        @param fileName: str. fname to fasta file
        @param chainType: str. "l" for ligand and "r" for receptor
        @raise FeatureComputerException: if chainType is not "l" or "r", or if the sequence has
                                         SMALL_CHAINS_LIMIT letters or fewer
        '''
        self.seqsDict[chainType] = {}

        baseName = os.path.split(fileName)[-1]
        if fileName.endswith("_r_u.fasta") or fileName.endswith("_l_u.fasta"):
            prefixAndChainType = baseName.split("_u.fasta")[0]
        else:
            # NOTE(review): splitting on ".pdb" leaves the ".fasta" extension inside the prefix of
            # fasta files; kept as it is because output file names depend on it -- confirm intent
            prefixAndChainType = baseName.split(".pdb")[0] + "_" + chainType

        seq = self.parseFasta(fileName)
        if chainType == "l":
            chainId = "L"
        elif chainType == "r":
            chainId = "R"
        else:
            raise FeatureComputerException(
                "Error, bad chainType %s for computeOneFileFromFasta, must be 'r' or 'l'"
                % chainType)

        if len(seq) <= SMALL_CHAINS_LIMIT:  # Too small chains will not be considered
            raise FeatureComputerException(
                "Error, %s is to short (10 AA min) " % prefixAndChainType)

        sequence = []
        resIds = []
        for i, resname in enumerate(seq):
            if resname not in d1_to_index:
                resname = "X"  # letters not present in d1_to_index are replaced by X
            resId = (' ', i, ' ')
            sequence.append(resname)
            resIds.append("%d;%s;%s" % (i, resname, resId))
            self.seqToStruct[(chainType, chainId, i)] = resId
            self.structToSeq[(chainType, chainId, resId)] = i
        sequence = "".join(sequence)

        chainTitle = prefixAndChainType + "_" + chainId
        outNameFasta = os.path.join(self.fastaOutDir, chainTitle + "_u.fasta")
        if not os.path.isfile(outNameFasta):
            # "with" guarantees that the file is closed even if writing fails
            with open(outNameFasta, "w") as f:
                f.write(">" + chainTitle + "\n" + sequence)

        outName = os.path.join(self.seqToStructDir,
                               chainTitle + "_u.seqStruMap")
        if not os.path.isfile(outName):
            with open(outName, "w") as f:
                f.write(">" + chainTitle + "\n" + "\n".join(resIds))

        self.seqsDict[chainType][chainId] = (sequence, outNameFasta)

    def getSeq(self, chainType, chainId):
        '''
        Looks up the sequence stored for the given chainType and chainId

        @param chainType: str. "l" for ligand and "r" for receptor
        @param chainId: str. chain id of sequence to be extracted
        @return  (seqStr:str, fastaFileName:str). Tuple. 1st element sequence as str and second element
                 path to the fasta file where the sequence was written
        @raise KeyError: if there is no sequence stored for chainType and chainId
        '''
        seqsOfPartner = self.seqsDict[chainType]
        return seqsOfPartner[chainId]

    def enumSeqs(self, chainType):
        '''
        Generator over the chains stored for one partner. Use getSeq(chainType, chainId) to get the
        sequence and fasta file name of each of them.

        @param chainType: str. "l" for ligand and "r" for receptor
        @yields (chainType: str, chainId: str). One tuple per chain stored in self.seqsDict[chainType]
        '''
        chainsOfPartner = self.seqsDict[chainType]
        for oneChainId in chainsOfPartner:
            yield (chainType, oneChainId)

    def getSeqsOutDir(self):
        '''
        Gives the folder where the fasta file of each chain is saved

        @return fastaOutDir:str. Path to the folder of fasta files (self.fastaOutDir)
        '''
        fastaOutDir = self.fastaOutDir
        return fastaOutDir

    def getSeqsMapperOutDir(self):
        '''
        Gives the folder where the seq to struct map files are saved

        @return seqToStructDir:str. Path to the folder of seq to struct maps (self.seqToStructDir)
        '''
        seqToStructDir = self.seqToStructDir
        return seqToStructDir

    def seqToStructIndex(self, chainType, chainId, seqIndex, asString=False):
        '''
        Gets the struct id stored for the given chainType, chainId and seqIndex

        @param chainType: str. "l" for ligand and "r" for receptor
        @param chainId: str. chain id of sequence
        @param seqIndex: int. Position of the residue at the sequence
        @param asString: boolean. If False, the stored residue id tuple is returned as it is.
                                  If True, all its fields but the first one are converted to str,
                                  concatenated and stripped, e.g. (' ', 10, 'A') --> "10A"
        @return None if there is no mapping. Otherwise the residue id, as tuple or as str
        '''
        lookupKey = (chainType, chainId, seqIndex)
        try:
            structId = self.seqToStruct[lookupKey]
        except KeyError:
            return None
        if not asString:
            return structId
        fieldsAsStr = list(map(str, structId))
        return "".join(fieldsAsStr[1:]).strip()

    def structToSeqIndex(self, chainType, chainId, structIndex):
        '''
        Gets the seq index stored for the given chainType, chainId and structIndex

        @param chainType: str. "l" for ligand and "r" for receptor
        @param chainId: str. chain id of sequence
        @param structIndex: residue id, as the one provided by Bio.PDB.Residue.get_full_id()[3]
        @return seqIndex: integer. The sequential index of residue with resId==structIndex
        @raise KeyError: if there is no mapping for the given residue
        '''
        lookupKey = (chainType, chainId, structIndex)
        return self.structToSeq[lookupKey]