Esempio n. 1
0
File: PPI.py Progetto: cbxx/Phantom
    def _processPDB(self):
        """Collect the atoms of peptide and phosphate chains into a dataframe.

        Walks every chain of the first model in ``self._pdb``. A chain counts
        as a peptide chain when ``CaPPBuilder`` builds at least one polypeptide
        from it, and as a phosphate chain from its first PTR/TPO/SEP residue
        onwards. Every atom of a non-water residue in such a chain becomes one
        row; phospho-residues that have no atom named ``P`` are dropped again.

        Side effects: sets ``self._df`` (the atom dataframe),
        ``self._peptideChains`` (chain id -> sequence of the first polypeptide
        of that chain) and ``self._phosphateChains`` (set of chain ids).

        Raises:
            Exception: if no atom was collected at all.
        """
        self._logger.info("Processing PDB")
        phospho_resnames = ('PTR', 'TPO', 'SEP')
        ppb = CaPPBuilder()
        d = []
        peptide_chains = {}
        phosphate_chains = set()
        # Loop over all chains of the first model
        for chain_idx, chain in enumerate(self._pdb[0]):
            isPeptideChain = False
            isPhosphateChain = False
            # try to create peptide sequence
            pp = ppb.build_peptides(chain)
            if pp:
                # tag chain as peptide chain
                isPeptideChain = True
                # str(seq) instead of seq.tostring(): the latter was deprecated
                # and is no longer available in current Biopython releases.
                peptide_chains[chain.get_id()] = str(pp[0].get_sequence())
            # loop over residues in chain
            for residue in chain:
                resn = residue.get_resname()
                isPhosphoResidue = resn in phospho_resnames
                if isPhosphoResidue:
                    # Chain contains a phospho-residue; tag as phosphateChain
                    isPhosphateChain = True
                    phosphate_chains.add(chain.get_id())
                # process atoms only if residue is not water and is part of a
                # peptide or phospho chain
                if residue.get_id()[0] == 'W' or not (isPeptideChain or isPhosphateChain):
                    continue

                resi = residue.get_id()[1]
                inscode = residue.get_id()[2].strip()
                hasPhosphate = False
                # Remember where this residue's rows start so they can be
                # removed precisely, even when the residue has no atoms
                # (a negative slice of length zero would wipe the whole list).
                residue_start = len(d)

                for atom in residue:
                    vdw = -1
                    if isPeptideChain:
                        elem = atom.element
                        if elem:
                            # normalise e.g. 'FE' -> 'Fe' for the periodic table lookup
                            elem = elem if len(elem) == 1 else elem[0] + elem[1].lower()
                            vdw = self._periodicTable.GetRvdw(self._periodicTable.GetAtomicNumber(elem))
                    coords = atom.get_coord()
                    sn = atom.get_serial_number()
                    # append to dataframe
                    d.append((chain.get_id().strip(), chain_idx, resn, resi, inscode, sn, atom.get_name(), isPeptideChain, isPhosphateChain, coords[0], coords[1], coords[2], vdw))

                    if isPhosphoResidue and atom.get_name().strip() == 'P' and len(coords) == 3:
                        hasPhosphate = True

                if isPhosphoResidue and not hasPhosphate:
                    # drop the residue again because it has no annotated phosphate
                    del d[residue_start:]

        if not d:
            raise Exception('No amino acids found.')
        # save list to dataframe
        data = np.zeros((len(d), ), dtype=[('chain', 'a1'), ('chain_idx', 'a1'), ('resn', 'a3'), ('resi', 'i4'), ('inscode', 'a1'), ('sn', 'i4'), ('an', 'a4'), ('peptideChain', 'b'), ('phosphateChain', 'b'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('vdw', 'f4')])
        data[:] = d
        idf = pd.DataFrame(data)
        idf[['peptideChain', 'phosphateChain']] = idf[['peptideChain', 'phosphateChain']].astype('bool')
        self._df = idf
        self._peptideChains = peptide_chains
        self._phosphateChains = phosphate_chains
Esempio n. 2
0
    def __init__(self, model, radius=12.0, offset=0):
        """Initialize.

        For every amino-acid residue that has a CA atom, count the CA atoms
        of the other polypeptide residues lying closer than ``radius`` to it.
        The counts are stored under ``(chain_id, residue_id)`` keys, handed
        to ``AbstractPropertyMap.__init__`` and also written to each
        residue's ``xtra['EXP_CN']`` entry.

        :param model: the model that contains the residues
        :type model: L{Model}

        :param radius: radius of the sphere (centred at the CA atom)
        :type radius: float

        :param offset: number of flanking residues (within the same
                       polypeptide) that are ignored when counting neighbors
        :type offset: int

        """
        assert(offset >= 0)
        polypeptides = CaPPBuilder().build_peptides(model)
        cn_map = {}
        cn_list = []
        cn_keys = []
        for pp_a in polypeptides:
            for idx_a, res_a in enumerate(pp_a):
                if not (is_aa(res_a) and res_a.has_id('CA')):
                    continue
                ca_a = res_a['CA']
                neighbors = 0
                for pp_b in polypeptides:
                    same_polypeptide = pp_a is pp_b
                    for idx_b, res_b in enumerate(pp_b):
                        # Sequence neighbours inside the same polypeptide
                        # (including the residue itself) are not counted.
                        if same_polypeptide and abs(idx_a - idx_b) <= offset:
                            continue
                        if not (is_aa(res_b) and res_b.has_id('CA')):
                            continue
                        # Subtracting two atoms yields their distance.
                        if (res_b['CA'] - ca_a) < radius:
                            neighbors += 1
                key = (res_a.get_parent().get_id(), res_a.get_id())
                # Fill the 3 data structures
                cn_map[key] = neighbors
                cn_list.append((res_a, neighbors))
                cn_keys.append(key)
                # Add to xtra
                res_a.xtra['EXP_CN'] = neighbors
        AbstractPropertyMap.__init__(self, cn_map, cn_keys, cn_list)
Esempio n. 3
0
    def __init__(self, model, radius=12.0, offset=0):
        """
        Compute the CA contact number of every residue in the model.

        The contact number of a residue is the number of CA atoms (from
        amino-acid residues of any polypeptide built from the model) that
        are closer than C{radius} to the residue's own CA atom. Results are
        keyed by C{(chain_id, residue_id)}, passed on to
        C{AbstractPropertyMap.__init__} and stored in C{residue.xtra['EXP_CN']}.

        @param model: the model that contains the residues
        @type model: L{Model}

        @param radius: radius of the sphere (centred at the CA atom)
        @type radius: float

        @param offset: number of flanking residues that are ignored in the
            calculation of the number of neighbors
        @type offset: int

        """
        assert (offset >= 0)
        ppb = CaPPBuilder()
        ppl = ppb.build_peptides(model)
        exposure_by_key = {}
        exposure_pairs = []
        ordered_keys = []
        for pp1 in ppl:
            for i, r1 in enumerate(pp1):
                if not is_aa(r1):
                    continue
                if not r1.has_id('CA'):
                    continue
                ca1 = r1['CA']
                # One hit per residue that is outside the ignored sequence
                # window, is an amino acid with a CA atom, and whose CA lies
                # within the sphere around ca1.
                fs = sum(
                    1
                    for pp2 in ppl
                    for j, r2 in enumerate(pp2)
                    if not (pp1 is pp2 and abs(i - j) <= offset)
                    and is_aa(r2)
                    and r2.has_id('CA')
                    and (r2['CA'] - ca1) < radius
                )
                entry_key = (r1.get_parent().get_id(), r1.get_id())
                # Fill the 3 data structures
                exposure_by_key[entry_key] = fs
                exposure_pairs.append((r1, fs))
                ordered_keys.append(entry_key)
                # Add to xtra
                r1.xtra['EXP_CN'] = fs
        AbstractPropertyMap.__init__(self, exposure_by_key, ordered_keys, exposure_pairs)
Esempio n. 4
0
    def __init__(self, model, radius, offset=0, hse_up_key='HSE_U', hse_down_key='HSE_D', angle_key=None, check_chain_breaks=False, 
                 check_knots=False, receptor=None, signprot=None):
        """
        Compute half-sphere exposure per residue and, in addition, collect
        atom clashes, optional chain breaks and optional knot candidates.

        For every residue for which C{self._get_cb} returns a result, the CA
        atoms within C{radius} are split into an "up" and a "down" half
        sphere; the counts are stored in C{residue.xtra}. Afterwards the
        atoms of the residue are compared against the atoms of the residues
        found in the upper half sphere and close pairs are recorded in
        C{self.clash_pairs}.

        Attributes set here: C{self.ca_cb_list}, C{self.clash_pairs},
        C{self.chain_breaks} and, only when C{check_knots} is true,
        C{self.remodel_resis}.

        NOTE(review): C{hse_map}, C{hse_list} and C{hse_keys} are filled but
        not passed to any base-class initializer inside this method - confirm
        this is intended.

        @param model: model
        @type model: L{Model}

        @param radius: HSE radius
        @type radius: float

        @param offset: number of flanking residues that are ignored in the calculation
        of the number of neighbors
        @type offset: int

        @param hse_up_key: key used to store HSEup in the entity.xtra attribute
        @type hse_up_key: string

        @param hse_down_key: key used to store HSEdown in the entity.xtra attribute
        @type hse_down_key: string

        @param angle_key: key used to store the angle between CA-CB and CA-pCB in
        the entity.xtra attribute
        @type angle_key: string

        @param check_chain_breaks: when True, residue numbers of amino-acid
        residues that are not part of any built polypeptide are stored in
        C{self.chain_breaks} (compared by residue number only, without chain id)
        @type check_chain_breaks: bool

        @param check_knots: when true, C{PossibleKnots(receptor, signprot)} is
        queried and matching residue ranges are stored in C{self.remodel_resis}
        @type check_knots: bool

        @param receptor: forwarded unchanged to C{PossibleKnots}; only used
        when C{check_knots} is true
        @param signprot: forwarded unchanged to C{PossibleKnots}; only used
        when C{check_knots} is true
        """
        assert(offset>=0)
        # For PyMOL visualization
        self.ca_cb_list=[]
        ppb=CaPPBuilder()
        # Polypeptides are built from the object as passed in, before the
        # model[0] substitution below.
        ppl=ppb.build_peptides(model)
        hse_map={}
        hse_list=[]
        hse_keys=[]
        ### GP
        # NOTE(review): presumably this handles callers passing a container
        # whose id is not 0 (e.g. a structure) instead of the first model -
        # confirm against callers.
        if model.get_id()!=0:
            model = model[0]
        residues_in_pdb,residues_with_proper_CA=[],[]
        if check_chain_breaks==True:
            # Collect the residue number of every amino-acid residue present
            # in the model; chain ids are not recorded.
            # for m in model:
                for chain in model:
                    for res in chain:
                        # try:
                            if is_aa(res):
                                residues_in_pdb.append(res.get_id()[1])
                        # except:
                        #     if is_aa(chain):
                        #         residues_in_pdb.append(chain.get_id()[1])
                        #         print('chain', chain, res)
                        #         break
        self.clash_pairs = []
        self.chain_breaks = []
        
        if check_knots:
            possible_knots = PossibleKnots(receptor, signprot)
            # From the indexing further down: each entry is used as
            # ((chain_id, resnum), (chain_id, sequence_of_resnums)).
            knot_resis = possible_knots.get_resnums()
            self.remodel_resis = {}

        for pp1 in ppl:
            for i in range(0, len(pp1)):
                # Every residue that made it into a polypeptide counts as
                # "proper" for the chain-break check at the end.
                residues_with_proper_CA.append(pp1[i].get_id()[1])
                if i==0:
                    r1=None
                else:
                    r1=pp1[i-1]
                r2=pp1[i]
                if i==len(pp1)-1:
                    r3=None
                else:
                    r3=pp1[i+1]
                # This method is provided by the subclasses to calculate HSE
                result=self._get_cb(r1, r2, r3)
                if result is None:
                    # Missing atoms, or i==0, or i==len(pp1)-1
                    continue
                pcb, angle=result
                hse_u=0
                hse_d=0
                ca2=r2['CA'].get_vector()
                residue_up=[]   ### GP
                residue_down=[] ### GP
                for pp2 in ppl:
                    for j in range(0, len(pp2)):
                        # The try/except is used as control flow:
                        #  - residue numbering around r2 is NOT contiguous:
                        #    fall through, so sequence neighbours are counted;
                        #  - numbering is contiguous (explicit raise), or r1/r3
                        #    is None (attribute access raises): apply the usual
                        #    "ignore residues within offset" rule.
                        try:
                            if r2.get_id()[1]-1!=r1.get_id()[1] or r2.get_id()[1]+1!=r3.get_id()[1]:
                                pass
                            else:
                                raise Exception
                        except:
                            if pp1 is pp2 and abs(i-j)<=offset:
                            # neighboring residues in the chain are ignored
                                continue
                        ro=pp2[j]
                        if not is_aa(ro) or not ro.has_id('CA'):
                            continue
                        cao=ro['CA'].get_vector()
                        d=(cao-ca2)
                        if d.norm()<radius:
                            # Angle between the CA->CA vector and pcb decides
                            # which half sphere the neighbour falls into.
                            if d.angle(pcb)<(math.pi/2):
                                hse_u+=1
                                ### GP
                                # Puts residues' names in a list that were found in the upper half sphere
                                residue_up.append(ro)

                                ### end of GP code
                            else:
                                hse_d+=1
                                ### GP
                                # Puts residues' names in a list that were found in the lower half sphere
                                residue_down.append(ro)
                                ### end of GP code
                res_id=r2.get_id()
                chain_id=r2.get_parent().get_id()
                # Fill the 3 data structures
                hse_map[(chain_id, res_id)]=(hse_u, hse_d, angle)
                hse_list.append((r2, (residue_up, residue_down, hse_u, hse_d, angle)))
                ### GP residue_up and residue_down added to hse_list
                hse_keys.append((chain_id, res_id))
                # Add to xtra
                r2.xtra[hse_up_key]=hse_u
                r2.xtra[hse_down_key]=hse_d
                if angle_key:
                    r2.xtra[angle_key]=angle

                ### GP checking for knots
                if check_knots:
                    for knot in knot_resis:
                        # knot[0] identifies the current residue by
                        # (chain id, residue number).
                        if knot[0][1]==pp1[i].get_id()[1] and knot[0][0]==pp1[i].get_parent().get_id():
                            print(pp1[i].get_parent().get_id(),pp1[i])
                            for r in residue_up:
                                # A residue of the knot[1] range sits in the
                                # upper half sphere: remember the range's
                                # first and last residue number per chain,
                                # without duplicates.
                                if r.get_parent().get_id()==knot[1][0] and r.get_id()[1] in knot[1][1]:
                                    print('close: ', r.get_parent().get_id(),r)
                                    resi_range = [knot[1][1][0], knot[1][1][-1]]
                                    if knot[1][0] not in self.remodel_resis:
                                        self.remodel_resis[knot[1][0]] = [resi_range]
                                    else:
                                        if resi_range not in self.remodel_resis[knot[1][0]]:
                                            self.remodel_resis[knot[1][0]].append(resi_range)

                ### GP checking for atom clashes
                # include_prev / include_next become True when the residue
                # numbering towards the previous / next polypeptide entry is
                # not contiguous; any error while looking that up leaves them
                # False. Note that pp1[i-1] with i==0 indexes from the end.
                include_prev, include_next = False, False
                try:
                    if pp1[i].get_id()[1]-1!=pp1[i-1].get_id()[1]:
                        include_prev = True
                except:
                    include_prev = False
                try:
                    if pp1[i].get_id()[1]+1!=pp1[i+1].get_id()[1]:
                        include_next = True
                except:
                    include_next = False
                for atom in pp1[i]:
                    ref_vector = atom.get_vector()
                    # Only residues from the upper half sphere are examined.
                    for other_res in residue_up:
                        # Again exceptions drive the control flow: directly
                        # adjacent, contiguously numbered residues are skipped;
                        # every other case (explicit raise, or an error from
                        # pp1[i+1] on the last residue) runs the clash test.
                        try:
                            if other_res==pp1[i-1] and include_prev==False:
                                continue
                            elif len(pp1)>=i+1 and other_res==pp1[i+1] and include_next==False:
                                continue
                            else:
                                raise Exception
                        except:
                            for other_atom in other_res:
                                other_vector = other_atom.get_vector()
                                d = other_vector-ref_vector
                                # NOTE(review): threshold of 2 is presumably
                                # in Angstrom - confirm.
                                if d.norm()<2:
                                    # The CA b-factor of each residue is stored
                                    # alongside its residue number. When its
                                    # string form has a single decimal digit a
                                    # '0' is appended before converting back
                                    # to float.
                                    # NOTE(review): presumably the b-factor
                                    # column carries a residue label here -
                                    # verify against the input files.
                                    if len(str(pp1[i]['CA'].get_bfactor()).split('.')[1])==1:
                                        clash_res1 = float(str(pp1[i]['CA'].get_bfactor())+'0')
                                    else:
                                        clash_res1 = pp1[i]['CA'].get_bfactor()
                                    if len(str(other_res['CA'].get_bfactor()).split('.')[1])==1:
                                        clash_res2 = float(str(other_res['CA'].get_bfactor())+'0')
                                    else:
                                        clash_res2 = other_res['CA'].get_bfactor()
                                    self.clash_pairs.append([(clash_res1, pp1[i].get_id()[1]), (clash_res2, other_res.get_id()[1])])
        if check_chain_breaks==True:
            # Residue numbers seen in the model but in no polypeptide are
            # reported as chain breaks.
            for r in residues_in_pdb:
                if r not in residues_with_proper_CA:
                    self.chain_breaks.append(r)
Esempio n. 5
0
    def __init__(self,
                 model,
                 radius,
                 offset,
                 hse_up_key,
                 hse_down_key,
                 angle_key=None):
        """
        Compute the half-sphere exposure of every residue in the model.

        For each residue for which C{self._get_cb} yields a direction vector,
        the CA atoms within C{radius} of its CA atom are counted separately
        for the half sphere the vector points into ("up") and the opposite
        one ("down"). The counts (and the angle returned by C{_get_cb}) are
        passed to C{AbstractPropertyMap.__init__} and stored in the
        residue's C{xtra} dictionary.

        @param model: model
        @type model: L{Model}

        @param radius: HSE radius
        @type radius: float

        @param offset: number of flanking residues that are ignored in the calculation
        of the number of neighbors
        @type offset: int

        @param hse_up_key: key used to store HSEup in the entity.xtra attribute
        @type hse_up_key: string

        @param hse_down_key: key used to store HSEdown in the entity.xtra attribute
        @type hse_down_key: string

        @param angle_key: key used to store the angle between CA-CB and CA-pCB in
            the entity.xtra attribute
        @type angle_key: string
        """
        assert (offset >= 0)
        # For PyMOL visualization
        self.ca_cb_list = []
        polypeptides = CaPPBuilder().build_peptides(model)
        exposure_map = {}
        exposure_list = []
        exposure_keys = []
        for pp1 in polypeptides:
            last_index = len(pp1) - 1
            for i, centre in enumerate(pp1):
                # Flanking residues; None at either end of the polypeptide.
                before = pp1[i - 1] if i != 0 else None
                after = pp1[i + 1] if i != last_index else None
                # This method is provided by the subclasses to calculate HSE
                cb_result = self._get_cb(before, centre, after)
                if cb_result is None:
                    # Missing atoms, or i==0, or i==len(pp1)-1
                    continue
                pcb, angle = cb_result
                up_count = 0
                down_count = 0
                centre_ca = centre['CA'].get_vector()
                for pp2 in polypeptides:
                    same_polypeptide = pp1 is pp2
                    for j, other in enumerate(pp2):
                        # neighboring residues in the chain are ignored
                        if same_polypeptide and abs(i - j) <= offset:
                            continue
                        if not (is_aa(other) and other.has_id('CA')):
                            continue
                        separation = other['CA'].get_vector() - centre_ca
                        if not separation.norm() < radius:
                            continue
                        # Less than 90 degrees from pcb means "up" half sphere.
                        if separation.angle(pcb) < (pi / 2):
                            up_count += 1
                        else:
                            down_count += 1
                key = (centre.get_parent().get_id(), centre.get_id())
                values = (up_count, down_count, angle)
                # Fill the 3 data structures
                exposure_map[key] = values
                exposure_list.append((centre, values))
                exposure_keys.append(key)
                # Add to xtra
                centre.xtra[hse_up_key] = up_count
                centre.xtra[hse_down_key] = down_count
                if angle_key:
                    centre.xtra[angle_key] = angle
        AbstractPropertyMap.__init__(self, exposure_map, exposure_keys, exposure_list)
Esempio n. 6
0
def main(argv=None):  # IGNORE:C0111
    """Build a table of atom contacts between residues and ligands.

    Steps, all driven by command line options:

    1. Validate the input/output paths (exits with status 1 on failure).
    2. Load or build the list of PDB codes that contain at least one
       residue whose name is classified as a drug-like compound in
       ``compound_type`` (``--pdbs_with_drug``).
    3. Load the hmmscan domain table (``--domains``), caching a reduced
       CSV copy next to it under the same name plus ``"2"``.
    4. For every not-yet-processed PDB with a drug, append to
       ``--distances`` one tab-separated line per residue-atom/ligand-atom
       pair closer than ``--dist``; processed codes are appended to
       ``--procesados`` so that an interrupted run can be resumed.

    :param argv: extra command line arguments, parsed after the ones
        already present in ``sys.argv[1:]``; ``None`` uses ``sys.argv`` only.
    """
    if argv is None:
        cli_args = sys.argv[1:]
    else:
        # Same effective argument list as appending to sys.argv, but without
        # mutating the global (which accumulated arguments across calls).
        cli_args = sys.argv[1:] + list(argv)

    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument("-v",
                        "--verbose",
                        dest="verbose",
                        action="count",
                        help="set verbosity level [default: %(default)s]")

    # parser.add_argument("-dir", "--structs_dir", required = True )
    parser.add_argument("-db", "--database_name", default='pdb')
    parser.add_argument("-host", "--db_host", default='127.0.0.1')

    parser.add_argument("--procesados",
                        default='/tmp/pdbs_dist_procesados.txt')
    parser.add_argument("--domains",
                        default='/data/databases/pdb/processed/dns_pdbs.tlb')
    parser.add_argument(
        "--seqs", default='/data/databases/pdb/processed/pdb_seq_res.fasta')
    parser.add_argument("--pdbs", default='/data/databases/pdb/')
    parser.add_argument(
        "--distances",
        default='/data/databases/pdb/processed/distances.tbl',
        help=
        "Final output: table with atom distances between residues and ligands. Only for distances less than 'dist' parameter"
    )
    # type=float: a value given on the command line must be comparable with
    # the numeric atom distances.
    parser.add_argument("--dist", default=5, type=float)
    parser.add_argument(
        "--pdbs_with_drug",
        default='/data/databases/pdb/processed/pdbs_with_drug.txt',
        help="Output: list of PDB codes with an associated ligand")

    args = parser.parse_args(cli_args)

    if not os.path.exists(args.pdbs):
        sys.stderr.write(
            "%s not found. Specify where is pdbs/divided directory" %
            (args.pdbs))
        sys.exit(1)
    PDB_PATH = args.pdbs
    CONTACT_DIST = args.dist

    # A bare file name has an empty dirname; treat that as the current dir.
    pdbs_with_drug_path = args.pdbs_with_drug
    if not os.path.exists(os.path.dirname(pdbs_with_drug_path) or "."):
        sys.stderr.write("can't create %s. Set pdbs_with_drug correctly" %
                         (pdbs_with_drug_path))
        sys.exit(1)

    if not os.path.exists(os.path.dirname(args.distances) or "."):
        sys.stderr.write("can't create %s. Set distances correctly" %
                         (args.distances))
        sys.exit(1)

    pdbs_procesados_path = args.procesados
    print(
        "In %s the processed pdbs are kept, if the file is deleted, the process starts from scratch "
        % pdbs_procesados_path)
    print("Outputs: '%s' and '%s' " % (pdbs_with_drug_path, args.distances))

    # Always a set, so that both membership tests and later additions work
    # whether or not the progress file already exists.
    pdbs_procesados = set()
    if os.path.exists(pdbs_procesados_path):
        with open(pdbs_procesados_path) as handle:
            pdbs_procesados = {x.strip() for x in handle}

    pdbs_iterator = PDBsIterator(pdb_dir=args.pdbs)

    DNsPDBs = args.domains

    if not os.path.exists(DNsPDBs):
        seqs_from_pdb = args.seqs
        if not os.path.exists(seqs_from_pdb):
            sys.stderr.write(
                "%s does not exists and %s not found. Specify where it is." %
                (DNsPDBs, seqs_from_pdb))
            sys.exit(1)

        sys.stderr.write(
            "%s not found. You can create it with the following command: \n" %
            DNsPDBs)
        sys.stderr.write(
            "hmmscan --cut_tc --domtblout dns_pdbs.tlb --acc -o pdb_seq_res.hmm Pfam-A.hmm seqs_from_pdb.fasta"
        )
        sys.exit(1)

    # Residue names treated as ligands ("drugs") and as amino acids; sets
    # because they are probed once per residue of every structure.
    drugcompounds = {
        x for x, y in compound_type.items()
        if y in ("DRUG", "COFACTOR", "METAL", "SUGAR", "NUCLEOTIDE", "LIPID")
    }
    aminoacidcompounds = {
        x for x, y in compound_type.items() if y in ("MODIFIED", "RESIDUE")
    }

    _log.info("proceced pdbs: %i" % len(pdbs_procesados))

    p = PDBParser(PERMISSIVE=1, QUIET=1)

    pdbs_with_drug = []
    if os.path.exists(pdbs_with_drug_path):
        _log.info("pdbs with drugs already loaded")
        with open(pdbs_with_drug_path) as handle:
            for x in handle:
                pdbs_with_drug.append(x.strip())
    else:
        with open(pdbs_with_drug_path, "a") as handle:
            _log.info("pdbs with drugs will be loaded")
            pdbs = list(pdbs_iterator)
            already_listed = set()
            for pdb, file_path in tqdm(pdbs):
                try:
                    if pdb not in already_listed:
                        structure = p.get_structure(pdb, file_path)
                        for res in structure.get_residues():
                            if res.resname in drugcompounds:
                                already_listed.add(pdb)
                                pdbs_with_drug.append(pdb)
                                # flushed per entry so that progress survives
                                # an interrupted scan
                                handle.write(pdb + "\n")
                                handle.flush()
                                break
                except Exception as ex:
                    # best effort: an unreadable structure must not stop the scan
                    print(str(ex))

    # import re
    # dns_table = re.sub(r" +", "\t","\n".join( [str(i) + "\t" + x for i,x in enumerate(open('/data/databases/pdb/processed/dns_pdbs.tlb').readlines()) if not x.startswith("#") ]) )
    if not os.path.exists(DNsPDBs + "2"):
        cols = [
            "target_name", "accession", "tlen", "query_name", "accession2",
            "qlen", "E-value", "score1", "bias1", "#", "of", "c-Evalue",
            "i-Evalue", "score2", "bias2", "from1", "to1", "from2", "to2",
            "from3", "to3", "acc"
        ]
        _log.info("correcting hmmer-pdb output")

        regexp = re.compile(" +")
        items = []
        with open(DNsPDBs) as handle:
            for x in tqdm(handle.readlines()):
                if not x.startswith("#"):
                    line = regexp.split(x)
                    # anything beyond the named columns is dropped
                    items.append(line[0:len(cols)])

        df_hmm = pd.DataFrame.from_records(items, columns=cols)
        # df_hmm =  df = pd.read_table('/data/databases/pdb/processed/dns_pdbs.tlb', index_col=None, header=None, delimiter=r"\s+",comment="#",names=cols)
        # df_hmm = df_hmm.dropna()
        df_hmm = df_hmm[["accession", "query_name", "from3", "to3"]].copy()
        # the cache holds only the four raw columns; derived ones are rebuilt
        df_hmm.to_csv(DNsPDBs + "2")
    else:
        df_hmm = pd.read_csv(DNsPDBs + "2")

    # query_name is split on "_" into pdb code, chain, start and end residue.
    # Series.map is used because the built-in map() is lazy in Python 3 and
    # is not a valid column value.
    query_names = df_hmm["query_name"]
    df_hmm["pdb"] = query_names.map(
        lambda x: x.split("_")[0].lower().strip())
    df_hmm["chain"] = query_names.map(
        lambda x: x.split("_")[1].upper().strip())
    df_hmm["start_res"] = query_names.map(
        lambda x: x.split("_")[2].upper().strip())
    df_hmm["end_res"] = query_names.map(
        lambda x: x.split("_")[3].upper().strip())
    print(len(df_hmm))

    lock = Lock()

    def centeroid(arr):
        """Return the mean (x, y, z) of the ``coord`` of the given atoms."""
        length = len(arr)
        sum_x = np.sum([x.coord[0] for x in arr])
        sum_y = np.sum([x.coord[1] for x in arr])
        sum_z = np.sum([x.coord[2] for x in arr])
        return sum_x / length, sum_y / length, sum_z / length

    def residues_near_drug(drug_centroid, aa_residues):
        """Return the residues having an atom closer than 10 to the centroid.

        A residue is abandoned at the first atom farther than 20 from the
        centroid.
        """
        centre = Struct(coord=drug_centroid)
        residues_near = []
        for r in aa_residues:
            for a in r:
                dist = a - centre
                if dist > 20:
                    break
                if dist < 10:
                    residues_near.append(r)
                    break
        return residues_near

    def juan(pdb_raw):
        """Process one PDB code and record it as processed, even on failure."""
        try:
            pepe(pdb_raw)
        except Exception:
            traceback.print_exc()
        finally:
            with lock:
                pdbs_procesados.add(pdb_raw)
                with open(pdbs_procesados_path, "a") as handle:
                    handle.write(pdb_raw + "\n")

    def pepe(pdb):
        """Append all residue/ligand atom contacts of one PDB entry."""
        p = PDBParser(PERMISSIVE=1, QUIET=1)
        path_dir = PDB_PATH + "/" + pdb[1:3].lower() + "/"
        path = path_dir + "pdb" + pdb.lower() + ".ent"
        model = list(p.get_structure('X', path))[0]

        for chain_obj in list(model):
            chain = chain_obj.id

            # residue number -> name of the domain that covers it
            hmm_residues = {}

            pdb_seq = list(model[chain].get_residues())
            if pdb_seq:
                hmms = df_hmm[(df_hmm["pdb"] == pdb)
                              & (df_hmm["chain"] == chain) &
                              (df_hmm["start_res"] == str(pdb_seq[0].id[1]))]
                for j, hmm in hmms.iterrows():
                    try:
                        # from3/to3 are used as 1-based inclusive positions
                        hmm_start = int(hmm["from3"]) - 1
                        hmm_end = int(hmm["to3"]) - 1
                        hmm_chain_name = "_".join(
                            map(str, [
                                hmm["accession"].split(".")[0], hmm["chain"],
                                pdb_seq[hmm_start].id[1],
                                pdb_seq[hmm_end].id[1]
                            ]))
                        # +1 so that the end residue named in hmm_chain_name
                        # is itself assigned to the domain
                        hmm_residues.update({
                            res.id[1]: hmm_chain_name
                            for res in pdb_seq[hmm_start:hmm_end + 1]
                        })
                    except IndexError:
                        print(pdb, hmm["accession"], hmm["chain"], hmm_start,
                              hmm_end, pdb_seq)

            aa_residues = []
            drug_molecules = []
            for res_obj in chain_obj.get_residues():
                if res_obj.resname in drugcompounds:
                    drug_molecules.append(res_obj)
                elif res_obj.resname in aminoacidcompounds:
                    aa_residues.append(res_obj)

            for res_drug_obj in drug_molecules:
                drug_centroid = centeroid(list(res_drug_obj))
                near_residues = residues_near_drug(drug_centroid, aa_residues)
                for drug_atom in res_drug_obj:
                    for near_residue in near_residues:
                        for residue_atom in near_residue:
                            distance = (residue_atom - drug_atom)
                            if distance > 20:
                                # give up on this residue for this ligand atom
                                break
                            if distance < CONTACT_DIST:
                                hmm_name = hmm_residues.get(
                                    near_residue.id[1], "NoDn")
                                fields = [
                                    pdb, chain, hmm_name,
                                    near_residue.id[1],
                                    near_residue.resname,
                                    residue_atom.serial_number,
                                    res_drug_obj.id[1],
                                    res_drug_obj.resname,
                                    drug_atom.serial_number, distance
                                ]
                                with open(args.distances, "a") as handle:
                                    handle.write("\t".join(map(str, fields)) +
                                                 "\n")

    _log.info("processing distances file")
    for x in tqdm(set(pdbs_with_drug)):
        if x not in pdbs_procesados:
            juan(x)

    # pool = ThreadPool(1)
    # pool.map(juan, set(pdbs_with_drug) - set(pdbs_procesados))

    print("Finished!!!")
Esempio n. 7
0
    def pepe(pdb):
        """Append all drug / amino-acid atom contacts of one PDB entry to the distances file.

        The entry is read from ``PDB_PATH/<chars 1-2 of pdb>/pdb<pdb>.ent`` and
        only the first child of the parsed structure is used. For every chain,
        residues are labelled with the HMM hit (rows of ``df_hmm``) they belong
        to, residues whose name is in ``drugcompounds`` are treated as drugs and
        residues whose name is in ``aminoacidcompounds`` as candidate partners.
        One tab-separated line is appended to ``args.distances`` for each atom
        pair closer than ``CONTACT_DIST``.

        @param pdb: PDB code used to build the file path and to filter ``df_hmm``
        """
        # Not used below; the construction itself is kept from the original flow.
        ppb = CaPPBuilder()
        parser = PDBParser(PERMISSIVE=1, QUIET=1)
        entry_path = (PDB_PATH + "/" + pdb[1:3].lower() + "/" + "pdb" +
                      pdb.lower() + ".ent")
        model = list(parser.get_structure('X', entry_path))[0]

        for chain_obj in list(model):
            chain = chain_obj.id
            # residue number -> label of the HMM hit covering it
            hmm_residues = {}

            pdb_seq = list(model[chain].get_residues())
            if pdb_seq:
                # Only hits whose start_res equals the first residue number of
                # this chain (compared as a string) are considered.
                first_resnum = str(pdb_seq[0].id[1])
                hits = df_hmm[(df_hmm["pdb"] == pdb)
                              & (df_hmm["chain"] == chain)
                              & (df_hmm["start_res"] == first_resnum)]
                for _, hit in hits.iterrows():
                    try:
                        hmm_start = int(hit["from3"]) - 1
                        hmm_end = int(hit["to3"]) - 1
                        label_parts = [
                            hit["accession"].split(".")[0],
                            hit["chain"],
                            pdb_seq[hmm_start].id[1],
                            pdb_seq[hmm_end].id[1],
                        ]
                        domain_label = "_".join(
                            str(part) for part in label_parts)
                        # NOTE(review): the slice stops before hmm_end, so the
                        # residue whose number closes the label is not tagged
                        # itself - confirm whether that is intended.
                        covered = (res.id[1]
                                   for res in pdb_seq[hmm_start:hmm_end])
                        hmm_residues.update(
                            dict.fromkeys(covered, domain_label))
                    except IndexError:
                        # from3/to3 point outside the residues present in the chain
                        print(pdb, hit["accession"], hit["chain"], hmm_start,
                              hmm_end, pdb_seq)

            # Split the chain into drug molecules and amino-acid residues.
            aa_residues = []
            drug_molecules = []
            for res_obj in chain_obj.get_residues():
                resname = res_obj.resname
                if resname in drugcompounds:
                    drug_molecules.append(res_obj)
                elif resname in aminoacidcompounds:
                    aa_residues.append(res_obj)

            for drug in drug_molecules:
                near_residues = residues_near_drug(centeroid(list(drug)),
                                                   aa_residues)
                for drug_atom in list(drug):
                    for near_residue in near_residues:
                        for residue_atom in list(near_residue):
                            distance = residue_atom - drug_atom
                            if distance > 20:
                                # give up on the remaining atoms of this residue
                                break
                            if not distance < CONTACT_DIST:
                                continue
                            with open(args.distances, "a") as handle:
                                resnum = near_residue.id[1]
                                row = [
                                    pdb,
                                    chain,
                                    hmm_residues.get(resnum, "NoDn"),
                                    resnum,
                                    near_residue.resname,
                                    residue_atom.serial_number,
                                    drug.id[1],
                                    drug.resname,
                                    drug_atom.serial_number,
                                    distance,
                                ]
                                handle.write("\t".join(map(str, row)) + "\n")
Esempio n. 8
0
    def __init__(self, model, radius, offset=0, hse_up_key='HSE_U', hse_down_key='HSE_D', angle_key=None, check_chain_breaks=False, 
                 check_knots=False, receptor=None, signprot=None):
        """
        Compute the half-sphere exposure (HSE) of every residue found in the
        polypeptides built by CaPPBuilder, store it in each residue's xtra
        dictionary and, in addition, record atom clashes (self.clash_pairs),
        optionally chain breaks (self.chain_breaks) and optionally residue
        ranges flagged by the knot check (self.remodel_resis).

        @param model: model
        @type model: L{Model}

        @param radius: HSE radius
        @type radius: float

        @param offset: number of flanking residues that are ignored in the calculation
        of the number of neighbors
        @type offset: int

        @param hse_up_key: key used to store HSEup in the entity.xtra attribute
        @type hse_up_key: string

        @param hse_down_key: key used to store HSEdown in the entity.xtra attribute
        @type hse_down_key: string

        @param angle_key: key used to store the angle between CA-CB and CA-pCB in
        the entity.xtra attribute
        @type angle_key: string

        @param check_chain_breaks: if True, residue numbers of amino acids that
        are present in the model but not part of any built polypeptide are
        collected in self.chain_breaks
        @type check_chain_breaks: bool

        @param check_knots: if True, PossibleKnots(receptor, signprot) is queried
        and matching residue ranges are collected in self.remodel_resis
        @type check_knots: bool

        @param receptor: passed through to PossibleKnots; only used when
        check_knots is set
        @param signprot: passed through to PossibleKnots; only used when
        check_knots is set
        """
        assert(offset>=0)
        # For PyMOL visualization
        self.ca_cb_list=[]
        ppb=CaPPBuilder()
        # The polypeptides are built from the object as passed in, i.e. before
        # the model[0] substitution a few lines below.
        ppl=ppb.build_peptides(model)
        # NOTE(review): hse_map, hse_list and hse_keys are filled in the loop but
        # are not handed on anywhere within this method as shown.
        hse_map={}
        hse_list=[]
        hse_keys=[]
        ### GP
        # If the passed object does not have id 0, continue with its child 0.
        if model.get_id()!=0:
            model = model[0]
        residues_in_pdb,residues_with_proper_CA=[],[]
        if check_chain_breaks==True:
            # Collect the residue numbers of all amino acids in the model.
            # for m in model:
                for chain in model:
                    for res in chain:
                        # try:
                            if is_aa(res):
                                residues_in_pdb.append(res.get_id()[1])
                        # except:
                        #     if is_aa(chain):
                        #         residues_in_pdb.append(chain.get_id()[1])
                        #         print('chain', chain, res)
                        #         break
        self.clash_pairs = []
        self.chain_breaks = []
        
        if check_knots:
            possible_knots = PossibleKnots(receptor, signprot)
            knot_resis = possible_knots.get_resnums()
            # chain id -> list of [first, last] residue-number ranges
            self.remodel_resis = {}

        for pp1 in ppl:
            for i in range(0, len(pp1)):
                # Every residue that made it into a polypeptide counts as
                # "proper" for the chain-break comparison at the end.
                residues_with_proper_CA.append(pp1[i].get_id()[1])
                # r1 / r3 are the previous / next residue in the polypeptide,
                # or None at the ends.
                if i==0:
                    r1=None
                else:
                    r1=pp1[i-1]
                r2=pp1[i]
                if i==len(pp1)-1:
                    r3=None
                else:
                    r3=pp1[i+1]
                # This method is provided by the subclasses to calculate HSE
                result=self._get_cb(r1, r2, r3)
                if result is None:
                    # Missing atoms, or i==0, or i==len(pp1)-1
                    continue
                pcb, angle=result
                hse_u=0
                hse_d=0
                ca2=r2['CA'].get_vector()
                residue_up=[]   ### GP
                residue_down=[] ### GP
                for pp2 in ppl:
                    for j in range(0, len(pp2)):
                        # The try/except below is used as a branch: when the
                        # residue numbers around r2 are NOT consecutive the
                        # "pass" path is taken and the offset filter is skipped;
                        # when they are consecutive (or r1/r3 is None, which
                        # raises inside the condition) the except path applies
                        # the usual offset filter.
                        try:
                            if r2.get_id()[1]-1!=r1.get_id()[1] or r2.get_id()[1]+1!=r3.get_id()[1]:
                                pass
                            else:
                                raise Exception
                        except:
                            if pp1 is pp2 and abs(i-j)<=offset:
                            # neighboring residues in the chain are ignored
                                continue
                        ro=pp2[j]
                        if not is_aa(ro) or not ro.has_id('CA'):
                            continue
                        cao=ro['CA'].get_vector()
                        d=(cao-ca2)
                        if d.norm()<radius:
                            # Within the sphere: angle to pcb below 90 degrees
                            # counts as "up", otherwise as "down".
                            if d.angle(pcb)<(math.pi/2):
                                hse_u+=1
                                ### GP
                                # Puts residues' names in a list that were found in the upper half sphere
                                residue_up.append(ro)

                                ### end of GP code
                            else:
                                hse_d+=1
                                ### GP
                                # Puts residues' names in a list that were found in the lower half sphere
                                residue_down.append(ro)
                                ### end of GP code
                res_id=r2.get_id()
                chain_id=r2.get_parent().get_id()
                # Fill the 3 data structures
                hse_map[(chain_id, res_id)]=(hse_u, hse_d, angle)
                hse_list.append((r2, (residue_up, residue_down, hse_u, hse_d, angle)))
                ### GP residue_up and residue_down added to hse_list
                hse_keys.append((chain_id, res_id))
                # Add to xtra
                r2.xtra[hse_up_key]=hse_u
                r2.xtra[hse_down_key]=hse_d
                if angle_key:
                    r2.xtra[angle_key]=angle

                ### GP checking for knots
                # Each knot entry is indexed as knot[0] = (chain id, residue
                # number) and knot[1] = (chain id, sequence of residue numbers).
                # When the current residue matches knot[0] and one of its "up"
                # neighbours lies in knot[1], the [first, last] range of knot[1]
                # is recorded once per chain in self.remodel_resis.
                if check_knots:
                    for knot in knot_resis:
                        if knot[0][1]==pp1[i].get_id()[1] and knot[0][0]==pp1[i].get_parent().get_id():
                            print(pp1[i].get_parent().get_id(),pp1[i])
                            for r in residue_up:
                                if r.get_parent().get_id()==knot[1][0] and r.get_id()[1] in knot[1][1]:
                                    print('close: ', r.get_parent().get_id(),r)
                                    resi_range = [knot[1][1][0], knot[1][1][-1]]
                                    if knot[1][0] not in self.remodel_resis:
                                        self.remodel_resis[knot[1][0]] = [resi_range]
                                    else:
                                        if resi_range not in self.remodel_resis[knot[1][0]]:
                                            self.remodel_resis[knot[1][0]].append(resi_range)

                ### GP checking for atom clashes
                # include_prev / include_next become True when the neighbouring
                # polypeptide entry is not numbered consecutively with the
                # current residue; any error while looking it up (e.g. no next
                # entry) leaves the flag False.
                include_prev, include_next = False, False
                try:
                    if pp1[i].get_id()[1]-1!=pp1[i-1].get_id()[1]:
                        include_prev = True
                except:
                    include_prev = False
                try:
                    if pp1[i].get_id()[1]+1!=pp1[i+1].get_id()[1]:
                        include_next = True
                except:
                    include_next = False
                for atom in pp1[i]:
                    ref_vector = atom.get_vector()
                    for other_res in residue_up:
                        # Branch-by-exception again: consecutively numbered
                        # direct neighbours are skipped via "continue"; every
                        # other residue reaches the except block, where the
                        # atom-atom distances are checked. len(pp1)>=i+1 holds
                        # for every valid i, so it does not guard pp1[i+1]; at
                        # the last entry the resulting IndexError also lands in
                        # the except block.
                        try:
                            if other_res==pp1[i-1] and include_prev==False:
                                continue
                            elif len(pp1)>=i+1 and other_res==pp1[i+1] and include_next==False:
                                continue
                            else:
                                raise Exception
                        except:
                            for other_atom in other_res:
                                other_vector = other_atom.get_vector()
                                d = other_vector-ref_vector
                                # Atoms closer than 2 (coordinate units) are a clash;
                                # one entry is appended per clashing atom pair.
                                if d.norm()<2:
                                    # When str() of the CA B-factor shows a single
                                    # decimal digit a '0' is appended before
                                    # converting back to float, which yields the
                                    # same numeric value.
                                    # NOTE(review): the B-factor presumably carries
                                    # a residue label here rather than a temperature
                                    # factor - confirm. A B-factor whose str() has
                                    # no '.' would raise at split('.')[1].
                                    if len(str(pp1[i]['CA'].get_bfactor()).split('.')[1])==1:
                                        clash_res1 = float(str(pp1[i]['CA'].get_bfactor())+'0')
                                    else:
                                        clash_res1 = pp1[i]['CA'].get_bfactor()
                                    if len(str(other_res['CA'].get_bfactor()).split('.')[1])==1:
                                        clash_res2 = float(str(other_res['CA'].get_bfactor())+'0')
                                    else:
                                        clash_res2 = other_res['CA'].get_bfactor()
                                    self.clash_pairs.append([(clash_res1, pp1[i].get_id()[1]), (clash_res2, other_res.get_id()[1])])
        # Amino acids present in the model but absent from every polypeptide
        # are reported as chain breaks.
        if check_chain_breaks==True:
            for r in residues_in_pdb:
                if r not in residues_with_proper_CA:
                    self.chain_breaks.append(r)
Esempio n. 9
0
    def __init__(self,
                 model,
                 radius,
                 offset=0,
                 hse_up_key='HSE_U',
                 hse_down_key='HSE_D',
                 angle_key=None):
        """
        Compute the half-sphere exposure (HSE) of every residue in the
        polypeptides built by CaPPBuilder, store the counts in each residue's
        xtra dictionary and collect close atom contacts in self.clash_pairs.

        @param model: model
        @type model: L{Model}

        @param radius: HSE radius
        @type radius: float

        @param offset: number of flanking residues that are ignored in the calculation
        of the number of neighbors
        @type offset: int

        @param hse_up_key: key used to store HSEup in the entity.xtra attribute
        @type hse_up_key: string

        @param hse_down_key: key used to store HSEdown in the entity.xtra attribute
        @type hse_down_key: string

        @param angle_key: key used to store the angle between CA-CB and CA-pCB in
        the entity.xtra attribute
        @type angle_key: string
        """
        assert offset >= 0
        # For PyMOL visualization
        self.ca_cb_list = []
        ppl = CaPPBuilder().build_peptides(model)
        # NOTE(review): these three containers are filled below but are not
        # passed on anywhere within this method as shown.
        hse_map, hse_list, hse_keys = {}, [], []
        ### GP
        self.clash_pairs = []
        for pp1 in ppl:
            for i in range(len(pp1)):
                # previous / current / next residue; None beyond either end
                r1 = pp1[i - 1] if i != 0 else None
                r2 = pp1[i]
                r3 = pp1[i + 1] if i != len(pp1) - 1 else None
                # This method is provided by the subclasses to calculate HSE
                result = self._get_cb(r1, r2, r3)
                if result is None:
                    # Missing atoms, or i==0, or i==len(pp1)-1
                    continue
                pcb, angle = result
                ca2 = r2['CA'].get_vector()
                ### GP: neighbours found in the upper / lower half sphere
                residue_up = []
                residue_down = []
                for pp2 in ppl:
                    same_peptide = pp1 is pp2
                    for j in range(len(pp2)):
                        if same_peptide and abs(i - j) <= offset:
                            # neighboring residues in the chain are ignored
                            continue
                        ro = pp2[j]
                        if not (is_aa(ro) and ro.has_id('CA')):
                            continue
                        to_neighbour = ro['CA'].get_vector() - ca2
                        if not to_neighbour.norm() < radius:
                            continue
                        if to_neighbour.angle(pcb) < (math.pi / 2):
                            residue_up.append(ro)
                        else:
                            residue_down.append(ro)
                # The counts are exactly the sizes of the two neighbour lists.
                hse_u = len(residue_up)
                hse_d = len(residue_down)
                entry_key = (r2.get_parent().get_id(), r2.get_id())
                # Fill the 3 data structures
                hse_map[entry_key] = (hse_u, hse_d, angle)
                ### GP residue_up and residue_down added to hse_list
                hse_list.append(
                    (r2, (residue_up, residue_down, hse_u, hse_d, angle)))
                hse_keys.append(entry_key)
                # Add to xtra
                r2.xtra[hse_up_key] = hse_u
                r2.xtra[hse_down_key] = hse_d
                if angle_key:
                    r2.xtra[angle_key] = angle

                ### GP checking for atom clashes
                # Atoms of the current residue are compared with the atoms of
                # every "up" neighbour that is not a direct polypeptide
                # neighbour; pairs closer than 2 are recorded, one entry per
                # atom pair.
                for atom in r2:
                    ref_vector = atom.get_vector()
                    for other_res in residue_up:
                        try:
                            if not (other_res != pp1[i - 1]
                                    and other_res != pp1[i + 1]):
                                continue
                            for other_atom in other_res:
                                separation = other_atom.get_vector() - ref_vector
                                if separation.norm() < 2:
                                    current = (r2['CA'].get_bfactor(),
                                               r2.get_id()[1])
                                    partner = (other_res['CA'].get_bfactor(),
                                               other_res.get_id()[1])
                                    self.clash_pairs.append([current, partner])
                        except:
                            # Any error (e.g. no pp1[i + 1] at the end of the
                            # polypeptide) silently skips this neighbour.
                            pass
Esempio n. 10
0
    def __init__(self, model, radius, offset, hse_up_key, hse_down_key, 
            angle_key=None):
        """
        Compute the half-sphere exposure (HSE) of every residue in the
        polypeptides built by CaPPBuilder, store the counts in each residue's
        xtra dictionary and initialise the AbstractPropertyMap base with them.

        @param model: model
        @type model: L{Model}

        @param radius: HSE radius
        @type radius: float

        @param offset: number of flanking residues that are ignored in the calculation
        of the number of neighbors
        @type offset: int

        @param hse_up_key: key used to store HSEup in the entity.xtra attribute
        @type hse_up_key: string

        @param hse_down_key: key used to store HSEdown in the entity.xtra attribute
        @type hse_down_key: string

        @param angle_key: key used to store the angle between CA-CB and CA-pCB in 
            the entity.xtra attribute
        @type angle_key: string
        """
        assert offset >= 0
        # For PyMOL visualization
        self.ca_cb_list = []
        ppl = CaPPBuilder().build_peptides(model)
        hse_map, hse_list, hse_keys = {}, [], []
        for pp1 in ppl:
            for i in range(len(pp1)):
                # previous / current / next residue; None beyond either end
                r1 = None if i == 0 else pp1[i - 1]
                r2 = pp1[i]
                r3 = None if i == len(pp1) - 1 else pp1[i + 1]
                # This method is provided by the subclasses to calculate HSE
                result = self._get_cb(r1, r2, r3)
                if result is None:
                    # Missing atoms, or i==0, or i==len(pp1)-1
                    continue
                pcb, angle = result
                hse_u = hse_d = 0
                ca2 = r2['CA'].get_vector()
                for pp2 in ppl:
                    same_peptide = pp1 is pp2
                    for j in range(len(pp2)):
                        if same_peptide and abs(i - j) <= offset:
                            # neighboring residues in the chain are ignored
                            continue
                        ro = pp2[j]
                        if not (is_aa(ro) and ro.has_id('CA')):
                            continue
                        to_neighbour = ro['CA'].get_vector() - ca2
                        if not to_neighbour.norm() < radius:
                            continue
                        # Inside the sphere: below 90 degrees to pcb counts as
                        # "up", everything else as "down".
                        if to_neighbour.angle(pcb) < (pi / 2):
                            hse_u += 1
                        else:
                            hse_d += 1
                entry_key = (r2.get_parent().get_id(), r2.get_id())
                # Fill the 3 data structures
                hse_map[entry_key] = (hse_u, hse_d, angle)
                hse_list.append((r2, (hse_u, hse_d, angle)))
                hse_keys.append(entry_key)
                # Add to xtra
                r2.xtra[hse_up_key] = hse_u
                r2.xtra[hse_down_key] = hse_d
                if angle_key:
                    r2.xtra[angle_key] = angle
        AbstractPropertyMap.__init__(self, hse_map, hse_keys, hse_list)
Esempio n. 11
0
 def build_peptides(self, structure):
     """Build the polypeptides of structure, retrying with a fresh CaPPBuilder.

     self.ppb is tried first (with aa_only=False); a new CaPPBuilder is only
     used when that call yields no polypeptides at all.
     """
     peptides = self.ppb.build_peptides(structure, aa_only=False)
     if len(peptides) != 0:
         return peptides
     # case of failure: fall back to CA-CA distance based building
     return CaPPBuilder().build_peptides(structure, aa_only=False)
Esempio n. 12
0
class ContactMapper(FeaturesComputer):
    '''
    Extends FeaturesComputer class. Extracts res and chainIds for training and predicting and computes contact maps 
    for training for a given complex
  '''
    def __init__(self,
                 rFname,
                 lFname,
                 computedFeatsRootDir=None,
                 boundAvailable=True,
                 res2res_dist=6.0,
                 isForPrediction=False,
                 statusManager=None):
        '''
        Set up prefixes, output location and PDB helpers for one receptor/ligand pair.

        @param rFname: str. path to receptor pdb file
        @param lFname: str. path to ligand pdb file
        @param computedFeatsRootDir: str. path where features will be stored
        @param boundAvailable: bool. True if bound structures are available. False otherwise. Bound structures
                                     are looked for next to the unbound ones, named as in the following example:
                                     1A2K_l_u.pdb  1A2K_r_b.pdb
        @param res2res_dist: float. max distance between any heavy atoms of 2 amino acids to be considered as
                                    interacting (Angstroms)
        @param isForPrediction: bool. False to compute contacts between amino acids (positive contacts are
                                      tagged 1, negative -1). If True, every pair is tagged np.nan
        @param statusManager: class that implements .setStatus(msg) to communicate. Not used inside this
                              constructor
        '''
        FeaturesComputer.__init__(self, rFname, lFname, computedFeatsRootDir)

        # Short prefix: file name up to the first "." and then up to the first "_"
        self.prefixR = os.path.basename(rFname).split(".")[0].split("_")[0]
        self.prefixL = os.path.basename(lFname).split(".")[0].split("_")[0]
        if self.prefixR != self.prefixL:
            # Partners with different names are joined with "<->", so the
            # ligand prefix is checked for "<" and the receptor prefix for ">".
            if "<" in self.prefixL:
                raise FeatureComputerException(
                    "Error. Ligand pdbFile name %s must not contain '<' or '>' character"
                    % lFname)
            if ">" in self.prefixR:
                raise FeatureComputerException(
                    "Error. Receptor pdbFile name %s must not contain '<' or'>' character"
                    % rFname)
            self.prefixR = self.getExtendedPrefix(rFname)
            self.prefixL = self.getExtendedPrefix(lFname)
            self.prefix = self.prefixL + "<->" + self.prefixR
        else:
            self.prefix = self.prefixR

        self.isForPrediction = isForPrediction
        self.res2res_dist = res2res_dist
        self.boundAvailable = boundAvailable

        # Contact maps are written to <computedFeatsRootDir>/common/contactMaps
        self.outPath = myMakeDir(self.computedFeatsRootDir,
                                 "common/contactMaps")
        self.outName = os.path.join(self.outPath, self.prefix + ".cMap.tab")

        self.parser = PDBParser(QUIET=True)
        #    self.ppb=PPBuilder( radius= 200) # To not worry for broken chains
        self.ppb = CaPPBuilder()
        self.computeFun = self.contactMapOneComplex

    def mapBoundToUnbound(self,
                          structureUnbound,
                          structureBound,
                          skipBoundChainsIds=set([])):
        '''
        Obtains correspondence between bound structure and unbound structure when available. Returns a
        dictionary that maps bound_residue --> equivalent unbound_residue. When no bound structure is
        given, every unbound residue is mapped to itself.

        @param structureUnbound: Bio.PDB.Structure. Structure in unbound state
        @param structureBound:   Bio.PDB.Structure. Structure in bound state, or None if not available
        @param skipBoundChainsIds:   Set of Chars. Chain ids (of the bound side) that will be skipped
        @return bound2UnboundMapDict: Dict {Bio.PDB.Residue (from bound structure): Bio.PDB.Residue (from unbound structure)}
        '''
        unboundPeptides = self.ppb.build_peptides(structureUnbound,
                                                  aa_only=False)
        if structureBound is not None:
            boundPeptides = self.ppb.build_peptides(structureBound,
                                                    aa_only=False)
            # res_bound->res_unbound mapper object
            mapper = BoundUnboundMapper(unboundPeptides, boundPeptides)
            mapper.build_correspondence()
            toUnbound = mapper.mapBoundToUnbound
        else:
            # No bound structure: the unbound one plays both roles.
            boundPeptides = unboundPeptides
            toUnbound = lambda residue: residue

        bound2UnboundMapDict = {}
        for peptide in boundPeptides:
            for resBound in peptide:
                # element 2 of the full id is the chain id
                if resBound.get_full_id()[2] in skipBoundChainsIds:
                    continue
                resUnbound = toUnbound(resBound)
                # Bound residues without an unbound equivalent are left out.
                if resUnbound is not None:
                    bound2UnboundMapDict[resBound] = resUnbound
        return bound2UnboundMapDict

    def fixHomooligomers(self, structureL, structureR, positiveContacts,
                         chainsInContactL, chainsInContactR):
        '''
        Extends the positive contacts with the equivalent residues of homooligomeric chains: a
        HomoOligomerFinder is run first on the ligand polypeptides and then, with the already
        updated contacts, on the receptor polypeptides.

        @param structureL: Bio.PDB.Structure. Structure of ligand
        @param structureR:   Bio.PDB.Structure. Structure of receptor
        @param positiveContacts:  [(ligandResId, receptorResId)]: ligandResId and receptorResIds are full_ids of Bio.PDB.Residue
        @param chainsInContactL:  chains of the ligand in contact. The incoming value is not read; it is
                                  replaced by the result of the ligand-side update
        @param chainsInContactR:  chains of the receptor in contact. The incoming value is not read; it is
                                  replaced by the result of the receptor-side update
        @return positiveContacts, chainsInContactL, chainsInContactR. Updated with equivalent residues interactions added
        '''
        chainsPerPartner = []
        # Ligand first, receptor second: the receptor pass must see the
        # contacts already extended by the ligand pass.
        for structure, chainType in ((structureL, "l"), (structureR, "r")):
            peptides = self.ppb.build_peptides(structure, aa_only=False)
            finder = HomoOligomerFinder(peptides,
                                        positiveContacts,
                                        chainType=chainType)
            positiveContacts, chainsInContact = finder.update_interactions()
            chainsPerPartner.append(chainsInContact)
        chainsInContactL, chainsInContactR = chainsPerPartner
        return positiveContacts, chainsInContactL, chainsInContactR

    def getPairsOfResiduesInContact(self, structureL, structureR):
        '''
        Computes which amino acids of ligand are in contact with which amino acids of receptor, i.e.
        have atoms (names not starting with "H") within self.res2res_dist of each other.

        @param structureL: Bio.PDB.Structure. Structure of ligand (bound state if available)
        @param structureR:   Bio.PDB.Structure. Structure of receptor (bound state if available).
        @return positiveContacts:  Set {(Bio.PDB.Residue.fullResId (from structureL), Bio.PDB.Residue.fullResId (from structureR))}
        @return chainsNotContactL: Set { str(chainId structureL)} chains of the ligand without any contact
        @return chainsNotContactR: Set { str(chainId structureR)} chains of the receptor without any contact
        @raise NoValidPDBFile: if a structure has no first model
        '''

        def nonHydrogenAtoms(structure, errorMsg):
            # Atoms of the first model whose name does not start with "H".
            try:
                return [
                    atom for atom in structure.child_list[0].get_atoms()
                    if not atom.name.startswith("H")
                ]
            except IndexError:
                raise NoValidPDBFile(errorMsg)

        atomListL = nonHydrogenAtoms(structureL, "Problems parsing pdbFile 1")
        atomListR = nonHydrogenAtoms(structureR, "Problems parsing pdbFile 2")

        # One search over the atoms of both partners, reported as residue pairs.
        allNeigs = NeighborSearch(atomListL + atomListR).search_all(
            self.res2res_dist, level="R")
        lStructId = structureL.get_id()
        rStructId = structureR.get_id()

        positiveContacts = set()
        chainsInContactL = set()
        chainsInContactR = set()
        for res1, res2 in allNeigs:
            fullResId1 = res1.get_full_id()
            fullResId2 = res2.get_full_id()
            pdbId1, _, _, _ = fullResId1
            pdbId2, _, _, _ = fullResId2
            # Keep only ligand-receptor pairs, always stored ligand first.
            if pdbId1 == lStructId and pdbId2 == rStructId:
                ligandResId, receptorResId = fullResId1, fullResId2
            elif pdbId1 == rStructId and pdbId2 == lStructId:
                ligandResId, receptorResId = fullResId2, fullResId1
            else:
                continue
            positiveContacts.add((ligandResId, receptorResId))
            chainsInContactL.add(ligandResId[2])
            chainsInContactR.add(receptorResId[2])

        if CONSIDER_HOMOOLIG_AS_POS:
            (positiveContacts, chainsInContactL,
             chainsInContactR) = self.fixHomooligomers(structureL, structureR,
                                                       positiveContacts,
                                                       chainsInContactL,
                                                       chainsInContactR)

        allChainsL = {chain.get_id() for chain in structureL[0].get_list()}
        allChainsR = {chain.get_id() for chain in structureR[0].get_list()}
        chainsNotContactL = allChainsL.difference(chainsInContactL)
        chainsNotContactR = allChainsR.difference(chainsInContactR)
        return positiveContacts, chainsNotContactL, chainsNotContactR

    def contactMapOneComplex(self):
        '''
        Computes the contact map of a complex and writes it to self.outName
        (self.computedFeatsRootDir/common/contactMaps/<prefix>.cMap.tab). Each line holds one
        ligand residue / receptor residue pair with its category: 1 for a contact, -1 for no
        contact, nan when self.isForPrediction is set. Residue ids and names come from the
        unbound structures; contacts are computed on the bound structures when they can be
        loaded, otherwise on the unbound ones.

        Nothing is recomputed if the output file already exists. If anything fails while
        writing, the file is closed, removed through tryToRemove and the exception re-raised.

        @return 0 if the contact map was already computed, None otherwise
        @raise BadNumberOfResidues: if a partner has a number of mapped residues outside
               (self.minNumResiduesPartner, self.maxNumResiduesPartner)
        '''
        outName = self.outName
        print(outName)
        if os.path.isfile(outName):
            print('Already computed contact map')
            return 0
        lStructId = self.prefixL + "_l_u.pdb"
        rStructId = self.prefixR + "_r_u.pdb"
        structureL_u = self.parser.get_structure(lStructId, self.lFname)
        structureR_u = self.parser.get_structure(rStructId, self.rFname)

        structureL_b = None
        structureR_b = None
        if not (self.boundAvailable == False or self.isForPrediction):
            try:
                lStructId_b = self.prefix + "_l_b.pdb"
                rStructId_b = self.prefix + "_r_b.pdb"
                lFname_b = os.path.join(
                    os.path.dirname(self.lFname), lStructId_b)
                rFname_b = os.path.join(
                    os.path.dirname(self.rFname), rStructId_b)
                structureL_b = self.parser.get_structure(lStructId_b, lFname_b)
                structureR_b = self.parser.get_structure(rStructId_b, rFname_b)
            except IOError:  # in this case there are just unbound pdbs available
                structureL_b = None
                structureR_b = None

        if self.isForPrediction:
            positiveContacts = None
            chainsNotContactR = set([])
            chainsNotContactL = set([])
        elif structureL_b is None or structureR_b is None:
            # No bound pair available: compute contacts in unbound structures
            positiveContacts, chainsNotContactL, chainsNotContactR = self.getPairsOfResiduesInContact(
                structureL_u, structureR_u)
        else:
            # Compute contacts in bound structures
            positiveContacts, chainsNotContactL, chainsNotContactR = self.getPairsOfResiduesInContact(
                structureL_b, structureR_b)

        if JUST_INTERACTING_CHAINS == False:
            chainsNotContactR = set([])
            chainsNotContactL = set([])

        rResDict = self.mapBoundToUnbound(structureR_u,
                                          structureR_b,
                                          skipBoundChainsIds=chainsNotContactR)
        lResDict = self.mapBoundToUnbound(structureL_u,
                                          structureL_b,
                                          skipBoundChainsIds=chainsNotContactL)
        nResiduesL = len(lResDict)
        nResiduesR = len(rResDict)
        if not (self.minNumResiduesPartner < nResiduesL <
                self.maxNumResiduesPartner):
            raise BadNumberOfResidues(nResiduesL, "1")
        if not (self.minNumResiduesPartner < nResiduesR <
                self.maxNumResiduesPartner):
            raise BadNumberOfResidues(nResiduesR, "2")

        sortKey = lambda res: res.get_full_id()
        outFile = open(outName, "w")
        try:
            # The with-block guarantees the handle is closed before the except
            # clause tries to remove a partially written file.
            with outFile:
                outFile.write(
                    "chainIdL structResIdL resNameL chainIdR structResIdR resNameR categ\n"
                )

                # Receptor-side data do not depend on the ligand residue, so
                # they are prepared once instead of once per ligand residue.
                receptorRows = []
                for resR_bound in sorted(rResDict, key=sortKey):
                    resR_unbound = rResDict[resR_bound]
                    try:
                        letraR = three_to_one(resR_unbound.resname)
                        # skip pairs whose bound and unbound residue types differ
                        if letraR != three_to_one(resR_bound.resname):
                            continue
                    except KeyError:
                        # residue name without a one-letter code
                        continue
                    _, _, chainIdR, resIdR = resR_unbound.get_full_id()
                    if chainIdR == " ":
                        chainIdR = "*"
                    receptorRows.append(
                        (resR_bound.get_full_id(), chainIdR,
                         self.makeStrResId(resIdR), letraR))

                for resL_bound in sorted(lResDict, key=sortKey):
                    resL_unbound = lResDict[resL_bound]
                    _, _, chainIdL, resIdL = resL_unbound.get_full_id()
                    resIdL = self.makeStrResId(resIdL)
                    try:
                        letraL = three_to_one(resL_unbound.resname)
                        if letraL != three_to_one(resL_bound.resname):
                            continue
                    except KeyError:
                        continue
                    if chainIdL == " ":
                        chainIdL = "*"
                    fullIdL = resL_bound.get_full_id()

                    for fullIdR, chainIdR, resIdR, letraR in receptorRows:
                        if self.isForPrediction:
                            categ = np.nan
                        elif (fullIdL, fullIdR) in positiveContacts:
                            categ = 1
                        else:
                            categ = -1
                        outFile.write("%s %s %s %s %s %s %s\n" %
                                      (chainIdL, resIdL, letraL, chainIdR,
                                       resIdR, letraR, categ))
        except (KeyboardInterrupt, Exception):
            print("Exception happend computing %s" % outName)
            tryToRemove(outName)
            raise

    def makeStrResId(self, resId):
        '''
        Builds the string form of a residue id from everything after its first element
        (e.g. (" ", 12, "A") --> "12A"), with surrounding whitespace removed.

        @param resId: sequence whose elements from index 1 on are concatenated
        @return finalId: str
        '''
        return "".join(str(elem) for elem in resId[1:]).strip()