def extract_seq_from_models(protien_name, file_name, fasta_to_write):
    print "Working with.....: ", file_name
    #GET the first model from the pdb and write to a temp file
    #the code based on BioPython lib to extract the 1-letter sequence works fast if we just one model from a large pdb
    #thus reducing the memory
    temp_pdb_file = "_temp_pdb.pdb"
    with open(temp_pdb_file, "a") as temp:
        with open(file_name) as ip:
            for line in ip:
                temp.write(line)
                if (line[0] == 'T'):
                    break

    structure = PDBParser().get_structure(protien_name, temp_pdb_file)
    # Using CA-CA
    ppb = CaPPBuilder()
    for pp in ppb.build_peptides(structure):
        seq = pp.get_sequence()

    os.remove(fasta_to_write)
    with open(fasta_to_write, "a") as fasta:
        fasta.write(">" + protien_name + "_seq\n")
        fasta.write(str(seq))
    ip.close()
    temp.close()
    fasta.close()
    os.remove(temp_pdb_file)
    print "Done, output file stored at: ", fasta_to_write
Ejemplo n.º 2
0
    def __init__(self, model, radius=12.0, offset=0):
        """Initialize.

        A residue's exposure is defined as the number of CA atoms around
        that residues CA atom. A dictionary is returned that uses a L{Residue}
        object as key, and the residue exposure as corresponding value.

        :param model: the model that contains the residues
        :type model: L{Model}

        :param radius: radius of the sphere (centred at the CA atom)
        :type radius: float

        :param offset: number of flanking residues that are ignored in
                       the calculation of the number of neighbors
        :type offset: int

        """
        assert (offset >= 0)
        ppb = CaPPBuilder()
        ppl = ppb.build_peptides(model)
        fs_map = {}
        fs_list = []
        fs_keys = []
        for pp1 in ppl:
            for i in range(0, len(pp1)):
                fs = 0
                r1 = pp1[i]
                if not is_aa(r1) or not r1.has_id('CA'):
                    continue
                ca1 = r1['CA']
                for pp2 in ppl:
                    for j in range(0, len(pp2)):
                        if pp1 is pp2 and abs(i - j) <= offset:
                            continue
                        r2 = pp2[j]
                        if not is_aa(r2) or not r2.has_id('CA'):
                            continue
                        ca2 = r2['CA']
                        d = (ca2 - ca1)
                        if d < radius:
                            fs += 1
                res_id = r1.get_id()
                chain_id = r1.get_parent().get_id()
                # Fill the 3 data structures
                fs_map[(chain_id, res_id)] = fs
                fs_list.append((r1, fs))
                fs_keys.append((chain_id, res_id))
                # Add to xtra
                r1.xtra['EXP_CN'] = fs
        AbstractPropertyMap.__init__(self, fs_map, fs_keys, fs_list)
Ejemplo n.º 3
0
    def __init__(self,
                 rFname,
                 lFname,
                 computedFeatsRootDir=None,
                 boundAvailable=True,
                 res2res_dist=6.0,
                 isForPrediction=False,
                 statusManager=None):
        '''
      @param rFname: str. path to receptor pdb file
      @param lFname: str. path to ligand pdb file      
      @param computedFeatsRootDir: str. path where features will be stored
      @param boundAvailable: bool. True if bound structures are available. False otherwise. Bound structures must be located
                                   at the same path that unbound structures and need to be named as in the following example:
                                    1A2K_l_u.pdb  1A2K_r_b.pdb
      @param res2res_dist: float. max distance between any heavy atoms of 2 amino acids to be considered as interacting
                                  (Amstrongs)
      @param isForPrediction: bool. False to compute contacts between amino acids, True otherwise. Positive contacts will
                                    be tag as 1, negative as -1. If True, all amino acids will have as tag np.nan
      @param statusManager: class that implements .setStatus(msg) to communicate
    '''
        FeaturesComputer.__init__(self, rFname, lFname, computedFeatsRootDir)

        self.prefixR = os.path.split(rFname)[1].split(".")[0].split("_")[0]
        self.prefixL = os.path.split(lFname)[1].split(".")[0].split("_")[0]
        if self.prefixR == self.prefixL:
            self.prefix = self.prefixR
        else:
            if "<" in self.prefixL:
                raise FeatureComputerException(
                    "Error. Ligand pdbFile name %s must not contain '<' or '>' character"
                    % lFname)
            if ">" in self.prefixR:
                raise FeatureComputerException(
                    "Error. Receptor pdbFile name %s must not contain '<' or'>' character"
                    % rFname)
            self.prefixR = self.getExtendedPrefix(rFname)
            self.prefixL = self.getExtendedPrefix(lFname)

            self.prefix = self.prefixL + "<->" + self.prefixR

        self.isForPrediction = isForPrediction
        self.res2res_dist = res2res_dist
        self.boundAvailable = boundAvailable
        self.outPath = myMakeDir(self.computedFeatsRootDir,
                                 "common/contactMaps")
        self.outName = os.path.join(self.outPath, self.prefix + ".cMap.tab")
        self.parser = PDBParser(QUIET=True)
        #    self.ppb=PPBuilder( radius= 200) # To not worry for broken chains
        self.ppb = CaPPBuilder()
        self.computeFun = self.contactMapOneComplex
Ejemplo n.º 4
0
    def __init__(self, model, radius, offset=0, hse_up_key='HSE_U', hse_down_key='HSE_D', angle_key=None, check_chain_breaks=False, 
                 check_knots=False, receptor=None, signprot=None):
        """
        @param model: model
        @type model: L{Model}

        @param radius: HSE radius
        @type radius: float

        @param offset: number of flanking residues that are ignored in the calculation
        of the number of neighbors
        @type offset: int

        @param hse_up_key: key used to store HSEup in the entity.xtra attribute
        @type hse_up_key: string

        @param hse_down_key: key used to store HSEdown in the entity.xtra attribute
        @type hse_down_key: string

        @param angle_key: key used to store the angle between CA-CB and CA-pCB in
        the entity.xtra attribute
        @type angle_key: string
        """
        assert(offset>=0)
        # For PyMOL visualization
        self.ca_cb_list=[]
        ppb=CaPPBuilder()
        ppl=ppb.build_peptides(model)
        hse_map={}
        hse_list=[]
        hse_keys=[]
        ### GP
        if model.get_id()!=0:
            model = model[0]
        residues_in_pdb,residues_with_proper_CA=[],[]
        if check_chain_breaks==True:
            # for m in model:
                for chain in model:
                    for res in chain:
                        # try:
                            if is_aa(res):
                                residues_in_pdb.append(res.get_id()[1])
                        # except:
                        #     if is_aa(chain):
                        #         residues_in_pdb.append(chain.get_id()[1])
                        #         print('chain', chain, res)
                        #         break
        self.clash_pairs = []
        self.chain_breaks = []
        
        if check_knots:
            possible_knots = PossibleKnots(receptor, signprot)
            knot_resis = possible_knots.get_resnums()
            self.remodel_resis = {}

        for pp1 in ppl:
            for i in range(0, len(pp1)):
                residues_with_proper_CA.append(pp1[i].get_id()[1])
                if i==0:
                    r1=None
                else:
                    r1=pp1[i-1]
                r2=pp1[i]
                if i==len(pp1)-1:
                    r3=None
                else:
                    r3=pp1[i+1]
                # This method is provided by the subclasses to calculate HSE
                result=self._get_cb(r1, r2, r3)
                if result is None:
                    # Missing atoms, or i==0, or i==len(pp1)-1
                    continue
                pcb, angle=result
                hse_u=0
                hse_d=0
                ca2=r2['CA'].get_vector()
                residue_up=[]   ### GP
                residue_down=[] ### GP
                for pp2 in ppl:
                    for j in range(0, len(pp2)):
                        try:
                            if r2.get_id()[1]-1!=r1.get_id()[1] or r2.get_id()[1]+1!=r3.get_id()[1]:
                                pass
                            else:
                                raise Exception
                        except:
                            if pp1 is pp2 and abs(i-j)<=offset:
                            # neighboring residues in the chain are ignored
                                continue
                        ro=pp2[j]
                        if not is_aa(ro) or not ro.has_id('CA'):
                            continue
                        cao=ro['CA'].get_vector()
                        d=(cao-ca2)
                        if d.norm()<radius:
                            if d.angle(pcb)<(math.pi/2):
                                hse_u+=1
                                ### GP
                                # Puts residues' names in a list that were found in the upper half sphere
                                residue_up.append(ro)

                                ### end of GP code
                            else:
                                hse_d+=1
                                ### GP
                                # Puts residues' names in a list that were found in the lower half sphere
                                residue_down.append(ro)
                                ### end of GP code
                res_id=r2.get_id()
                chain_id=r2.get_parent().get_id()
                # Fill the 3 data structures
                hse_map[(chain_id, res_id)]=(hse_u, hse_d, angle)
                hse_list.append((r2, (residue_up, residue_down, hse_u, hse_d, angle)))
                ### GP residue_up and residue_down added to hse_list
                hse_keys.append((chain_id, res_id))
                # Add to xtra
                r2.xtra[hse_up_key]=hse_u
                r2.xtra[hse_down_key]=hse_d
                if angle_key:
                    r2.xtra[angle_key]=angle

                ### GP checking for knots
                if check_knots:
                    for knot in knot_resis:
                        if knot[0][1]==pp1[i].get_id()[1] and knot[0][0]==pp1[i].get_parent().get_id():
                            print(pp1[i].get_parent().get_id(),pp1[i])
                            for r in residue_up:
                                if r.get_parent().get_id()==knot[1][0] and r.get_id()[1] in knot[1][1]:
                                    print('close: ', r.get_parent().get_id(),r)
                                    resi_range = [knot[1][1][0], knot[1][1][-1]]
                                    if knot[1][0] not in self.remodel_resis:
                                        self.remodel_resis[knot[1][0]] = [resi_range]
                                    else:
                                        if resi_range not in self.remodel_resis[knot[1][0]]:
                                            self.remodel_resis[knot[1][0]].append(resi_range)

                ### GP checking for atom clashes
                include_prev, include_next = False, False
                try:
                    if pp1[i].get_id()[1]-1!=pp1[i-1].get_id()[1]:
                        include_prev = True
                except:
                    include_prev = False
                try:
                    if pp1[i].get_id()[1]+1!=pp1[i+1].get_id()[1]:
                        include_next = True
                except:
                    include_next = False
                for atom in pp1[i]:
                    ref_vector = atom.get_vector()
                    for other_res in residue_up:
                        try:
                            if other_res==pp1[i-1] and include_prev==False:
                                continue
                            elif len(pp1)>=i+1 and other_res==pp1[i+1] and include_next==False:
                                continue
                            else:
                                raise Exception
                        except:
                            for other_atom in other_res:
                                other_vector = other_atom.get_vector()
                                d = other_vector-ref_vector
                                if d.norm()<2:
                                    if len(str(pp1[i]['CA'].get_bfactor()).split('.')[1])==1:
                                        clash_res1 = float(str(pp1[i]['CA'].get_bfactor())+'0')
                                    else:
                                        clash_res1 = pp1[i]['CA'].get_bfactor()
                                    if len(str(other_res['CA'].get_bfactor()).split('.')[1])==1:
                                        clash_res2 = float(str(other_res['CA'].get_bfactor())+'0')
                                    else:
                                        clash_res2 = other_res['CA'].get_bfactor()
                                    self.clash_pairs.append([(clash_res1, pp1[i].get_id()[1]), (clash_res2, other_res.get_id()[1])])
        if check_chain_breaks==True:
            for r in residues_in_pdb:
                if r not in residues_with_proper_CA:
                    self.chain_breaks.append(r)
Ejemplo n.º 5
0
    def __init__(self,
                 model,
                 radius,
                 offset,
                 hse_up_key,
                 hse_down_key,
                 angle_key=None):
        """
        @param model: model
        @type model: L{Model}

        @param radius: HSE radius
        @type radius: float

        @param offset: number of flanking residues that are ignored in the calculation
        of the number of neighbors
        @type offset: int

        @param hse_up_key: key used to store HSEup in the entity.xtra attribute
        @type hse_up_key: string

        @param hse_down_key: key used to store HSEdown in the entity.xtra attribute
        @type hse_down_key: string

        @param angle_key: key used to store the angle between CA-CB and CA-pCB in
            the entity.xtra attribute
        @type angle_key: string
        """
        assert (offset >= 0)
        # For PyMOL visualization
        self.ca_cb_list = []
        ppb = CaPPBuilder()
        ppl = ppb.build_peptides(model)
        hse_map = {}
        hse_list = []
        hse_keys = []
        for pp1 in ppl:
            for i in range(0, len(pp1)):
                if i == 0:
                    r1 = None
                else:
                    r1 = pp1[i - 1]
                r2 = pp1[i]
                if i == len(pp1) - 1:
                    r3 = None
                else:
                    r3 = pp1[i + 1]
                # This method is provided by the subclasses to calculate HSE
                result = self._get_cb(r1, r2, r3)
                if result is None:
                    # Missing atoms, or i==0, or i==len(pp1)-1
                    continue
                pcb, angle = result
                hse_u = 0
                hse_d = 0
                ca2 = r2['CA'].get_vector()
                for pp2 in ppl:
                    for j in range(0, len(pp2)):
                        if pp1 is pp2 and abs(i - j) <= offset:
                            # neighboring residues in the chain are ignored
                            continue
                        ro = pp2[j]
                        if not is_aa(ro) or not ro.has_id('CA'):
                            continue
                        cao = ro['CA'].get_vector()
                        d = (cao - ca2)
                        if d.norm() < radius:
                            if d.angle(pcb) < (pi / 2):
                                hse_u += 1
                            else:
                                hse_d += 1
                res_id = r2.get_id()
                chain_id = r2.get_parent().get_id()
                # Fill the 3 data structures
                hse_map[(chain_id, res_id)] = (hse_u, hse_d, angle)
                hse_list.append((r2, (hse_u, hse_d, angle)))
                hse_keys.append((chain_id, res_id))
                # Add to xtra
                r2.xtra[hse_up_key] = hse_u
                r2.xtra[hse_down_key] = hse_d
                if angle_key:
                    r2.xtra[angle_key] = angle
        AbstractPropertyMap.__init__(self, hse_map, hse_keys, hse_list)
Ejemplo n.º 6
0
def main(argv=None):  # IGNORE:C0111
    '''Command line options.'''

    if argv is None:
        argv = sys.argv
    else:
        sys.argv.extend(argv)

    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument("-v",
                        "--verbose",
                        dest="verbose",
                        action="count",
                        help="set verbosity level [default: %(default)s]")

    # parser.add_argument("-dir", "--structs_dir", required = True )
    parser.add_argument("-db", "--database_name", default='pdb')
    parser.add_argument("-host", "--db_host", default='127.0.0.1')

    parser.add_argument("--procesados",
                        default='/tmp/pdbs_dist_procesados.txt')
    parser.add_argument("--domains",
                        default='/data/databases/pdb/processed/dns_pdbs.tlb')
    parser.add_argument(
        "--seqs", default='/data/databases/pdb/processed/pdb_seq_res.fasta')
    parser.add_argument("--pdbs", default='/data/databases/pdb/')
    parser.add_argument(
        "--distances",
        default='/data/databases/pdb/processed/distances.tbl',
        help=
        "Final output: table with atom distances between residues and ligands. Only for distances less than 'dist' parameter"
    )
    parser.add_argument("--dist", default=5)
    parser.add_argument(
        "--pdbs_with_drug",
        default='/data/databases/pdb/processed/pdbs_with_drug.txt',
        help="Output: list of PDB codes with an associated ligand")

    args = parser.parse_args()

    if not os.path.exists(args.pdbs):
        sys.stderr.write(
            "%s not found. Specify where is pdbs/divided directory" %
            (parser.pdbs))
        sys.exit(1)
    PDB_PATH = args.pdbs
    CONTACT_DIST = args.dist

    pdbs_with_drug_path = args.pdbs_with_drug
    if not os.path.exists(os.path.dirname(args.pdbs_with_drug)):
        sys.stderr.write("can't %s create %s. Set pdbs_with_drug correctly" %
                         (pdbs_with_drug_path))
        sys.exit(1)

    if not os.path.exists(os.path.dirname(args.distances)):
        sys.stderr.write("can't %s create %s. Set distances correctly" %
                         (args.distances))
        sys.exit(1)

    pdbs_procesados_path = args.procesados
    print(
        "In %s the processed pdbs are kept, if the file is deleted, the process starts from scratch "
        % pdbs_procesados_path)
    print("Outputs: '%s' and '%s' " % (pdbs_with_drug_path, args.distances))

    pdbs_procesados = []
    if os.path.exists(pdbs_procesados_path):
        with open(pdbs_procesados_path) as handle:
            pdbs_procesados = [x.strip() for x in handle.readlines()]
        pdbs_procesados = {x: 1 for x in pdbs_procesados}

    pdbs_iterator = PDBsIterator(pdb_dir=args.pdbs)

    def not_processed_iter():
        for pdb, pdb_path in pdbs_iterator:
            if pdb not in pdbs_procesados:
                yield [pdb, pdb_path]

    DNsPDBs = args.domains

    if not os.path.exists(DNsPDBs):
        seqs_from_pdb = args.seqs
        if not os.path.exists(seqs_from_pdb):
            sys.stderr.write(
                "%s does not exists and %s not found. Specify where it is." %
                (DNsPDBs, seqs_from_pdb))
            sys.exit(1)

        sys.stderr.write(
            "%s not found. You can create it with the following command: \n" %
            DNsPDBs)
        sys.stderr.write(
            "hmmscan --cut_tc --domtblout dns_pdbs.tlb --acc -o pdb_seq_res.hmm Pfam-A.hmm seqs_from_pdb.fasta"
        )
        sys.exit(1)

    drugcompounds = [
        x for x, y in compound_type.items() if y in ["DRUG", "COFACTOR"]
    ]
    othercompounds = [
        x for x, y in compound_type.items()
        if y in ["METAL", "SUGAR", "NUCLEOTIDE", "LIPID"]
    ]
    aminoacidcompounds = [
        x for x, y in compound_type.items() if y in ["MODIFIED", "RESIDUE"]
    ]

    drugcompounds = othercompounds + drugcompounds

    pdbs_with_drug_path = "/data/databases/pdb/processed/pdbs_with_drug.txt"

    _log.info("proceced pdbs: %i" % len(pdbs_procesados))

    ppb = CaPPBuilder()
    p = PDBParser(PERMISSIVE=1, QUIET=1)

    pdbs_with_drug = []
    if os.path.exists(pdbs_with_drug_path):
        _log.info("pdbs with drugs already loaded")
        with open(pdbs_with_drug_path) as handle:
            for x in handle.readlines():
                pdbs_with_drug.append(x.strip())
    else:
        with open(pdbs_with_drug_path, "a") as handle:
            _log.info("pdbs with drugs will be loaded")
            pdbs = list(pdbs_iterator)
            for pdb, file_path in tqdm(pdbs):
                try:
                    if pdb not in pdbs_with_drug:
                        structure = p.get_structure(pdb, file_path)
                        for res in structure.get_residues():
                            if res.resname in drugcompounds:
                                pdbs_with_drug.append(pdb)
                                handle.write(pdb + "\n")
                                handle.flush()
                                break
                except Exception as ex:
                    print(str(ex))

    # import re
    # dns_table = re.sub(r" +", "\t","\n".join( [str(i) + "\t" + x for i,x in enumerate(open('/data/databases/pdb/processed/dns_pdbs.tlb').readlines()) if not x.startswith("#") ]) )
    if not os.path.exists(DNsPDBs + "2"):
        cols = [
            "target_name", "accession", "tlen", "query_name", "accession2",
            "qlen", "E-value", "score1", "bias1", "#", "of", "c-Evalue",
            "i-Evalue", "score2", "bias2", "from1", "to1", "from2", "to2",
            "from3", "to3", "acc"
        ]
        _log.info("correcting hmmer-pdb output")

        regexp = re.compile(" +")
        items = []
        for x in tqdm(open(DNsPDBs).readlines()):
            if not x.startswith("#"):
                line = regexp.split(x)
                items.append(line[0:len(cols)])
                # record = {c: line[i] for i, c in enumerate(cols)}

        df_hmm = pd.DataFrame.from_records(items, columns=cols)
        # df_hmm =  df = pd.read_table('/data/databases/pdb/processed/dns_pdbs.tlb', index_col=None, header=None, delimiter=r"\s+",comment="#",names=cols)
        # df_hmm = df_hmm.dropna()
        df_hmm = df_hmm[["accession", "query_name", "from3", "to3"]]
        df_hmm.to_csv(DNsPDBs + "2")
        df_hmm["pdb"] = map(lambda x: x.split("_")[0].lower().strip(),
                            df_hmm["query_name"])
        df_hmm["chain"] = map(lambda x: x.split("_")[1].upper().strip(),
                              df_hmm["query_name"])
        df_hmm["start_res"] = map(lambda x: x.split("_")[2].upper().strip(),
                                  df_hmm["query_name"])
        df_hmm["end_res"] = map(lambda x: x.split("_")[3].upper().strip(),
                                df_hmm["query_name"])
    else:
        df_hmm = pd.read_csv(DNsPDBs + "2")
        df_hmm["pdb"] = map(lambda x: x.split("_")[0].lower().strip(),
                            df_hmm["query_name"])
        df_hmm["chain"] = map(lambda x: x.split("_")[1].upper().strip(),
                              df_hmm["query_name"])
        df_hmm["start_res"] = map(lambda x: x.split("_")[2].upper().strip(),
                                  df_hmm["query_name"])
        df_hmm["end_res"] = map(lambda x: x.split("_")[3].upper().strip(),
                                df_hmm["query_name"])
    print(len(df_hmm))

    lock = Lock()

    def centeroid(arr):
        length = len(arr)
        sum_x = np.sum([x.coord[0] for x in arr])
        sum_y = np.sum([x.coord[1] for x in arr])
        sum_z = np.sum([x.coord[2] for x in arr])
        return sum_x / length, sum_y / length, sum_z / length

    def residues_near_drug(drug_centroid, aa_residues):
        residues_near = []
        for r in aa_residues:
            for a in list(r):
                dist = a - Struct(coord=drug_centroid)
                if dist > 20:
                    break
                if dist < 10:
                    residues_near.append(r)
                    break
        return residues_near

    def juan(pdb_raw):
        try:
            pepe(pdb_raw)
        except Exception:
            traceback.print_exc()
        finally:
            with lock:
                pdbs_procesados.append(pdb_raw)
                with open(pdbs_procesados_path, "a") as handle:
                    handle.write(pdb_raw + "\n")

    def pepe(pdb):
        ppb = CaPPBuilder()
        p = PDBParser(PERMISSIVE=1, QUIET=1)
        path_dir = PDB_PATH + "/" + pdb[1:3].lower() + "/"
        path = path_dir + "pdb" + pdb.lower() + ".ent"
        model = list(p.get_structure('X', path))[0]

        for chain_obj in list(model):
            chain = chain_obj.id

            hmm_residues = {}

            pdb_seq = list(model[chain].get_residues())
            if pdb_seq:
                hmm_contacts = {}
                hmm_residues = {}

                hmms = df_hmm[(df_hmm["pdb"] == pdb)
                              & (df_hmm["chain"] == chain) &
                              (df_hmm["start_res"] == str(pdb_seq[0].id[1]))]
                for j, hmm in hmms.iterrows():
                    try:
                        hmm_start = int(hmm["from3"]) - 1
                        hmm_end = int(hmm["to3"]) - 1
                        hmm_chain_name = "_".join(
                            map(str, [
                                hmm["accession"].split(".")[0], hmm["chain"],
                                pdb_seq[hmm_start].id[1],
                                pdb_seq[hmm_end].id[1]
                            ]))
                        hmm_contacts[hmm_chain_name] = []
                        hmm_residues.update({
                            res.id[1]: hmm_chain_name
                            for res in pdb_seq[hmm_start:hmm_end]
                        })
                    except IndexError:
                        print(pdb, hmm["accession"], hmm["chain"], hmm_start,
                              hmm_end, pdb_seq)

            aa_residues = []
            drug_molecules = []
            for res_obj in chain_obj.get_residues():
                if res_obj.resname in drugcompounds:
                    drug_molecules.append(res_obj)
                elif res_obj.resname in aminoacidcompounds:
                    aa_residues.append(res_obj)

            for res_drug_obj in drug_molecules:
                drug_centroid = centeroid(list(res_drug_obj))
                near_residues = residues_near_drug(drug_centroid, aa_residues)
                for drug_atom in list(res_drug_obj):
                    for near_residue in near_residues:
                        for residue_atom in list(near_residue):
                            distance = (residue_atom - drug_atom)
                            if distance > 20:
                                break
                            if distance < CONTACT_DIST:
                                with open(args.distances, "a") as handle:
                                    hmm_name = hmm_residues[
                                        near_residue.id[1]] if near_residue.id[
                                            1] in hmm_residues else "NoDn"
                                    fields = [
                                        pdb, chain, hmm_name,
                                        near_residue.id[1],
                                        near_residue.resname,
                                        residue_atom.serial_number,
                                        res_drug_obj.id[1],
                                        res_drug_obj.resname,
                                        drug_atom.serial_number, distance
                                    ]
                                    handle.write("\t".join(map(str, fields)) +
                                                 "\n")

    _log.info("processing distances file")
    for x in tqdm(set(pdbs_with_drug)):
        if x not in pdbs_procesados:
            juan(x)

    # pool = ThreadPool(1)
    # pool.map(juan, set(pdbs_with_drug) - set(pdbs_procesados))

    print("Finished!!!")
Ejemplo n.º 7
0
    def pepe(pdb):
        ppb = CaPPBuilder()
        p = PDBParser(PERMISSIVE=1, QUIET=1)
        path_dir = PDB_PATH + "/" + pdb[1:3].lower() + "/"
        path = path_dir + "pdb" + pdb.lower() + ".ent"
        model = list(p.get_structure('X', path))[0]

        for chain_obj in list(model):
            chain = chain_obj.id

            hmm_residues = {}

            pdb_seq = list(model[chain].get_residues())
            if pdb_seq:
                hmm_contacts = {}
                hmm_residues = {}

                hmms = df_hmm[(df_hmm["pdb"] == pdb)
                              & (df_hmm["chain"] == chain) &
                              (df_hmm["start_res"] == str(pdb_seq[0].id[1]))]
                for j, hmm in hmms.iterrows():
                    try:
                        hmm_start = int(hmm["from3"]) - 1
                        hmm_end = int(hmm["to3"]) - 1
                        hmm_chain_name = "_".join(
                            map(str, [
                                hmm["accession"].split(".")[0], hmm["chain"],
                                pdb_seq[hmm_start].id[1],
                                pdb_seq[hmm_end].id[1]
                            ]))
                        hmm_contacts[hmm_chain_name] = []
                        hmm_residues.update({
                            res.id[1]: hmm_chain_name
                            for res in pdb_seq[hmm_start:hmm_end]
                        })
                    except IndexError:
                        print(pdb, hmm["accession"], hmm["chain"], hmm_start,
                              hmm_end, pdb_seq)

            aa_residues = []
            drug_molecules = []
            for res_obj in chain_obj.get_residues():
                if res_obj.resname in drugcompounds:
                    drug_molecules.append(res_obj)
                elif res_obj.resname in aminoacidcompounds:
                    aa_residues.append(res_obj)

            for res_drug_obj in drug_molecules:
                drug_centroid = centeroid(list(res_drug_obj))
                near_residues = residues_near_drug(drug_centroid, aa_residues)
                for drug_atom in list(res_drug_obj):
                    for near_residue in near_residues:
                        for residue_atom in list(near_residue):
                            distance = (residue_atom - drug_atom)
                            if distance > 20:
                                break
                            if distance < CONTACT_DIST:
                                with open(args.distances, "a") as handle:
                                    hmm_name = hmm_residues[
                                        near_residue.id[1]] if near_residue.id[
                                            1] in hmm_residues else "NoDn"
                                    fields = [
                                        pdb, chain, hmm_name,
                                        near_residue.id[1],
                                        near_residue.resname,
                                        residue_atom.serial_number,
                                        res_drug_obj.id[1],
                                        res_drug_obj.resname,
                                        drug_atom.serial_number, distance
                                    ]
                                    handle.write("\t".join(map(str, fields)) +
                                                 "\n")
Ejemplo n.º 8
0
    def __init__(self,
                 model,
                 radius,
                 offset=0,
                 hse_up_key='HSE_U',
                 hse_down_key='HSE_D',
                 angle_key=None):
        """
        @param model: model
        @type model: L{Model}
        
        @param radius: HSE radius
        @type radius: float
        
        @param offset: number of flanking residues that are ignored in the calculation
        of the number of neighbors
        @type offset: int
        
        @param hse_up_key: key used to store HSEup in the entity.xtra attribute
        @type hse_up_key: string
        
        @param hse_down_key: key used to store HSEdown in the entity.xtra attribute
        @type hse_down_key: string
        
        @param angle_key: key used to store the angle between CA-CB and CA-pCB in
        the entity.xtra attribute
        @type angle_key: string
        """
        assert (offset >= 0)
        # For PyMOL visualization
        self.ca_cb_list = []
        ppb = CaPPBuilder()
        ppl = ppb.build_peptides(model)
        hse_map = {}
        hse_list = []
        hse_keys = []
        ### GP
        self.clash_pairs = []
        for pp1 in ppl:
            for i in range(0, len(pp1)):
                if i == 0:
                    r1 = None
                else:
                    r1 = pp1[i - 1]
                r2 = pp1[i]
                if i == len(pp1) - 1:
                    r3 = None
                else:
                    r3 = pp1[i + 1]
                # This method is provided by the subclasses to calculate HSE
                result = self._get_cb(r1, r2, r3)
                if result is None:
                    # Missing atoms, or i==0, or i==len(pp1)-1
                    continue
                pcb, angle = result
                hse_u = 0
                hse_d = 0
                ca2 = r2['CA'].get_vector()
                residue_up = []  ### GP
                residue_down = []  ### GP
                for pp2 in ppl:
                    for j in range(0, len(pp2)):
                        if pp1 is pp2 and abs(i - j) <= offset:
                            # neighboring residues in the chain are ignored
                            continue
                        ro = pp2[j]
                        if not is_aa(ro) or not ro.has_id('CA'):
                            continue
                        cao = ro['CA'].get_vector()
                        d = (cao - ca2)
                        if d.norm() < radius:
                            if d.angle(pcb) < (math.pi / 2):
                                hse_u += 1
                                ### GP
                                # Puts residues' names in a list that were found in the upper half sphere
                                residue_up.append(ro)
                                ### end of GP code
                            else:
                                hse_d += 1
                                ### GP
                                # Puts residues' names in a list that were found in the lower half sphere
                                residue_down.append(ro)
                                ### end of GP code
                res_id = r2.get_id()
                chain_id = r2.get_parent().get_id()
                # Fill the 3 data structures
                hse_map[(chain_id, res_id)] = (hse_u, hse_d, angle)
                hse_list.append(
                    (r2, (residue_up, residue_down, hse_u, hse_d, angle)))
                ### GP residue_up and residue_down added to hse_list
                hse_keys.append((chain_id, res_id))
                # Add to xtra
                r2.xtra[hse_up_key] = hse_u
                r2.xtra[hse_down_key] = hse_d
                if angle_key:
                    r2.xtra[angle_key] = angle

                ### GP checking for atom clashes
                for atom in pp1[i]:
                    ref_vector = atom.get_vector()
                    for other_res in residue_up:
                        try:
                            if other_res != pp1[i - 1] and other_res != pp1[i +
                                                                            1]:
                                for other_atom in other_res:
                                    other_vector = other_atom.get_vector()
                                    d = other_vector - ref_vector
                                    if d.norm() < 2:
                                        self.clash_pairs.append([
                                            (pp1[i]['CA'].get_bfactor(),
                                             pp1[i].get_id()[1]),
                                            (other_res['CA'].get_bfactor(),
                                             other_res.get_id()[1])
                                        ])
                        except:
                            pass
Ejemplo n.º 9
0
 def build_peptides(self, structure):
     pp_list = self.ppb.build_peptides(structure, aa_only=False)
     if len(pp_list) == 0:  #case of failure
         pp_list = CaPPBuilder().build_peptides(structure, aa_only=False)
     return pp_list