Example #1
0
def seqres_atom_map(mmcif_dict, c):
    """Map the SEQRES positions of one chain to one-letter residue codes.

    Args:
        mmcif_dict: Mapping that holds the ``_pdbx_poly_seq_scheme`` columns
            ``seq_id``, ``pdb_strand_id`` and ``pdb_mon_id`` as parallel,
            index-aligned sequences.
        c: Chain identifier (compared against ``pdb_strand_id``) to keep.

    Returns:
        dict keyed by ``(seq_id, chain)`` whose value is ``"-"`` when the
        monomer id is ``"?"``, the one-letter code returned by ``tto``
        otherwise, or ``"X"`` when ``tto`` does not know the residue name.
    """
    category = "_pdbx_poly_seq_scheme"
    seq_ids = mmcif_dict[category + ".seq_id"]
    if not seq_ids:
        return {}

    chains = mmcif_dict[category + ".pdb_strand_id"]
    residues = mmcif_dict[category + ".pdb_mon_id"]

    seqres = {}
    for i, seqres_index in enumerate(seq_ids):
        chain = chains[i]
        if chain != c:
            continue

        res = residues[i]
        if res == "?":
            sres = "-"
        else:
            # Same fallback as the mutation-aware variant of this function:
            # a residue name unknown to ``tto`` becomes "X" instead of
            # aborting the whole mapping.
            try:
                sres = tto("{}".format(res))
            except KeyError:
                sres = "X"

        # NOTE: ``pdb_seq_num`` is intentionally not read here; it was never
        # used by this function and converting it to int could only fail.
        seqres[(seqres_index, chain)] = sres

    return seqres
def seqres_atom_map(mmcif_dict, c, p):
    """Build the SEQRES map of chain ``c`` with an optional substitution applied.

    Args:
        mmcif_dict: Mapping that holds the ``_pdbx_poly_seq_scheme`` columns
            ``seq_id``, ``pdb_seq_num``, ``pdb_strand_id`` and ``pdb_mon_id``
            as parallel, index-aligned sequences.
        c: Chain identifier (compared against ``pdb_strand_id``) to keep.
        p: Either ``"NA"`` (no substitution) or a string whose first and last
            characters are residue letters and whose middle part is the
            integer ``pdb_seq_num`` of the substituted position.

    Returns:
        Tuple ``(seqres, d)``. ``seqres`` maps ``(seq_id, chain)`` to a
        one-letter code (``"-"`` for a ``"?"`` monomer, ``"X"`` when ``tto``
        does not know the residue, or the last character of ``p`` at the
        substituted position). ``d`` maps ``pdb_seq_num`` (int) to a 1-based
        running index over the rows of the chain whose monomer is not ``"?"``.

    Raises:
        ValueError: If ``p`` is not ``"NA"`` and its middle part is not an
            integer, or if a ``pdb_seq_num`` of the selected chain is not an
            integer.
    """
    if p != "NA":
        # Parsed once up front instead of on every row.
        mut_pos = int(p[1:-1])
        mut_res = p[-1:]
    else:
        # No substitution requested. ``None`` never equals a residue number,
        # unlike the former -1000 sentinel, which could match a real row and
        # then hit an unbound replacement residue.
        mut_pos = None
        mut_res = None

    category = "_pdbx_poly_seq_scheme"
    seq_ids = mmcif_dict[category + ".seq_id"]
    seqres = {}
    d = {}
    j = 1
    for i, seqres_index in enumerate(seq_ids):
        chain = mmcif_dict[category + ".pdb_strand_id"][i]
        if chain != c:
            continue

        pdb_seq_id = int(mmcif_dict[category + ".pdb_seq_num"][i])
        res = mmcif_dict[category + ".pdb_mon_id"][i]
        if res == "?":
            sres = "-"
        else:
            d[pdb_seq_id] = j
            j += 1
            try:
                sres = tto("{}".format(res))
            except KeyError:
                # Residue name unknown to ``tto``.
                sres = "X"

        # MUTATION: overrides whatever was derived above, including "-".
        if mut_pos is not None and pdb_seq_id == mut_pos:
            sres = mut_res

        seqres[(seqres_index, chain)] = sres

    return (seqres, d)
# Read every line of the two comma-separated input tables into memory.
f = open("omim_sprot.csv","r")
ft = f.readlines()
f.close()

g = open("uniprot_pdb.csv","r")
gt = g.readlines()
g.close()

# Output file; it is not written to or closed within the lines visible here.
h = open("disease_causing_mutations.txt","w")

k = 0
while k < len(ft):
	# Split one row of omim_sprot.csv and pick the columns used below:
	# index 2 -> uid, 3 and 5 -> residue names (converted with tto),
	# 7 -> position, 8 -> last field (trailing newline removed).
	ft1 = ft[k].split(",")
	uid = ft1[2]
	res1 = ft1[3]
	res1 = tto("{}".format(res1))
	res2 = ft1[5]
	res2 = tto("{}".format(res2))
	pos = ft1[7]
	dis = ft1[8].strip("\n")

	# Progress report.
	print("{} of {}".format(k,len(ft)))

	# Look for the row of uniprot_pdb.csv whose first field equals uid,
	# starting at row index 2.
	k1 = 2
	gt1 = gt[k1].split(",")
	count = 0
	while gt1[0] != uid:
		# Ran past the end of the table: mark the PDB id as not available.
		# NOTE(review): k1 and gt1 are not advanced in the lines visible
		# here - the snippet appears to be cut off after this point.
		if k1 >= len(gt):
			pdb = "NA"
			count = count + 1
			break
Example #4
0
def main_func():
    """Write the residue zone around a target site for matching structure pairs.

    Inputs come from helpers defined outside this function (``mut_dat``,
    ``res_filter``, ``HETATM_KW``, ``MMCIF2Dict``, ``seqres_atom_map``,
    ``COM``, ``pathfiles``) and from the command line: ``sys.argv[2]`` is the
    residue number the zone is centred on and ``sys.argv[3]`` is the output
    file path.

    For every key ``x`` of ``sd`` (characters 0-3 are used as the PDB id,
    character 5 as the chain) and every partner id listed in ``sd[x]``:

    * both gzipped mmCIF files are unpacked to ``pdbprocess1.cif`` /
      ``pdbprocess2.cif`` in the current directory;
    * the partner is considered only when its ``HKW`` value equals the one of
      ``x`` and its resolution value is below ``r``;
    * the two sequence maps are compared position by position on the two
      chains; a pair with no differing position gets one output line
      ``"<pdb> <chain> <partner pdb> <partner chain> <zone>"``.

    The zone is a comma-separated list of residue numbers whose backbone
    (N, CA, C, O) centre, as returned by ``COM``, lies within ``zdis`` of the
    centre of the target residue.
    """

    # CRITERIA
    # Upper bound (exclusive) for the partner's resolution value.
    r = 2.5

    sd = mut_dat("{}/seq_sim.txt".format(pathfiles))

    resolution = res_filter()

    HKW = HETATM_KW()

    import sys
    import re
    import gzip
    from Bio.PDB.MMCIFParser import MMCIFParser
    parser = MMCIFParser(QUIET=True)
    from Bio.PDB.Polypeptide import three_to_one as tto

    # ZONE VARIABLE
    # Zone radius; compared as zdis * zdis against squared distances below.
    zdis = 10.0

    # PATH FOR THE PDB/mmCIF FILES

    #pathmmcif = "/Users/tarun/Documents/mmCIF"
    pathmmcif = "/Volumes/BIOINFO/mmCIF"

    # LOCAL RMSD OF THE POLYPEPTIDE PRESENT IN DICTIONARY sd

    # DETERMINING THE ZONE AROUND THE MUTANT SITE TO DETERMINE ANY STRUCTURAL CHANGE TAKING PLACE DUE TO THE MUTATION

    z = open("{}".format(sys.argv[3]), "w")
    #temp = open("COM.txt","w")

    # NOTE(review): count2 is initialised once, before the loop over sd, and
    # is never reset, so the zone below is computed only for the first
    # matching pair of the whole run - confirm this is intended.
    count2 = 0
    for x in sd.keys():
        pdbid = "{}".format(x)
        pdb = pdbid[0:4]
        cw = pdbid[5:6]

        typ = HKW["{}".format(pdb)]

        # CHECKING IF THE PDB's ARE ALIGNED
        # count1 is always 0 here, so this branch is always taken.
        count1 = 0
        if count1 == 0:
            #try:
            # Files are stored in sub-folders named after characters 1-2 of
            # the PDB id.
            fol = pdb[1:3]
            pdbfile = "{}/{}/{}.cif.gz".format(pathmmcif, fol, pdb)
            tar = gzip.open("{}".format(pdbfile), "rb")
            out = open("pdbprocess1.cif", "wb")
            out.write(tar.read())
            tar.close()
            out.close()

            mmcif = MMCIF2Dict("pdbprocess1.cif")
            idmap1 = seqres_atom_map(mmcif)

            kk = 0
            while kk < len(sd["{}".format(x)]):
                pdbid1 = "{}".format(sd["{}".format(x)][kk])
                #print(pdbid1)
                pdb1 = pdbid1[0:4]
                cm = pdbid1[5:6]

                typ1 = HKW["{}".format(pdb1)]
                rescr = resolution[0]["{}".format(pdb1)]

                # A missing resolution is replaced by a value that can never
                # pass the ``< r`` filter.
                if rescr == "None":
                    rescr = 1000.0

                if typ1 == typ and float(rescr) < r:

                    fol = pdb1[1:3]
                    pdbfile = "{}/{}/{}.cif.gz".format(pathmmcif, fol, pdb1)
                    tar = gzip.open("{}".format(pdbfile), "rb")
                    out = open("pdbprocess2.cif", "wb")
                    out.write(tar.read())
                    tar.close()
                    out.close()

                    mmcif = MMCIF2Dict("pdbprocess2.cif")
                    idmap2 = seqres_atom_map(mmcif)

                    # Count positions (same first key element) at which the
                    # two chains carry different values.
                    count = 0
                    for i in idmap1.keys():
                        if i[1] == cw:
                            for m in idmap2.keys():
                                if m[1] == cm and i[0] == m[0]:
                                    if idmap2[m] != idmap1[i]:
                                        count = count + 1

                    if count == 0:
                        if count2 == 0:
                            count2 = count2 + 1
                            #print("*** {} AND {} :: {} of {} ***" .format(pdbid,pdbid1,kk,len(sd["{}".format(x)])))

                            # FOR WILDTYPE

                            # EXECUTE THE CODE TO PICK UP THE DESIRED ZONE AROUND THE RESIDUE

                            structure_id = "{}".format(pdb)
                            filename = "pdbprocess1.cif"
                            structure = parser.get_structure(
                                structure_id, filename)

                            model = structure[0]

                            chain = model["{}".format(cw)]
                            c1 = chain.get_list()  # LIST ALL THE RESIDUES

                            # Parallel lists: residue number, one-letter name
                            # and backbone centre of every kept residue.
                            k1 = 0
                            resid_list = []
                            resname_list = []
                            com_list = []
                            while k1 < len(c1):
                                c2 = c1[k1].get_id()
                                resid = c2[1]
                                # Only residues whose id starts with a blank
                                # field are kept.
                                if c2[0] == " ":
                                    residue = chain[c2]
                                    tresname = residue.get_resname()
                                    try:
                                        resname = tto("{}".format(tresname))
                                    except:
                                        resname = "X"
                                    r1 = residue.get_list(
                                    )  # LIST ALL THE ATOMS OF A PARTICULAR RESIDUE

                                    k2 = 0
                                    res = []  # ATOM NAMES
                                    cd = []  # ATOM COORDINATES
                                    while k2 < len(r1):
                                        r2 = r1[k2].get_id()
                                        # COM OF THE BACKBONE
                                        if r2 == "CA" or r2 == "N" or r2 == "C" or r2 == "O":
                                            res.append(r2)

                                            atom = residue['{}'.format(r2)]
                                            a1 = atom.get_coord()
                                            cd.append(a1)
                                        k2 = k2 + 1

                                    CM = COM(res, cd)
                                    resid_list.append(resid)
                                    resname_list.append(resname)
                                    com_list.append(CM)

                                k1 = k1 + 1

                            # Find the target residue (sys.argv[2]) and gather
                            # every other residue within the zone radius.
                            k1 = 0
                            while k1 < len(resid_list):
                                if resid_list[k1] == int(sys.argv[2]):
                                    v1 = com_list[k1]
                                    zres = "{}".format(resid_list[k1])
                                    zresname = "{}".format(resname_list[k1])
                                    k2 = 0
                                    while k2 < len(resid_list):
                                        if k2 != k1:
                                            v2 = com_list[k2]
                                            # Squared distance; compared with
                                            # zdis squared below.
                                            dis = (v1[0] - v2[0])**2 + (
                                                v1[1] - v2[1])**2 + (v1[2] -
                                                                     v2[2])**2
                                            if dis < (zdis * zdis):
                                                zres = zres + ",{}".format(
                                                    resid_list[k2])
                                                zresname = zresname + "{}".format(
                                                    resname_list[k2])
                                        k2 = k2 + 1
                                    # Force the outer loop to stop after the
                                    # first match.
                                    k1 = len(resid_list)
                                k1 = k1 + 1

                        # NOTE(review): zres is only assigned inside the
                        # count2 == 0 branch above and only when the target
                        # residue is found; otherwise this line reuses an
                        # earlier value or fails because zres is unbound.
                        z.write("{} {} {} {} {}\n".format(
                            pdb, cw, pdb1, cm, zres))
                    else:
                        print("WT AND MUT MISMATCH")
                    print("{} of {} ;; {} {}".format(kk,
                                                     len(sd["{}".format(x)]),
                                                     pdbid, pdbid1))
                kk = kk + 1

        #except:
        #print("FILE NOT FOUND")

    z.close()
def salt_bridge():
    """Report charged residues lying close to a mutated position.

    Command line: ``sys.argv[1]`` is the first line index of
    ``mut_data.txt`` to process, ``sys.argv[2]`` the end index (exclusive) or
    ``"END"``/``"end"`` for the whole file, ``sys.argv[3]`` the output path.

    Each processed line is split on whitespace: fields 0 and 1 are structure
    ids (characters 0-3 used as PDB id, character 5 as chain; the first is
    labelled WT and the second MUT in the output), field 2 is the residue
    position and fields 3 and 4 are one-letter residue codes.

    When either residue code is one of D, E, K, R, H, each of the two
    structures is unpacked to ``pdbprocess<start>.cif``, parsed, and one
    representative atom per charged residue is collected (CG for D, CD for E,
    NZ for K, NE for R, ND1 and NE2 for H). If the residue at the given
    position was collected, the residue numbers of oppositely charged
    residues whose squared distance is below ``zdis * zdis`` are written as a
    ``YES`` line; every other outcome, including any exception, produces a
    ``NO`` line.
    """
    import sys
    import re
    import gzip
    from Bio.PDB.MMCIFParser import MMCIFParser
    parser = MMCIFParser(QUIET=True)
    from Bio.PDB.Polypeptide import three_to_one as tto

    import numpy as np

    # Lookup tables used only for membership tests on one-letter codes:
    # ALL = every residue considered, ALLN / ALLP = the two groups that are
    # paired against each other below.
    ALL = dict()
    ALL["D"] = "SB"
    ALL["E"] = "SB"
    ALL["K"] = "SB"
    ALL["R"] = "SB"
    ALL["H"] = "SB"

    ALLN = dict()
    ALLN["D"] = "SB"
    ALLN["E"] = "SB"

    ALLP = dict()
    ALLP["K"] = "SB"
    ALLP["R"] = "SB"
    ALLP["H"] = "SB"

    # neg[i] is represented by atom negat[i]; posi[i] by posat[i]
    # (a two-atom list for "H").
    neg = ["D", "E"]
    negat = ["CG", "CD"]
    posi = ["K", "R", "H"]
    posat = ["NZ", "NE", ["ND1", "NE2"]]

    # ZONE VARIABLE
    # Distance cutoff; compared as zdis * zdis against squared distances.
    zdis = 4.0

    # PATH FOR THE PDB/mmCIF FILES

    #pathmmcif = "/bmm/data/pdbmmcif/data/structures/all/mmCIF"
    pathmmcif = "/Volumes/BIOINFO/mmCIF"
    #pathPDB = "/bmm/data/rcsb/data/structures/all/pdb"
    #pathPDB = "/bmm/home/tkhanna1/Documents/Database/First_10000/test_set"

    # NOTE: the name ``dis`` is a file handle here and is reused for squared
    # distances further down.
    dis = open("mut_data.txt", "r")
    ht = dis.readlines()
    dis.close()

    # DETERMINING THE ZONE AROUND THE MUTANT SITE TO DETERMINE ANY STRUCTURAL CHANGE TAKING PLACE DUE TO THE MUTATION

    z = open("{}".format(sys.argv[3]), "w")
    #temp = open("COM.txt","w")

    start = sys.argv[1]
    end = sys.argv[2]
    if end == "END" or end == "end":
        end = len(ht)
    end = int(end)
    k = int(start)

    while k < end:  # end = len(ht)
        mutant = []
        mu = ht[k].split()
        pos = mu[2].strip("\n")
        check1 = mu[3].strip("\n")
        check2 = mu[4].strip("\n")

        # kk == 0 handles mu[0] (written as WT), kk == 1 handles mu[1]
        # (written as MUT).
        kk = 0
        while kk < 2:
            if check1 in ALL.keys() or check2 in ALL.keys():

                pdbid = mu[(kk + 0)].strip("\n")
                pdb = pdbid[0:4]  # PDB NAME
                C = pdbid[5:6]  # CHAIN

                print("*** {} :: {} of {} ***".format(pdb, k, len(ht)))

                # EXECUTE THE CODE TO PICK UP THE DESIRED ZONE AROUND THE RESIDUE

                #cc = 0
                #if cc == 0:
                try:
                    fol = pdb[1:3]
                    pdbfile = "{}/{}/{}.cif.gz".format(pathmmcif, fol, pdb)
                    tar = gzip.open("{}".format(pdbfile), "rb")
                    # The scratch file name contains the start index taken
                    # from the command line.
                    out = open("pdbprocess{}.cif".format(start), "wb")
                    out.write(tar.read())
                    tar.close()
                    out.close()

                    structure_id = "{}".format(pdb)
                    filename = "pdbprocess{}.cif".format(start)
                    structure = parser.get_structure(structure_id, filename)

                    model = structure[0]

                    chain = model["{}".format(C)]
                    c1 = chain.get_list()  # LIST ALL THE RESIDUES

                    # Parallel lists for the collected charged residues:
                    # residue number, one-letter name and a flat coordinate
                    # list (3 values, or 6 when two atoms were found).
                    k1 = 0
                    resid_list = []
                    resname_list = []
                    com_list = []
                    while k1 < len(c1):
                        c2 = c1[k1].get_id()
                        resid = c2[1]
                        if c2[0] == " ":
                            residue = chain[c2]
                            tresname = residue.get_resname()
                            try:
                                resname = tto("{}".format(tresname))
                            except:
                                resname = "X"

                            # count != 0 -> residue is in ``neg``;
                            # ksel is its index in neg/negat.
                            count = 0
                            kn = 0
                            while kn < len(neg):
                                if resname == neg[kn]:
                                    count = count + 1
                                    ksel = kn
                                kn = kn + 1

                            # NOTE(review): count1 is (re)assigned only when
                            # count == 0, so for a residue in ``neg`` it keeps
                            # the value from an earlier residue, or is unbound
                            # if no earlier residue set it.
                            if count == 0:
                                count1 = 0
                                kn = 0
                                while kn < len(posi):
                                    if resname == posi[kn]:
                                        count1 = count1 + 1
                                        ksel = kn
                                    kn = kn + 1

                            if count != 0:
                                r1 = residue.get_list(
                                )  # LIST ALL THE ATOMS OF A PARTICULAR RESIDUE

                                k2 = 0
                                res = []  # ATOM NAMES
                                cd = []  # ATOM COORDINATES
                                countc = 0
                                while k2 < len(r1):
                                    r2 = r1[k2].get_id()
                                    if r2 == negat[ksel]:

                                        res.append(r2)
                                        atom = residue['{}'.format(r2)]
                                        a1 = atom.get_coord()
                                        list1 = [a1[0], a1[1], a1[2]]
                                        cd.append(list1)
                                        countc = countc + 1

                                    k2 = k2 + 1

                                # The residue is recorded only when exactly
                                # one matching atom was found.
                                if countc == 1:
                                    resid_list.append(resid)
                                    resname_list.append(resname)
                                    list1 = [cd[0][0], cd[0][1], cd[0][2]]
                                    com_list.append(list1)

                            if count1 != 0:
                                r1 = residue.get_list(
                                )  # LIST ALL THE ATOMS OF A PARTICULAR RESIDUE
                                k2 = 0
                                res = []  # ATOM NAMES
                                cd = []  # ATOM COORDINATES
                                countc = 0
                                while k2 < len(r1):
                                    r2 = r1[k2].get_id()
                                    # ksel == 2 is "H", whose posat entry is
                                    # a pair of atom names.
                                    if ksel != 2:
                                        if r2 == posat[ksel]:
                                            res.append(r2)
                                            atom = residue['{}'.format(r2)]
                                            a1 = atom.get_coord()
                                            list1 = [a1[0], a1[1], a1[2]]
                                            cd.append(list1)
                                            countc = countc + 1
                                    else:
                                        if r2 == posat[ksel][0] or r2 == posat[
                                                ksel][1]:
                                            res.append(r2)
                                            atom = residue['{}'.format(r2)]
                                            a1 = atom.get_coord()
                                            list1 = [a1[0], a1[1], a1[2]]
                                            cd.append(list1)
                                            countc = countc + 1

                                    k2 = k2 + 1

                                if countc == 1:
                                    resid_list.append(resid)
                                    resname_list.append(resname)
                                    list1 = [cd[0][0], cd[0][1], cd[0][2]]
                                    com_list.append(list1)
                                # Two atoms found: both coordinates are stored
                                # back to back in one 6-element list.
                                if countc == 2:
                                    resid_list.append(resid)
                                    resname_list.append(resname)
                                    list1 = [
                                        cd[0][0], cd[0][1], cd[0][2], cd[1][0],
                                        cd[1][1], cd[1][2]
                                    ]
                                    com_list.append(list1)

                        k1 = k1 + 1

                    # DETERMINING THE ZONE AROUND THE MUTATED RESIDUE

                    # Locate the mutated position among the collected
                    # residues; count ends up 1 and rcpos holds its index
                    # when it is found, otherwise count stays 0.
                    k1 = 0
                    count = 0
                    while k1 < len(resid_list):
                        posc = resid_list[k1]
                        count = 0
                        if int(posc) == int(pos):
                            count = count + 1
                            rcpos = k1
                            k1 = len(resid_list)
                        k1 = k1 + 1

                    if count != 0:
                        hd = -1
                        hd1 = -1
                        zresname = "{}".format(resname_list[rcpos])
                        if zresname == "H":
                            v1 = com_list[rcpos]
                            # COMPUTING THE DISTANCES
                            list1 = []
                            # x selects the first or second stored atom of
                            # the central "H" (offset 0 or 3 into v1).
                            # NOTE(review): v1 has only 3 values when a single
                            # atom was found, so offset 3 would be out of
                            # range - presumably handled by the except below.
                            for x in range(0, 2):
                                skip = (x * 3)
                                k3 = 0
                                while k3 < len(com_list):
                                    if k3 != rcpos:
                                        v2 = com_list[k3]
                                        zresnamec = "{}".format(
                                            resname_list[k3])
                                        if zresnamec in ALLN.keys():
                                            # ALLN holds only "D" and "E", so
                                            # this "H" branch cannot be taken.
                                            if zresnamec == "H" and int(
                                                    hd) != int(resid_list[k3]):
                                                for y in range(0, 2):
                                                    skip2 = y * 3
                                                    dis = (
                                                        v1[0 + skip] -
                                                        v2[0 + skip2])**2 + (
                                                            v1[1 + skip] -
                                                            v2[1 + skip2]
                                                        )**2 + (
                                                            v1[2 + skip] -
                                                            v2[2 + skip2])**2
                                                    if dis < (zdis * zdis):
                                                        list1.append(
                                                            resid_list[k3])
                                                        hd = resid_list[k3]
                                                        y = 3
                                            # hd1 becomes 3 after the first
                                            # hit, so no further residue is
                                            # appended afterwards.
                                            elif zresnamec != "H" and hd1 != 3:
                                                dis = (v1[0 + skip] -
                                                       v2[0])**2 + (
                                                           v1[1 + skip] - v2[1]
                                                       )**2 + (v1[2 + skip] -
                                                               v2[2])**2
                                                if dis < (zdis * zdis):
                                                    list1.append(
                                                        resid_list[k3])
                                                    hd1 = 3
                                    k3 = k3 + 1
                        else:
                            v1 = com_list[rcpos]
                            # Central residue in ALLP (but not "H"): compare
                            # against residues in ALLN.
                            if zresname in ALLP.keys():
                                k3 = 0
                                list1 = []
                                while k3 < len(com_list):
                                    if k3 != rcpos:
                                        v2 = com_list[k3]
                                        zresnamec = "{}".format(
                                            resname_list[k3])
                                        if zresnamec in ALLN.keys():
                                            # ALLN holds only "D" and "E", so
                                            # this "H" branch cannot be taken.
                                            if zresnamec == "H" and int(
                                                    hd) != int(resid_list[k3]):
                                                for x in range(0, 2):
                                                    skip = (x * 3)
                                                    dis = (
                                                        v1[0] - v2[0 + skip]
                                                    )**2 + (
                                                        v1[1] - v2[1 + skip]
                                                    )**2 + (v1[2] -
                                                            v2[2 + skip])**2
                                                    if dis < (zdis * zdis):
                                                        list1.append(
                                                            resid_list[k3])
                                                        hd = resid_list[k3]
                                                        x = 2
                                            elif zresnamec != "H":
                                                dis = (v1[0] - v2[0])**2 + (
                                                    v1[1] - v2[1])**2 + (
                                                        v1[2] - v2[2])**2
                                                if dis < (zdis * zdis):
                                                    list1.append(
                                                        resid_list[k3])
                                    k3 = k3 + 1

                            # Central residue in ALLN: compare against
                            # residues in ALLP.
                            elif zresname in ALLN.keys():
                                k3 = 0
                                list1 = []
                                while k3 < len(com_list):
                                    if k3 != rcpos:
                                        v2 = com_list[k3]
                                        zresnamec = "{}".format(
                                            resname_list[k3])
                                        if zresnamec in ALLP.keys():
                                            if zresnamec == "H" and int(
                                                    hd) != int(resid_list[k3]):
                                                # Both stored atoms of the "H"
                                                # partner are tested; assigning
                                                # x below does not end the for
                                                # loop, so the residue can be
                                                # appended twice.
                                                for x in range(0, 2):
                                                    skip = (x * 3)
                                                    dis = (
                                                        v1[0] - v2[0 + skip]
                                                    )**2 + (
                                                        v1[1] - v2[1 + skip]
                                                    )**2 + (v1[2] -
                                                            v2[2 + skip])**2
                                                    if dis < (zdis * zdis):
                                                        list1.append(
                                                            resid_list[k3])
                                                        hd = resid_list[k3]
                                                        x = 2
                                            elif zresnamec != "H":
                                                dis = (v1[0] - v2[0])**2 + (
                                                    v1[1] - v2[1])**2 + (
                                                        v1[2] - v2[2])**2
                                                if dis < (zdis * zdis):
                                                    list1.append(
                                                        resid_list[k3])
                                    k3 = k3 + 1

                        # At least one partner found -> YES line with the
                        # partner list and its length; otherwise a NO line.
                        if len(list1) > 0:
                            if kk == 0:
                                z.write("{} {} {} {} {} YES WT {} {}\n".format(
                                    mu[0], mu[1], mu[2], mu[3],
                                    mu[4].strip("\n"), list1, len(list1)))
                                print("{} {} {} {} {} YES {} WT".format(
                                    mu[0], mu[1], mu[2], mu[3],
                                    mu[4].strip("\n"), list1))
                            else:
                                z.write(
                                    "{} {} {} {} {} YES MUT {} {}\n".format(
                                        mu[0], mu[1], mu[2], mu[3],
                                        mu[4].strip("\n"), list1, len(list1)))
                                print("{} {} {} {} {} YES {} MUT".format(
                                    mu[0], mu[1], mu[2], mu[3],
                                    mu[4].strip("\n"), list1))

                        else:
                            if kk == 0:
                                z.write("{} {} {} {} {} NO WT\n".format(
                                    mu[0], mu[1], mu[2], mu[3],
                                    mu[4].strip("\n")))
                            else:
                                z.write("{} {} {} {} {} NO MUT\n".format(
                                    mu[0], mu[1], mu[2], mu[3],
                                    mu[4].strip("\n")))
                    else:
                        # The mutated position is not among the collected
                        # charged residues.
                        if kk == 0:
                            z.write("{} {} {} {} {} NO WT\n".format(
                                mu[0], mu[1], mu[2], mu[3], mu[4].strip("\n")))
                        else:
                            z.write("{} {} {} {} {} NO MUT\n".format(
                                mu[0], mu[1], mu[2], mu[3], mu[4].strip("\n")))

                # NOTE(review): this catches every exception raised above,
                # not only a missing file, and reports them all the same way.
                except:
                    print("FILE NOT FOUND")
                    if kk == 0:
                        z.write("{} {} {} {} {} NO WT\n".format(
                            mu[0], mu[1], mu[2], mu[3], mu[4].strip("\n")))
                    else:
                        z.write("{} {} {} {} {} NO MUT\n".format(
                            mu[0], mu[1], mu[2], mu[3], mu[4].strip("\n")))

                kk = kk + 1

            else:
                # Neither residue code is in ALL: write both NO lines at once
                # and leave the kk loop.
                kk = 2
                #print("NOT SALT BRIDGE RESIDUES")
                z.write("{} {} {} {} {} NO WT\n".format(
                    mu[0], mu[1], mu[2], mu[3], mu[4].strip("\n")))
                z.write("{} {} {} {} {} NO MUT\n".format(
                    mu[0], mu[1], mu[2], mu[3], mu[4].strip("\n")))

        k = k + 1

    z.close()
def main_func():
    """Summarise CATH/PFAM grouping and write wild-type zone residue lists.

    Steps, in order:

    1. Load three lookup objects through the helpers ``PFAM``, ``cath`` and
       ``mut_dat`` (defined elsewhere in the file) from
       ``pdb_pfam_mapping.txt``, ``cath_domain.txt`` and ``set1_sc.txt``.
    2. Keep every key of the mutation data whose CATH value formats to
       ``"1"`` (dictionary ``sd``) and write per-group counts to
       ``CATH_dis.txt`` and ``PFAM_dis.txt``.
    3. For the entries of ``sd`` with running index ``start`` .. ``end``
       (both taken from the command line), decompress the mmCIF file of the
       entry and of each associated entry, compare their sequence maps, and
       when no residue differs write the residue numbers found within
       ``zdis`` of sampled positions of the first chain.

    Command-line arguments:
        sys.argv[1]: index of the first entry to process.
        sys.argv[2]: index of the last entry, or ``END`` / ``end`` for
            ``len(sd)``.
        sys.argv[3]: path of the output file.

    Side effects: writes ``PFAM_dis.txt``, ``CATH_dis.txt``, the output file
    and the scratch files ``pdbprocess1<start>.cif`` /
    ``pdbprocess2<start>.cif`` in the working directory; prints progress.
    """

    pf = PFAM("pdb_pfam_mapping.txt")
    ca = cath("cath_domain.txt")
    md = mut_dat("set1_sc.txt")

    f = open("PFAM_dis.txt", "w")

    g = open("CATH_dis.txt", "w")

    # NOTE(review): `re` and `tto` are imported here; `re` is not used in
    # this function. `MMCIF2Dict` is used below but not imported here, so it
    # must already be in scope at module level.
    import sys
    import re
    import gzip
    from Bio.PDB.MMCIFParser import MMCIFParser
    parser = MMCIFParser(QUIET=True)
    from Bio.PDB.Polypeptide import three_to_one as tto

    # Zone radius; compared (squared) against squared centre distances below.
    zdis = 10.0

    # Root directory of the gzipped mmCIF files; files are looked up as
    # <root>/<characters 1-2 of the id>/<id>.cif.gz.
    pathmmcif = "/data/pdb/divided/mmCIF"

    # d1: CATH value -> list of keys of md (only the value "1" is ever kept).
    # sd: the subset of md whose CATH value is "1".
    # nf: number of keys for which the lookup raised (any exception).
    d1 = dict()
    sd = dict()
    nf = 0
    for x in md:
        try:
            t1 = "{}".format(ca["{}".format(x)])
            if t1 not in d1.keys() and t1 == "1":
                d1["{}".format(t1)] = [x]
                # NOTE(review): membership is tested with x but the entry is
                # stored under its string form; the two agree only if x is
                # already a str.
                if x not in sd.keys():
                    sd["{}".format(x)] = md["{}".format(x)]
            elif t1 == "1":
                d1["{}".format(t1)].append(x)
                if x not in sd.keys():
                    sd["{}".format(x)] = md["{}".format(x)]
        except:
            # Any failure (presumably a key missing from the CATH table) is
            # only counted.
            nf = nf + 1

    # One line per CATH group: running number, group value, member count.
    k = 1
    for x in d1:
        t1 = len(d1["{}".format(x)])
        g.write("{} {} {}\n".format(k, x, t1))
        k = k + 1

    # d: element [2] of each PFAM record -> list of keys of sd.
    d = dict()
    nf = 0
    for x in sd:
        try:
            k1 = 0
            while k1 < len(pf["{}".format(x)]):
                t1 = pf["{}".format(x)][k1][2]
                # NOTE(review): tested with t1, stored under str(t1); if t1
                # is not a str the list is re-created on every hit.
                if t1 not in d.keys():
                    d["{}".format(t1)] = [x]
                else:
                    d["{}".format(t1)].append(x)
                k1 = k1 + 1
        except:
            # Any failure (presumably a key missing from the PFAM table) is
            # only counted.
            nf = nf + 1

    # One line per PFAM group: running number, group key, member count.
    k = 1
    for x in d:
        t1 = len(d["{}".format(x)])
        f.write("{} {} {}\n".format(k, x, t1))
        k = k + 1

    f.close()
    g.close()

    # Second phase: for each entry of sd, collect the residues lying within
    # zdis of sampled positions of the first chain.

    z = open("{}".format(sys.argv[3]), "w")

    start = sys.argv[1]
    end = sys.argv[2]
    if end == "END" or end == "end":
        end = len(sd)
    end = int(end)
    start = int(start)
    # k is the running index over sd; entries with start <= k <= end are
    # processed (the break below fires only once k exceeds end).
    k = 0
    for x in sd.keys():
        print("# {} of {} #".format(k, end))
        if k >= start:
            # presumably ids look like "<4-char PDB code><sep><chain>";
            # characters 0-3 are used as the code and character 5 as chain.
            pdbid = "{}".format(x)
            pdb = pdbid[0:4]
            cw = pdbid[5:6]

            # Everything for this entry runs under one catch-all handler:
            # any exception, not only a missing file, ends up in the
            # "FILE NOT FOUND" branch at the bottom.
            try:
                # Decompress the first structure to a scratch file whose name
                # carries `start`.
                fol = pdb[1:3]
                pdbfile = "{}/{}/{}.cif.gz".format(pathmmcif, fol, pdb)
                tar = gzip.open("{}".format(pdbfile), "rb")
                out = open("pdbprocess1{}.cif".format(start), "wb")
                out.write(tar.read())
                tar.close()
                out.close()

                mmcif = MMCIF2Dict("pdbprocess1{}.cif".format(start))
                # seqres_atom_map is defined elsewhere; its result is used
                # below as a mapping whose keys are indexable, with [0]
                # compared between structures and [1] compared to the chain.
                idmap1 = seqres_atom_map(mmcif)

                # Loop over every entry associated with x.
                kk = 0
                while kk < len(sd["{}".format(x)]):
                    pdbid1 = "{}".format(sd["{}".format(x)][kk])
                    pdb1 = pdbid1[0:4]
                    cm = pdbid1[5:6]

                    fol = pdb1[1:3]
                    pdbfile = "{}/{}/{}.cif.gz".format(pathmmcif, fol, pdb1)
                    tar = gzip.open("{}".format(pdbfile), "rb")
                    out = open("pdbprocess2{}.cif".format(start), "wb")
                    out.write(tar.read())
                    tar.close()
                    out.close()

                    mmcif = MMCIF2Dict("pdbprocess2{}.cif".format(start))
                    idmap2 = seqres_atom_map(mmcif)

                    # Count positions (same key[0]) of chain cw / chain cm
                    # whose mapped values differ.
                    count = 0
                    for i in idmap1.keys():
                        if i[1] == cw:
                            for m in idmap2.keys():
                                if m[1] == cm and i[0] == m[0]:
                                    if idmap2[m] != idmap1[i]:
                                        count = count + 1

                    # count2 is reset on every pass, so the inner test below
                    # is always true.
                    count2 = 0
                    if count == 0:

                        if count2 == 0:
                            count2 = count2 + 1

                            # First structure: parse the scratch file and
                            # walk chain cw.

                            structure_id = "{}".format(pdb)
                            filename = "pdbprocess1{}.cif".format(start)
                            structure = parser.get_structure(
                                structure_id, filename)

                            model = structure[0]

                            chain = model["{}".format(cw)]
                            c1 = chain.get_list()  # all residues of the chain

                            # Parallel lists, one entry per residue whose id
                            # has a blank first field.
                            k1 = 0
                            resid_list = []
                            resname_list = []
                            com_list = []
                            while k1 < len(c1):
                                c2 = c1[k1].get_id()
                                resid = c2[1]
                                if c2[0] == " ":
                                    residue = chain[c2]
                                    tresname = residue.get_resname()
                                    # Names tto() cannot convert become "X".
                                    try:
                                        resname = tto("{}".format(tresname))
                                    except:
                                        resname = "X"
                                    r1 = residue.get_list(
                                    )  # all atoms of this residue

                                    k2 = 0
                                    res = []  # atom names
                                    cd = []  # atom coordinates
                                    while k2 < len(r1):
                                        r2 = r1[k2].get_id()
                                        # Only the backbone atoms are passed
                                        # to COM().
                                        if r2 == "CA" or r2 == "N" or r2 == "C" or r2 == "O":
                                            res.append(r2)

                                            atom = residue['{}'.format(r2)]
                                            a1 = atom.get_coord()
                                            cd.append(a1)
                                        k2 = k2 + 1

                                    # COM is defined elsewhere; its result is
                                    # indexed [0], [1], [2] below.
                                    CM = COM(res, cd)
                                    resid_list.append(resid)
                                    resname_list.append(resname)
                                    com_list.append(CM)

                                k1 = k1 + 1

                            # Sample centre positions from list index 20 up
                            # to len - 20 with a stride of
                            # (len - 40) / 5, but never less than 6.
                            k1 = 20
                            ss = int((len(resid_list) - 40) / 5)
                            if ss < 6:
                                ss = 6
                            while k1 < (len(resid_list) - 20):
                                v1 = com_list[k1]
                                # zres: comma-separated residue numbers,
                                # centre first. zresname is built alongside
                                # but never written.
                                zres = "{}".format(resid_list[k1])
                                zresname = "{}".format(resname_list[k1])
                                k2 = 0
                                while k2 < len(resid_list):
                                    if k2 != k1:
                                        v2 = com_list[k2]
                                        # Squared distance between the two
                                        # centres.
                                        dis = (v1[0] - v2[0])**2 + (
                                            v1[1] - v2[1])**2 + (v1[2] -
                                                                 v2[2])**2
                                        if dis < (zdis * zdis):
                                            zres = zres + ",{}".format(
                                                resid_list[k2])
                                            zresname = zresname + "{}".format(
                                                resname_list[k2])
                                    k2 = k2 + 1

                                # The index is advanced before the write; the
                                # write only uses zres computed above.
                                k1 = k1 + ss

                                z.write("{} {} {} {} {}\n".format(
                                    pdb, cw, pdb1, cm, zres))

                        # NOTE: an equivalent zone computation for the second
                        # structure (pdb1 / chain cm) used to follow here and
                        # is disabled.

                    else:
                        print("WT AND MUT MISMATCH")
                    kk = kk + 1

            except:
                # Reached for any exception raised above; nothing is written
                # to the output file for this entry.
                print("FILE NOT FOUND")

        k = k + 1
        if k > end:
            break

    z.close()
Example #7
0
def _func2_copy_pdb_records(src_path, dst_path):
    """Copy the ATOM, TER and REMARK lines of *src_path* to *dst_path*.

    Lines are selected by their first whitespace-separated token. Blank
    lines are skipped instead of raising ``IndexError``.
    """
    with open(src_path, "r") as src, open(dst_path, "w") as dst:
        for line in src:
            fields = line.split()
            if fields and fields[0] in ("ATOM", "TER", "REMARK"):
                dst.write(line)


def _func2_first_token_index(lines, token, start=0):
    """Return the index of the first line at/after *start* starting with *token*.

    Raises:
        IndexError: when no such line exists (the same exception type the
            original scan produced by running off the end of the list).
    """
    for idx in range(start, len(lines)):
        fields = lines[idx].split()
        if fields and fields[0] == token:
            return idx
    raise IndexError(
        "no line starting with {!r} found from line {}".format(token, start))


def _func2_zone_atoms(chain, zone):
    """Collect C-alpha and side-chain data for the residues of *chain* in *zone*.

    A residue is selected when its residue number (second field of its id)
    is in *zone*. Side-chain atoms are all atoms other than CA, N, C, O and
    those whose name starts with ``H``.

    Returns:
        (resids, resnames, ca_coords, sc_names, sc_coords) as lists;
        resnames holds the residue names as stored in the structure.
    """
    resids = []
    resnames = []
    ca_coords = []
    sc_names = []
    sc_coords = []
    for residue in chain.get_list():
        resid = residue.get_id()[1]
        if resid not in zone:
            continue
        resids.append(resid)
        resnames.append(residue.get_resname())
        for atom in residue.get_list():
            name = atom.get_id()
            if name == "CA":
                ca_coords.append(atom.get_coord())
            elif name not in ("N", "C", "O") and name[0:1] != "H":
                sc_names.append(name)
                sc_coords.append(atom.get_coord())
    return resids, resnames, ca_coords, sc_names, sc_coords


def func2(arg1):
    """Convert superposition output to PDB files and write RMSD results.

    Reads ``output_all`` and ``output_all_atm`` and writes their ATOM / TER /
    REMARK lines to ``file1.pdb`` and ``file2.pdb``. Reads ``temp`` and takes
    the chain lengths from the first line starting with ``Length`` and the
    line after it, and the aligned length and RMSD from the following line
    starting with ``Aligned`` (presumably the alignment program's report --
    confirm against the caller). Writes ``results``:

    * line 1: ``<global rmsd> <length 1> <length 2> <aligned length>``
    * line 2 (only when ``arg1 == 1``): ``<C-alpha local RMSD>
      <side-chain local RMSD>``, each ``NA`` when the two chains do not
      contribute the same number of atoms.

    Args:
        arg1: when equal to ``1`` the local RMSD values are computed for the
            residue numbers given in ``sys.argv[2]``.

    Raises:
        IndexError: when ``temp`` lacks the expected lines or fields.
        Any exception raised by the helpers ``RMSD`` / ``RMSD_SC`` (defined
        elsewhere in the file) or by the residue-name conversion.
    """
    # Imported locally like the sibling functions do, so the function does
    # not depend on a module-level import of sys.
    import sys

    from Bio.PDB.PDBParser import PDBParser
    from Bio.PDB.Polypeptide import three_to_one as tto

    AA = [
        "A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F",
        "P", "S", "T", "W", "Y", "V"
    ]

    # Side-chain atom names per residue type, in the order of AA.
    AT = [["CB"], ["CB", "CG", "CD", "NE", "CZ", "NH1", "NH2"],
          ["CB", "CG", "OD1", "ND2"], ["CB", "CG", "OD1", "OD2"], ["CB", "SG"],
          ["CB", "CG", "CD", "OE1", "OE2"], ["CB", "CG", "CD", "OE1", "NE2"],
          [], ["CB", "CG", "ND1", "CE1", "NE2", "CD2"],
          ["CB", "CG1", "CG2", "CD1"], ["CB", "CG", "CD1", "CD2"],
          ["CB", "CG", "CD", "CE", "NZ"], ["CB", "CG", "SD", "CE"],
          ["CB", "CG", "CD1", "CE1", "CZ", "CE2", "CD2"], ["CB", "CG", "CD"],
          ["CB", "OG"], ["CB", "CG2", "OG1"],
          ["CB", "CG", "CD1", "NE1", "CE2", "CD2", "CE3", "CZ3", "CH2", "CZ2"],
          ["CB", "CG", "CD1", "CE1", "CZ", "OH", "CE2", "CD2"],
          ["CB", "CG1", "CG2"]]

    # Atom-name pairs per residue type handed to RMSD_SC (presumably the
    # interchangeable atoms of symmetric side chains -- confirm in RMSD_SC).
    IN = [[], ["NH1", "NH2"], [], ["OD1", "OD2"], [], ["OE1", "OE2"], [], [],
          [], [], ["CD1", "CD2"], [], [], ["CD1", "CD2"], [], [], [], [],
          ["CD1", "CD2"], ["CG1", "CG2"]]

    IN1 = [[], [], [], [], [], [], [], [], [], [], [], [], [], ["CE1", "CE2"],
           [], [], [], [], ["CE1", "CE2"], []]

    dat = dict(zip(AA, AT))
    drot = dict(zip(AA, IN))
    drot1 = dict(zip(AA, IN1))

    parser = PDBParser(QUIET=True)

    # Strip everything but ATOM/TER/REMARK records from the two inputs.
    _func2_copy_pdb_records("output_all", "file1.pdb")  # only C-alpha
    _func2_copy_pdb_records("output_all_atm", "file2.pdb")  # all atoms

    # Global RMSD, chain lengths and aligned length from the report file.
    with open("temp", "r") as handle:
        report = handle.readlines()

    line_no = _func2_first_token_index(report, "Length")
    lenwt = report[line_no].split()[3]
    line_no = line_no + 1
    lenmut = report[line_no].split()[3]

    line_no = _func2_first_token_index(report, "Aligned", line_no)
    parts = report[line_no].split(",")
    grmsd = parts[1].split()[1]
    coverage = parts[0].split()[2]

    # The context manager guarantees the first line reaches disk even when
    # the local RMSD step below raises.
    with open("results", "w") as g:
        g.write("{} {} {} {}\n".format(grmsd, lenwt, lenmut, coverage))

        if arg1 == 1:
            # The first comma-separated field is skipped, as before
            # (NOTE(review): confirm with the caller what that field holds).
            zone = {int(token) for token in sys.argv[2].split(",")[1:]}

            structure = parser.get_structure("file2", "file2.pdb")
            model = structure[0]

            # Chain "A" is treated as the wild type, chain "B" as the mutant.
            reswt1, names_wt, cdwt1, atwt2, cdwt2 = _func2_zone_atoms(
                model["A"], zone)
            _, _, cdmut1, atmut2, cdmut2 = _func2_zone_atoms(model["B"], zone)

            # One-letter residue codes of the wild-type zone; a name tto()
            # cannot convert still raises, as it did before.
            resnamewt1 = [tto("{}".format(name)) for name in names_wt]

            if len(cdwt1) == len(cdmut1):
                lrmsd1 = RMSD(cdwt1, cdmut1, len(reswt1))
            else:
                lrmsd1 = "NA"
            if len(cdwt2) == len(cdmut2):
                lrmsd2 = RMSD_SC(cdwt2, cdmut2, len(reswt1), atwt2, atmut2,
                                 resnamewt1, reswt1, dat, drot, drot1)
            else:
                lrmsd2 = "NA"

            g.write("{} {}".format(lrmsd1, lrmsd2))
Example #8
0
def _cysteine_cb_sg(chain, three_to_one):
    """Collect the CB and SG coordinates of the cysteines in *chain*.

    Only residues whose id has a blank first field and whose name converts
    to ``"C"`` through *three_to_one* are considered; a residue lacking
    either atom is skipped.

    Returns:
        (resids, cb_coords, sg_coords): three parallel lists; coordinates
        are ``[x, y, z]`` lists.
    """
    resids = []
    cb_coords = []
    sg_coords = []
    for residue in chain.get_list():
        res_id = residue.get_id()
        if res_id[0] != " ":
            continue
        try:
            letter = three_to_one("{}".format(residue.get_resname()))
        except Exception:
            # A name that cannot be converted is treated as unknown ("X").
            letter = "X"
        if letter != "C":
            continue

        cb = None
        sg = None
        for atom in residue.get_list():
            name = atom.get_id()
            if name == "CB":
                xyz = atom.get_coord()
                cb = [xyz[0], xyz[1], xyz[2]]
            elif name == "SG":
                xyz = atom.get_coord()
                sg = [xyz[0], xyz[1], xyz[2]]

        if cb is not None and sg is not None:
            resids.append(res_id[1])
            cb_coords.append(cb)
            sg_coords.append(sg)
    return resids, cb_coords, sg_coords


def disulphide_bond():
    """Flag mutation records whose cysteine has a nearby partner cysteine.

    Reads ``mut_data.txt`` (whitespace-separated records; fields 0 and 1 are
    structure ids, field 2 a residue number, fields 3 and 4 one-letter
    residue codes). For each record with index ``start <= k < end``:

    * if field 3 is ``C`` the structure of field 0 is used, otherwise if
      field 4 is ``C`` the structure of field 1; records with neither are
      written as ``NO``;
    * the cysteine at the given residue number is paired with every other
      cysteine of the chain whose SG atom is closer than ``zdis`` and for
      which ``dihedral`` (defined elsewhere in the file) returns a value
      strictly between 75 and 105;
    * the record is written followed by ``YES [partner residue numbers]`` or
      ``NO``.

    Command-line arguments:
        sys.argv[1]: first record index; also used in the scratch file name
            ``pdbprocess<start>.cif``.
        sys.argv[2]: end index (exclusive), or ``END`` / ``end`` for all.
        sys.argv[3]: output file path.

    A structure that cannot be processed is reported on stdout and the
    record is written as ``NO``; ``KeyboardInterrupt`` and ``SystemExit``
    are no longer swallowed.
    """
    import sys
    import gzip
    from Bio.PDB.MMCIFParser import MMCIFParser
    from Bio.PDB.Polypeptide import three_to_one as tto

    import numpy as np

    parser = MMCIFParser(QUIET=True)

    # Maximum SG-SG distance for a candidate partner.
    zdis = 2.2

    # Root directory of the gzipped mmCIF files.
    pathmmcif = "/Volumes/BIOINFO/mmCIF"

    with open("mut_data.txt", "r") as handle:
        ht = handle.readlines()

    start = sys.argv[1]
    end = sys.argv[2]
    if end == "END" or end == "end":
        end = len(ht)
    end = int(end)

    scratch = "pdbprocess{}.cif".format(start)

    with open("{}".format(sys.argv[3]), "w") as z:
        for k in range(int(start), end):
            mu = ht[k].split()
            record = "{} {} {} {} {}".format(mu[0], mu[1], mu[2], mu[3],
                                             mu[4].strip("\n"))
            pos = mu[2]

            # Pick the structure that carries the cysteine.
            if mu[3] == "C":
                pdbid = mu[0]
            elif mu[4] == "C":
                pdbid = mu[1]
            else:
                z.write(record + " NO\n")
                continue

            pdb = pdbid[0:4]  # PDB code
            chain_id = pdbid[5:6]  # chain

            print("*** {} :: {} of {} ***".format(pdb, k, len(ht)))

            partners = []
            try:
                # Decompress the structure to the scratch file and parse it.
                pdbfile = "{}/{}/{}.cif.gz".format(pathmmcif, pdb[1:3], pdb)
                with gzip.open(pdbfile, "rb") as tar, \
                        open(scratch, "wb") as out:
                    out.write(tar.read())

                structure = parser.get_structure("{}".format(pdb), scratch)
                chain = structure[0]["{}".format(chain_id)]
                resids, cb_coords, sg_coords = _cysteine_cb_sg(chain, tto)

                # Locate the cysteine at the mutated position (first match).
                target = int(pos)
                rcpos = None
                for idx, resid in enumerate(resids):
                    if int(resid) == target:
                        rcpos = idx
                        break

                if rcpos is not None:
                    v1 = sg_coords[rcpos]
                    v11 = cb_coords[rcpos]
                    for idx, v2 in enumerate(sg_coords):
                        if idx == rcpos:
                            continue
                        v21 = cb_coords[idx]
                        # Squared SG-SG distance.
                        dis = (v1[0] - v2[0])**2 + (v1[1] - v2[1])**2 + (
                            v1[2] - v2[2])**2
                        if dis < (zdis * zdis):
                            # NOTE(review): the points are passed in the
                            # order SG, CB, SG', CB' and only values in
                            # (75, 105) are accepted; confirm against
                            # dihedral() that this order and sign convention
                            # are intended.
                            ang = dihedral(np.array([v1, v11, v2, v21]))
                            if ang > 75.0 and ang < 105.0:
                                partners.append(resids[idx])
            except FileNotFoundError:
                print("FILE NOT FOUND")
                partners = []
            except Exception as exc:
                # Best effort: keep the batch running, but say what failed.
                print("ERROR {}: {}".format(type(exc).__name__, exc))
                partners = []

            if partners:
                z.write("{} YES {}\n".format(record, partners))
                print("{} YES {}".format(record, partners))
            else:
                z.write(record + " NO\n")
Example #9
0
def _neighbours_backbone_profile(chain):
    """Return per-residue backbone centres and temperature-factor summaries.

    For every residue of *chain* whose id has a blank first field, the N,
    CA, C and O atoms are passed to ``temp_factor`` and ``COM`` (both defined
    elsewhere in the file).

    Returns:
        (resids, centres, avg_tf, max_tf): four parallel lists; ``avg_tf``
        and ``max_tf`` hold elements 0 and 1 of the ``temp_factor`` result.
    """
    resids = []
    centres = []
    avg_tf = []
    max_tf = []
    for residue in chain.get_list():
        res_id = residue.get_id()
        if res_id[0] != " ":
            continue

        names = []
        coords = []
        bfactors = []
        for atom in residue.get_list():
            name = atom.get_id()
            if name in ("CA", "N", "C", "O"):
                names.append(name)
                coords.append(atom.get_coord())
                bfactors.append(atom.get_bfactor())

        tf = temp_factor(bfactors)
        centre = COM(names, coords)
        resids.append(res_id[1])
        centres.append(centre)
        avg_tf.append(tf[0])
        max_tf.append(tf[1])
    return resids, centres, avg_tf, max_tf


def neighbours():
    """Write the residues and temperature factors around each mutated site.

    Reads ``distinct_mutants_only_cluster.txt``; each line is split on
    commas, the first field giving the structure id and the remaining
    fields residue numbers (list punctuation is stripped). For every record
    with index ``start <= k < end`` and every residue number, four lines are
    written to the output file:

    1. the structure id,
    2. the residue number followed by the numbers of all residues whose
       backbone centre lies within ``zdis`` of it,
    3. the matching ``temp_factor`` element-0 values,
    4. the matching ``temp_factor`` element-1 values.

    When the residue number is not present in the chain, lines 3 and 4 are
    ``NA`` (previously they were undefined, or silently repeated the values
    of the preceding position). When a structure cannot be processed, a
    record ``<id> / NA / NA / NA`` is written and the remaining positions of
    that entry are skipped.

    Command-line arguments:
        sys.argv[1]: first record index; also used in the scratch file name
            ``pdbprocess<start>.cif``.
        sys.argv[2]: end index (exclusive), or ``END`` / ``end`` for all.
        sys.argv[3]: output file path.
    """
    import sys
    import gzip
    from Bio.PDB.MMCIFParser import MMCIFParser

    parser = MMCIFParser(QUIET=True)

    # Zone radius; compared (squared) against squared centre distances.
    zdis = 10.0

    # Root directory of the gzipped mmCIF files.
    pathmmcif = "/Users/tarun/Documents/mmCIF"

    with open("distinct_mutants_only_cluster.txt", "r") as handle:
        ht = handle.readlines()

    start = sys.argv[1]
    end = sys.argv[2]
    # Accept END/end like the sibling scripts do.
    if end == "END" or end == "end":
        end = len(ht)
    end = int(end)

    scratch = "pdbprocess{}.cif".format(start)

    with open("{}".format(sys.argv[3]), "w") as z:
        for k in range(int(start), end):
            mu = ht[k].split(',')

            # Same character set as before, written without the invalid
            # "\," escape: [ ] | \ , '
            pdbid = mu[0].strip("[]|\\,'")
            pdb = pdbid[0:4]  # PDB code
            chain_id = pdbid[5:6]  # chain

            print("*** {} :: {} of {} ***".format(pdb, k, len(ht)))

            try:
                # Decompress the structure to the scratch file and parse it.
                pdbfile = "{}/{}/{}.cif.gz".format(pathmmcif, pdb[1:3], pdb)
                with gzip.open(pdbfile, "rb") as tar, \
                        open(scratch, "wb") as out:
                    out.write(tar.read())

                structure = parser.get_structure("{}".format(pdb), scratch)
                chain = structure[0]["{}".format(chain_id)]
                resids, centres, avg_tf, max_tf = (
                    _neighbours_backbone_profile(chain))

                for field in mu[1:]:
                    pos = int(field.strip(" |[],'\n"))

                    # Defaults used when the position is absent from the
                    # chain.
                    zres = "{}".format(pos)
                    avgtf = "NA"
                    maxtf = "NA"

                    # First residue carrying this residue number.
                    rcpos = None
                    for idx, resid in enumerate(resids):
                        if int(resid) == pos:
                            rcpos = idx
                            break

                    if rcpos is not None:
                        avgtf = "{}".format(avg_tf[rcpos])
                        maxtf = "{}".format(max_tf[rcpos])
                        v1 = centres[rcpos]
                        for idx, v2 in enumerate(centres):
                            if idx == rcpos:
                                continue
                            # Squared distance between the two centres.
                            dis = (v1[0] - v2[0])**2 + (v1[1] - v2[1])**2 + (
                                v1[2] - v2[2])**2
                            if dis < (zdis * zdis):
                                zres = zres + ",{}".format(resids[idx])
                                avgtf = avgtf + ",{}".format(avg_tf[idx])
                                maxtf = maxtf + ",{}".format(max_tf[idx])

                    z.write("{}\n".format(pdbid))
                    z.write("{}\n".format(zres))
                    z.write("{}\n".format(avgtf))
                    z.write("{}\n".format(maxtf))
            except Exception as exc:
                # Best effort: keep the batch running, but say what failed.
                if isinstance(exc, FileNotFoundError):
                    print("FILE NOT FOUND")
                else:
                    print("ERROR {}: {}".format(type(exc).__name__, exc))
                z.write("{}\n".format(pdbid))
                z.write("NA\n")
                z.write("NA\n")
                z.write("NA\n")
Example #10
0
def neighbours():
    """Write the backbone neighbourhood of each mutated residue to ``zone.txt``.

    Inputs are taken from the command line:

    * ``sys.argv[1]`` -- MUTANTS file, one comma-separated line per entry;
      the first field is used as the entry id (characters 0-3 are the PDB
      name, character 5 is the chain).
    * ``sys.argv[2]`` -- MUTATIONS file, read line-by-line in step with the
      MUTANTS file; for every field after the first, the first and last
      characters are dropped and the remainder is kept as the position.

    Files written in the current directory:

    * ``structure.txt`` -- created/truncated only, nothing is written to it.
    * ``distinct_mutants.txt`` -- one Python-list repr per entry: the entry
      id followed by its distinct positions.
    * ``pdbprocess.pdb`` -- scratch copy of the decompressed PDB file.
    * ``COM.txt`` -- the list of backbone centres (as returned by ``COM``)
      for every structure that was parsed.
    * ``zone.txt`` -- for every position: the entry id, the comma-separated
      residue numbers (the position itself first, then every residue whose
      backbone centre lies closer than ``zdis``), and the concatenated
      one-letter residue names (``NA`` when the position is not in the
      chain).  A structure that fails to load contributes ``NA``/``NA``.

    Only the last ten entries of ``distinct_mutants.txt`` are processed.
    """
    import sys
    import gzip
    from Bio.PDB.PDBParser import PDBParser
    from Bio.PDB.Polypeptide import three_to_one as tto

    parser = PDBParser(PERMISSIVE=0, QUIET=True)

    # ZONE VARIABLE (compared as a squared distance below)
    zdis = 10.0
    zdis_sq = zdis * zdis

    # Atoms that contribute to the backbone centre of a residue.
    backbone = ("CA", "N", "C", "O")

    # PATH FOR THE PDB FILES
    pathPDB = "/bmm/data/rcsb/data/structures/all/pdb"

    with open("{}".format(sys.argv[1]), "r") as mutants_file:  # MUTANTS
        ft = mutants_file.readlines()

    with open("{}".format(sys.argv[2]), "r") as mutations_file:  # MUTATIONS
        gt = mutations_file.readlines()

    # The original script opened this file and never used or closed it; keep
    # the create/truncate side effect but release the handle immediately.
    open("structure.txt", "w").close()

    # DETERMINING THE DISTINCT MUTATIONS AND MUTANTS
    with open("distinct_mutants.txt", "w") as distinct:
        for k, mutant_line in enumerate(ft):
            mu = mutant_line.split(',')
            mut = gt[k].split(',')
            if len(mu) <= 1:
                continue

            positions = []
            for field in mut[1:]:
                # Drop the first and last character of the mutation string.
                mut_res = field.strip("\n")[1:-1]
                if mut_res not in positions:
                    positions.append(mut_res)
            distinct.write("{}\n".format([mu[0]] + positions))

    with open("distinct_mutants.txt", "r") as distinct:
        ht = distinct.readlines()

    # DETERMINING THE ZONE AROUND THE MUTANT SITE TO DETERMINE ANY STRUCTURAL
    # CHANGE TAKING PLACE DUE TO THE MUTATION

    # NOTE(review): only the last ten entries are processed, as in the
    # original.  The start index is clamped so that fewer than ten entries no
    # longer yields a negative index (IndexError / reprocessed lines).
    first = max(len(ht) - 10, 0)

    with open("zone.txt", "w") as z, open("COM.txt", "w") as temp:
        for k in range(first, len(ht)):
            mu = ht[k].split(',')

            # Same character set as the original '[|\,|\'|]' strip argument,
            # written without the invalid '\,' escape sequence.
            pdbid = mu[0].strip("[|\\,']")
            pdb = pdbid[0:4]  # PDB NAME
            C = pdbid[5:6]  # CHAIN

            print("*** {} :: {} of {} ***".format(pdb, k, len(ht)))

            # EXECUTE THE CODE TO PICK UP THE DESIRED ZONE AROUND THE RESIDUE
            try:
                pdbfile = "{}/pdb{}.ent.gz".format(pathPDB, pdb)
                with gzip.open(pdbfile, "rb") as packed, \
                        open("pdbprocess.pdb", "wb") as unpacked:
                    unpacked.write(packed.read())

                structure = parser.get_structure(pdb, "pdbprocess.pdb")
                chain = structure[0][C]

                resid_list = []
                resname_list = []
                com_list = []
                for entry in chain.get_list():  # ALL THE RESIDUES
                    res_id = entry.get_id()
                    # Only ids whose first field is blank are kept (Biopython
                    # uses a blank hetero flag for standard residues).
                    if res_id[0] != " ":
                        continue

                    residue = chain[res_id]
                    try:
                        resname = tto("{}".format(residue.get_resname()))
                    except Exception:
                        # Residue name without a one-letter code.
                        resname = "X"

                    res = []  # ATOM NAMES
                    cd = []  # ATOM COORDINATES
                    for atom_entry in residue.get_list():
                        atom_name = atom_entry.get_id()
                        # COM OF THE BACKBONE
                        if atom_name in backbone:
                            res.append(atom_name)
                            cd.append(residue[atom_name].get_coord())

                    com_list.append(COM(res, cd))
                    resid_list.append(res_id[1])
                    resname_list.append(resname)

                temp.write("{}\n".format(com_list))

                # DETERMINING THE ZONE AROUND THE MUTATED RESIDUE
                for field in mu[1:]:
                    pos = int(field.strip(" |[,]'\n"))
                    # The id line is written before the lookup so that a
                    # failure further down still leaves id / NA / NA.
                    z.write("{}\n".format(pdbid))

                    near_ids = ["{}".format(pos)]
                    near_names = ["NA"]

                    # Index of the first residue numbered `pos`, if any.
                    rcpos = next(
                        (i for i, rid in enumerate(resid_list)
                         if int(rid) == pos),
                        None)

                    if rcpos is not None:
                        near_names = ["{}".format(resname_list[rcpos])]
                        v1 = com_list[rcpos]
                        # COMPUTING THE (SQUARED) DISTANCES
                        for j, v2 in enumerate(com_list):
                            if j == rcpos:
                                continue
                            dist_sq = ((v1[0] - v2[0])**2 +
                                       (v1[1] - v2[1])**2 +
                                       (v1[2] - v2[2])**2)
                            if dist_sq < zdis_sq:
                                near_ids.append("{}".format(resid_list[j]))
                                near_names.append(
                                    "{}".format(resname_list[j]))

                    z.write("{}\n".format(",".join(near_ids)))
                    z.write("{}\n".format("".join(near_names)))
            except Exception as err:
                # Unlike the former bare `except:`, Ctrl-C / SystemExit now
                # propagate, and non-I/O failures are no longer reported as a
                # missing file.
                if isinstance(err, OSError):
                    print("FILE NOT FOUND")
                else:
                    print("FAILED {}: {!r}".format(pdbid, err))
                # NOTE(review): when loading fails before any position was
                # handled, this record has no id line in front of it, unlike
                # the three-line success records -- confirm with whatever
                # consumes zone.txt before changing the layout.
                z.write("NA\nNA\n")
Example #11
0
def neighbours():
    """Write, for each listed mutation, the residues near it at radii 4-10.

    Command-line arguments:

    * ``sys.argv[1]`` -- input table; each line holds at least five
      whitespace-separated fields: wild-type id, mutant id, residue
      position, wild-type residue, mutant residue.  For both ids,
      characters 0-3 are the PDB name and character 5 is the chain.
    * ``sys.argv[2]`` -- index of the first line to process.
    * ``sys.argv[3]`` -- index one past the last line, or ``END``/``end``
      for the whole file.
    * ``sys.argv[4]`` -- output name; ``.txt`` is appended.

    For every line whose wild-type structure loads and whose position is
    present in the chain, one output line is written:
    ``pdb chain pdbmut chainmut pos zone4 ... zone10 reswt resmut`` where
    each ``zoneN`` is the comma-separated list of residue numbers (position
    first) whose backbone centre, as returned by ``COM``, is closer than
    ``N`` to that of the mutated residue.  Lines that fail are reported on
    stdout and produce no output.

    The decompressed mmCIF is staged in ``pdbprocess<start>.cif`` in the
    current directory.
    """
    import sys
    import gzip
    from Bio.PDB.MMCIFParser import MMCIFParser

    # One-letter residue names are not part of this output, so the
    # three-letter -> one-letter helper used by the sibling scripts is not
    # imported here.
    parser = MMCIFParser(QUIET=True)

    # PATH FOR THE mmCIF FILES
    pathmmcif = "/Volumes/BIOINFO/mmCIF"

    # Atoms that contribute to the backbone centre of a residue.
    backbone = ("CA", "N", "C", "O")

    # ZONE RADII, same sequence as range(4, 11, 1)
    radii = range(4, 11)

    with open("{}".format(sys.argv[1]), "r") as table:
        ft = table.readlines()

    with open("{}.txt".format(sys.argv[4]), "w") as z:
        start = sys.argv[2]
        end = sys.argv[3]
        end = len(ft) if end.lower() == "end" else int(end)
        # Never index past the table; the original raised IndexError here.
        end = min(end, len(ft))

        scratch = "pdbprocess{}.cif".format(start)

        for k in range(int(start), end):
            mu = ft[k].split()
            if len(mu) < 5:
                # Blank or truncated line: nothing to look up.
                print("SKIPPING MALFORMED LINE {}".format(k))
                continue

            pdbid = mu[0]
            pdb = pdbid[0:4]  # PDB NAME
            C = pdbid[5:6]  # CHAIN

            pdbidmut = mu[1]
            pdbmut = pdbidmut[0:4]
            Cmut = pdbidmut[5:6]
            reswt = mu[3]
            resmut = mu[4]

            print("*** {} :: {} of {} ***".format(pdb, k, end))

            # EXECUTE THE CODE TO PICK UP THE DESIRED ZONE AROUND THE RESIDUE
            try:
                # Files are sharded by the two middle characters of the id.
                pdbfile = "{}/{}/{}.cif.gz".format(pathmmcif, pdb[1:3], pdb)
                with gzip.open(pdbfile, "rb") as packed, \
                        open(scratch, "wb") as unpacked:
                    unpacked.write(packed.read())

                structure = parser.get_structure(pdb, scratch)
                chain = structure[0][C]

                resid_list = []
                com_list = []
                for entry in chain.get_list():  # ALL THE RESIDUES
                    res_id = entry.get_id()
                    # Only ids whose first field is blank are kept (Biopython
                    # uses a blank hetero flag for standard residues).
                    if res_id[0] != " ":
                        continue

                    residue = chain[res_id]
                    res = []  # ATOM NAMES
                    cd = []  # ATOM COORDINATES
                    for atom_entry in residue.get_list():
                        atom_name = atom_entry.get_id()
                        # COM OF THE BACKBONE
                        if atom_name in backbone:
                            res.append(atom_name)
                            cd.append(residue[atom_name].get_coord())

                    com_list.append(COM(res, cd))
                    resid_list.append(res_id[1])

                # DETERMINING THE ZONE AROUND THE MUTATED RESIDUE
                pos = int(mu[2])
                # Index of the first residue numbered `pos`, if any.
                rcpos = next(
                    (i for i, rid in enumerate(resid_list)
                     if int(rid) == pos),
                    None)

                if rcpos is not None:
                    v1 = com_list[rcpos]
                    # Squared distances do not depend on the radius, so they
                    # are computed once and filtered per radius below.
                    others = []
                    for j, v2 in enumerate(com_list):
                        if j == rcpos:
                            continue
                        dist_sq = ((v1[0] - v2[0])**2 +
                                   (v1[1] - v2[1])**2 +
                                   (v1[2] - v2[2])**2)
                        others.append((resid_list[j], dist_sq))

                    list1 = [pos]
                    for zdis in radii:
                        limit = zdis * zdis
                        members = ["{}".format(pos)]
                        members.extend("{}".format(rid)
                                       for rid, dist_sq in others
                                       if dist_sq < limit)
                        list1.append(",".join(members))

                    fields = [pdb, C, pdbmut, Cmut] + list1 + [reswt, resmut]
                    z.write(" ".join("{}".format(item) for item in fields)
                            + "\n")
            except Exception as err:
                # Unlike the former bare `except:`, Ctrl-C / SystemExit now
                # propagate, and non-I/O failures are no longer reported as a
                # missing file.  As before, nothing is written for the line.
                if isinstance(err, OSError):
                    print("FILE NOT FOUND")
                else:
                    print("FAILED {}: {!r}".format(pdbid, err))