Beispiel #1
0
def copyDataFromTarget(target, model, labels=('psipred', 'netsurfp')):
    """Copy per-residue data arrays from *target* onto the atoms of *model*.

    Every data label of *target* that starts with one of the prefixes in
    *labels* is transferred.  For each target chain, the model chain with the
    same chain identifier is used; when the model has no such chain, the
    first pair returned by ``prody.matchChains`` (``seqid=50, overlap=20``)
    selects the model chain.  Residues are paired by residue number and the
    value read from the target residue's CA selection is written to the atom
    indices of the matching model residue.

    Model atoms that receive no value keep the zero value of the array's
    dtype; for string data (bytes or unicode) those blanks become ``'-'``.

    Args:
        target: structure providing the data (``getDataLabels``, ``getData``,
            ``iterChains`` and chain-id indexing are used).
        model: structure receiving the data through ``setData``.
        labels: iterable of label prefixes to copy.

    Returns:
        None.  The arrays are attached to *model* with ``model.setData``.
    """
    prefixes = tuple(labels)
    data = {}
    for label in target.getDataLabels():
        if label.startswith(prefixes):
            data[label] = np.zeros(model.numAtoms(),
                                   dtype=target.getData(label).dtype)

    # Collect the model's chain identifiers once rather than per target chain.
    model_chids = set(chain.getChid() for chain in model.iterChains())

    for tchain in target.iterChains():
        chid = tchain.getChid()

        # try to find matching chain first
        if chid in model_chids:
            mchain = chid
        else:
            best = prody.matchChains(model, target[chid],
                                     seqid=50, overlap=20)[0][0]
            mchain = best.copy().getChids()[0]

        for tres in tchain.ca.copy().iterResidues():
            try:
                indices = model[mchain][tres.getResnum()].getIndices()
            except AttributeError:
                # target may have more residues than the model: the lookup
                # yields None for a residue number the model does not have
                continue
            for label, values in data.items():
                values[indices] = tres.ca.getData(label)

    for label, values in data.items():
        # 'S' is the bytes dtype, 'U' the unicode dtype that Python 3 string
        # arrays use; the dtype's own empty scalar compares correctly to both.
        if values.dtype.kind in ('S', 'U'):
            values[values == values.dtype.type()] = '-'
        model.setData(label, values)
Beispiel #2
0
def fix_openmm():
    """Build a tleap-ready ``start.pdb`` from the crystal structure, then run tleap.

    All files are read from and written to the current working directory:

    1. Copy the ATOM records of ``experimental.pdb`` to ``no_smet.pdb``,
       rewriting MSE HETATM records as MET ATOM records (SE renamed to SD).
    2. Match the non-hydrogen atoms of that structure against ``rosetta.pdb``
       and write the matched part of the crystal structure to ``chain.pdb``.
    3. Run PdbFixer on ``chain.pdb``.
    4. Load ``output.pdb`` (presumably written by PdbFixer -- TODO confirm),
       remove hydrogens, rename disulfide-bonded residues to CYX and write
       ``start.pdb``.
    5. Run tleap with the detected disulfide pairs.

    Raises:
        IndexError: if no chain of the crystal structure matches the rosetta
            model.
        subprocess.CalledProcessError: if the PdbFixer command exits with a
            non-zero status.
    """
    # get the whole crystal structure
    # get only the ATOM records
    # and HETATM records for MSE
    # convert MSE to MET
    with open('no_smet.pdb', 'w') as outfile, \
            open('experimental.pdb') as infile:
        for line in infile:
            if line.startswith('ATOM'):
                outfile.write(line)
            elif line.startswith('HETATM') and line[17:20] == 'MSE':
                atom_name = line[12:17]
                if atom_name == 'SE   ':
                    atom_name = ' SD  '
                # The record is cut at column 67, which drops everything
                # after it (including the element field of the original line).
                outfile.write('ATOM  ' + line[6:12] + atom_name + 'MET' +
                              line[20:67] + '\n')

    # load the file into prody
    crystal = prody.parsePDB('no_smet.pdb').select('not hydrogen')

    # get one of the rosetta models
    rosetta = prody.parsePDB('rosetta.pdb')

    # perform an alignment to find out what part of the crystal structure
    # corresponds to the rosetta file
    matches = prody.matchChains(rosetta, crystal, subset='all', overlap=25,
                                pwalign=True)
    if not matches:
        raise IndexError('no chain of experimental.pdb matches rosetta.pdb')
    match = matches[0][1]
    print(len(match))
    prody.writePDB('chain.pdb', match)

    # now clean it up with pdb fixer
    # (shell=True is needed so that the shell expands '~')
    subprocess.check_call('python ~/Source/PdbFixer/pdbfixer.py chain.pdb',
                          shell=True)

    # now load it with zam
    p = protein.Protein('output.pdb')
    p.Dehydrogen()
    disulfide_pairs = find_disulfide(p)
    for r1, r2 in disulfide_pairs:
        print('    added disulfide between {} and {}'.format(r1, r2))
        p.Res[r1].FullName = 'CYX'
        p.Res[r2].FullName = 'CYX'
    p.WritePdb('start.pdb')

    # now run tleap
    print('    running tleap')
    run_tleap(disulfide_pairs)
Beispiel #3
0
def calc_pocket_rmsd(rec, lig, root):
    """
    Calculate difference between the ligand reference receptor and
    the receptor it is being docked into.

    The pocket is the set of receptor protein residues selected by
    "same residue as within 3.5 of point" around the ligand atoms.  The
    receptor is chain-matched against the ligand's own receptor at several
    overlap/sequence-identity cutoffs, and for every match the RMSD over the
    matched pocket atoms is computed with ``prody.calcRMSD``.

    Args:
        rec: receptor PDB file name, relative to *root*.
        lig: ligand SDF file name, relative to *root*.  The reference
            receptor file name is derived from it by replacing
            "LIG_aligned.sdf" with "PRO.pdb".
        root: directory containing the files.

    Returns:
        ``(minrmsd, minbackrmsd)``: the smallest pocket RMSD found over all
        matches and the CA-only RMSD of that same match.  Both are
        ``np.inf`` when no match could be evaluated.

    From original script by David Koes
    """
    ligrec = lig.replace("LIG_aligned.sdf", "PRO.pdb")
    rec = prody.parsePDB(os.path.join(root, rec))
    ligrec = prody.parsePDB(os.path.join(root, ligrec))
    lig = next(pybel.readfile("sdf", os.path.join(root, lig)))
    c = np.array([a.coords for a in lig.atoms])
    nearby = rec.select("protein and same residue as within 3.5 of point",
                        point=c)
    if nearby is None:
        # No receptor atom close to the ligand: nothing can be compared.
        return np.inf, np.inf
    # Build the pocket lookup once; it does not depend on the match.
    closeatoms = set(nearby.getIndices())

    matches = []
    for cutoff in range(90, 0, -10):
        # can't just set a low cutoff since we'll end up with bad alignments
        # try a whole bunch of alignments to maximize the likelihood we get the right one
        m = prody.matchChains(rec,
                              ligrec,
                              subset="all",
                              overlap=cutoff,
                              seqid=cutoff,
                              pwalign=True)
        if m:
            matches += m

    minrmsd = np.inf
    minbackrmsd = np.inf
    for rmap, lrmap, _, _ in matches:
        try:
            ratoms = []
            lratoms = []
            for ridx, lridx in zip(rmap.getIndices(), lrmap.getIndices()):
                if ridx in closeatoms:
                    ratoms.append(ridx)
                    lratoms.append(lridx)
            if not ratoms:
                continue
            rmsd = prody.calcRMSD(rec[ratoms], ligrec[lratoms])
            backrmsd = prody.calcRMSD(rec[ratoms] & rec.ca,
                                      ligrec[lratoms] & ligrec.ca)
        except Exception:
            # Best effort: a match that cannot be evaluated is skipped, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            continue
        if rmsd < minrmsd:
            minrmsd = rmsd
            minbackrmsd = backrmsd
    return minrmsd, minbackrmsd
Beispiel #4
0
def fix_openmm():
    """Build a tleap-ready ``start.pdb`` from the crystal structure, then run tleap.

    All files are read from and written to the current working directory:

    1. Copy the ATOM records of ``experimental.pdb`` to ``no_smet.pdb``,
       rewriting MSE HETATM records as MET ATOM records (SE renamed to SD).
    2. Match the non-hydrogen atoms of that structure against ``rosetta.pdb``
       and write the matched part of the crystal structure to ``chain.pdb``.
    3. Run PdbFixer on ``chain.pdb``.
    4. Load ``output.pdb`` (presumably written by PdbFixer -- TODO confirm),
       remove hydrogens, rename disulfide-bonded residues to CYX and write
       ``start.pdb``.
    5. Run tleap with the detected disulfide pairs.

    Raises:
        IndexError: if no chain of the crystal structure matches the rosetta
            model.
        subprocess.CalledProcessError: if the PdbFixer command exits with a
            non-zero status.
    """
    # get the whole crystal structure
    # get only the ATOM records
    # and HETATM records for MSE
    # convert MSE to MET
    with open('no_smet.pdb', 'w') as outfile, \
            open('experimental.pdb') as infile:
        for line in infile:
            if line.startswith('ATOM'):
                outfile.write(line)
            elif line.startswith('HETATM') and line[17:20] == 'MSE':
                atom_name = line[12:17]
                if atom_name == 'SE   ':
                    atom_name = ' SD  '
                # The record is cut at column 67, which drops everything
                # after it (including the element field of the original line).
                line_fixed = 'ATOM  ' + line[6:12] + atom_name + 'MET' + line[20:67] + '\n'
                outfile.write(line_fixed)

    # load the file into prody
    crystal = prody.parsePDB('no_smet.pdb')
    crystal = crystal.select('not hydrogen')

    # get one of the rosetta models
    rosetta = prody.parsePDB('rosetta.pdb')

    # perform an alignment to find out what part of the crystal structure
    # corresponds to the rosetta file
    matches = prody.matchChains(rosetta, crystal, subset='all', overlap=25, pwalign=True)
    if not matches:
        raise IndexError('no chain of experimental.pdb matches rosetta.pdb')
    match = matches[0][1]
    print(len(match))
    prody.writePDB('chain.pdb', match)

    # now clean it up with pdb fixer
    # (shell=True is needed so that the shell expands '~')
    subprocess.check_call('python ~/Source/PdbFixer/pdbfixer.py chain.pdb', shell=True)

    # now load it with zam
    p = protein.Protein('output.pdb')
    p.Dehydrogen()
    disulfide_pairs = find_disulfide(p)
    for r1, r2 in disulfide_pairs:
        print('    added disulfide between {} and {}'.format(r1, r2))
        p.Res[r1].FullName = 'CYX'
        p.Res[r2].FullName = 'CYX'
    p.WritePdb('start.pdb')

    # now run tleap
    print('    running tleap')
    run_tleap(disulfide_pairs)
Beispiel #5
0
def calcS2(model_list, S2_records, S2_type, fit, fit_range):
    """Return a dictionary with the average S2 values.

    ``S2_calced[residue] = value``

    Args:
        model_list: sequence of models; the first one is the fitting
            reference.  Each model is iterated atom by atom.
        S2_records: iterable of records exposing a ``resnum`` attribute.
        S2_type: name of the heavy atom of the bond vector, ``'N'`` (paired
            with ``'H'``) or ``'CA'`` (paired with ``'HA'``).
        fit: when true and ``PDB_model.is_fitted`` is false, every model
            after the first is superposed onto the first one in place and
            ``PDB_model.is_fitted`` is set to True.
        fit_range: optional ``"start-end"`` string restricting the atoms of
            the matched chain that get weight 1 in the superposition; a
            falsy value weights all matched atoms equally.

    Returns:
        dict mapping ``int(record.resnum)`` to
        ``3/2 * (<x2>^2 + <y2>^2 + <z2>^2 + 2<xy>^2 + 2<xz>^2 + 2<yz>^2) - 1/2``
        where ``<.>`` is the average over all models of the products of the
        normalized bond vector's components.

    Raises:
        KeyError: if a model has no bond vector for a requested residue.
    """
    # fitting models
    reference = model_list[0]

    if fit and not PDB_model.is_fitted:
        print("Start FITTING")
        for model_idx in range(1, len(model_list)):
            mobile = model_list[model_idx]
            match = prody.matchChains(reference, mobile)[0]
            ref_chain = match[0]
            mob_chain = match[1]

            # The builtin int replaces np.int, a plain alias of it that was
            # removed in NumPy 1.24.
            if fit_range:
                weights = np.zeros((len(ref_chain), 1), dtype=int)

                fit_start, fit_end = fit_range.split('-')

                # NOTE(review): the atom at position fit_end itself is not
                # weighted -- confirm whether the range is meant to be
                # inclusive.
                for atom_idx in range(int(fit_start) - 1, int(fit_end) - 1):
                    weights[atom_idx] = 1

            else:
                weights = np.ones((len(ref_chain), 1), dtype=int)

            t = prody.calcTransformation(mob_chain, ref_chain, weights)
            t.apply(mobile)

        PDB_model.is_fitted = True

    # get NH vectors from models (model_data[] -> vectors{resnum : vector})
    model_data = []
    s2_pairs = {'N': 'H', 'CA': 'HA'}

    for model in model_list:
        current_Resindex = 1
        has_first, has_second = False, False
        vectors = {}

        for atom in model:
            # why not .getResnum() ???
            atom_res = atom.getResindex() + 1

            # A new residue starts: forget atoms seen in the previous one.
            if atom_res != current_Resindex:
                current_Resindex = atom_res
                has_first, has_second = False, False

            if atom.getName() == S2_type:
                has_second = True
                N_coords = Vec_3D(atom.getCoords())

            elif atom.getName() == s2_pairs[S2_type]:
                has_first = True
                H_coords = Vec_3D(atom.getCoords())

            # Both atoms of the pair were found in this residue.
            if has_first and has_second:
                has_first, has_second = False, False
                vectors[atom_res] = Vec_3D(N_coords - H_coords).normalize()

        model_data.append(vectors)

    S2_calced = {}
    model_count = len(model_data)

    # iterating over STR records
    for s2rec in S2_records:
        resnum = int(s2rec.resnum)

        x2, y2, z2, xy, xz, yz = 0, 0, 0, 0, 0, 0

        # iterating over PDB models
        for m in model_data:

            # coordinates in model at a given resnum
            x, y, z = m[resnum].v[0], m[resnum].v[1], m[resnum].v[2]

            x2 += x**2
            y2 += y**2
            z2 += z**2
            xy += x * y
            xz += x * z
            yz += y * z

        x2 /= model_count
        y2 /= model_count
        z2 /= model_count
        xy /= model_count
        xz /= model_count
        yz /= model_count

        # S2 calculation
        s2 = 3 / 2.0 * (x2**2 + y2**2 + z2**2 + 2 * xy**2 + 2 * xz**2 +
                        2 * yz**2) - 0.5

        S2_calced[resnum] = s2

    return S2_calced
Beispiel #6
0
def s2_values(
    model_data, calculate_on_models, s2_records, s2_type, fit, fit_range
):
    """Return a dictionary with the average S2 values.

    ``s2_calced[residue] = value``

    Args:
        model_data: object whose ``atomgroup`` attribute holds the
            multi-model structure; its active coordinate set index is
            changed by this function.
        calculate_on_models: iterable of coordinate set indices to use; it
            is iterated more than once.
        s2_records: iterable of records exposing a ``resnum`` attribute.
        s2_type: name of the heavy atom of the bond vector, ``"N"`` (paired
            with ``"H"``) or ``"CA"`` (paired with ``"HA"``).
        fit: when true, the coordinate sets are aligned on the ``calpha``
            selection before the vectors are collected.
        fit_range: optional ``"start-end"`` string; when given (and *fit*
            is true) each selected model is additionally transformed with
            weights restricted to that range of the matched chain.

    Returns:
        dict mapping ``int(record.resnum)`` to
        ``3/2 * (<x2>^2 + <y2>^2 + <z2>^2 + 2<xy>^2 + 2<xz>^2 + 2<yz>^2) - 1/2``
        where ``<.>`` is the average over the selected models of the
        products of the normalized bond vector's components.

    Raises:
        KeyError: if a model has no bond vector for a requested residue.
    """
    if fit:
        reference = model_data.atomgroup[:]

        model_data.atomgroup.setACSIndex(0)
        prody.alignCoordsets(model_data.atomgroup.calpha)

        if fit_range:
            for model_num in calculate_on_models:
                model_data.atomgroup.setACSIndex(model_num)

                mobile = model_data.atomgroup[:]
                match = prody.matchChains(reference, mobile)[0]
                ref_chain = match[0]
                mob_chain = match[1]

                # The builtin int replaces np.int, a plain alias of it that
                # was removed in NumPy 1.24.
                weights = np.zeros((len(ref_chain), 1), dtype=int)

                fit_start, fit_end = fit_range.split("-")

                # NOTE(review): the atom at position fit_end itself is not
                # weighted -- confirm whether the range is meant to be
                # inclusive.
                for atom_idx in range(int(fit_start) - 1, int(fit_end) - 1):
                    weights[atom_idx] = 1

                t = prody.calcTransformation(mob_chain, ref_chain, weights)
                t.apply(mobile)

    # get NH vectors from models (vector_data[] -> vectors{resnum : vector})
    vector_data = []
    s2_pairs = {"N": "H", "CA": "HA"}
    h_coords = None
    n_coords = None

    for model_num in calculate_on_models:
        model_data.atomgroup.setACSIndex(model_num)
        current_resindex = 1
        has_first, has_second = False, False
        vectors = {}

        for atom in model_data.atomgroup:
            atom_res = atom.getResnum()

            # A new residue starts: forget atoms seen in the previous one.
            if atom_res != current_resindex:
                current_resindex = atom_res
                has_first, has_second = False, False

            if atom.getName() == s2_type:
                has_second = True
                n_coords = Vec3D(atom.getCoords())

            elif atom.getName() == s2_pairs[s2_type]:
                has_first = True
                h_coords = Vec3D(atom.getCoords())

            # Both atoms of the pair were found in this residue.
            if has_first and has_second:
                has_first, has_second = False, False
                vectors[atom_res] = Vec3D(n_coords - h_coords).normalize()

        vector_data.append(vectors)

    s2_calced = {}
    model_count = len(vector_data)

    # iterating over STR records
    for s2rec in s2_records:
        resnum = int(s2rec.resnum)

        x2, y2, z2, xy, xz, yz = 0, 0, 0, 0, 0, 0

        # iterating over PDB models
        for m in vector_data:

            # coordinates in model at a given resnum
            x, y, z = m[resnum].v[0], m[resnum].v[1], m[resnum].v[2]

            x2 += x ** 2
            y2 += y ** 2
            z2 += z ** 2
            xy += x * y
            xz += x * z
            yz += y * z

        x2 /= model_count
        y2 /= model_count
        z2 /= model_count
        xy /= model_count
        xz /= model_count
        yz /= model_count

        squares = x2 ** 2 + y2 ** 2 + z2 ** 2
        cross_terms = 2 * xy ** 2 + 2 * xz ** 2 + 2 * yz ** 2
        s2 = 3 / 2.0 * (squares + cross_terms) - 0.5

        s2_calced[resnum] = s2

    return s2_calced
Beispiel #7
0
    def align():
        """Dock the challenge ligands with smina and collect the docked poses.

        Walks every entry of ``<wd>/challengedata`` (skipping a fixed list of
        bookkeeping names) and every target directory inside it.  Targets
        for which ``answers/<week>/<target>/lmcss_docked.sdf`` already exists
        relative to the current working directory are skipped.  For each
        ``LMCSS*.pdb`` file that is not a ``*lig.pdb`` file the function:

        * writes the lines matching ``ATOM`` to ``lmcss_rec.pdb`` using grep,
        * runs ``rdkit-scripts/rdconf.py`` on every ``.smi`` file to produce
          ``lig.sdf``,
        * runs smina with ``--autobox_ligand`` set to the ``*lig.pdb`` file,
          writing ``lmcss_docked.sdf``,
        * runs ``obrms``, appends a line to ``visual.txt`` and copies
          ``lmcss_docked.sdf`` into ``answers/<week>/<target>/``.

        Uses the global ``wd`` as the base directory and changes the process
        working directory repeatedly with ``os.chdir``; statement order
        therefore matters throughout.

        NOTE(review): the local names ``input`` and ``id`` shadow builtins.
        """

        global wd
        ans = wd + '/challengedata/answers'
        if os.path.isdir(
                ans) == False:  #if the answers directory isn't there yet, make it
            os.mkdir(wd + '/challengedata/answers')
        rddir = wd + '/challengedata/rdkit-scripts'
        if os.path.isdir(rddir) == False:
            # NOTE(review): the clone lands in the current working directory,
            # which is not necessarily wd + '/challengedata' -- TODO confirm
            a = 'git clone https://github.com/dkoes/rdkit-scripts'
            os.system(a)
        data = os.listdir(wd + '/challengedata')
        for x in (data):  #for each week's data
            if x == "readme.txt" or x == "latest.txt" or x == "answers" or x == "rdkit-scripts" or x == 'PDBfiles' or x == 'visual.txt':
                pass
            else:
                toDir = wd + '/challengedata/answers/' + x
                if os.path.isdir(
                        toDir
                ) == False:  #if the path to answers dir doesn't exist
                    os.mkdir(toDir)  #make directory
                dock = os.listdir(wd + '/challengedata/' + x)
                for y in (dock):
                    # path of an already docked result, relative to the
                    # current working directory
                    a = str(os.getcwd() + '/answers/' + x + '/' + y +
                            '/lmcss_docked.sdf')
                    if y == 'readme.txt' or y == 'new_release_structure_sequence_canonical.tsv' or y == 'new_release_structure_nonpolymer.tsv' or y == 'new_release_crystallization_pH.tsv' or y == 'new_release_structure_sequence.tsv':
                        pass
                    elif (os.path.isfile(a) == True):
                        pass
                    else:
                        input = os.listdir(wd + '/challengedata/' + x + '/' +
                                           y)
                        for z in (input):
                            if z.startswith("LMCSS") and z.endswith(".pdb"):
                                if (z.endswith("lig.pdb")):
                                    pass
                                else:
                                    # NOTE(review): str.strip removes any of
                                    # the characters '.', 'p', 'd', 'b' from
                                    # both ends, not just a '.pdb' suffix
                                    id = z.strip('.pdb')

                                    sts = str("grep ATOM " + z +
                                              " > lmcss_rec.pdb"
                                              )  #creates receptor .pdb file
                                    cd = wd + '/challengedata'
                                    os.chdir(
                                        cd + '/' + x + '/' +
                                        y)  #change directory to week/ligand
                                    os.system(
                                        sts
                                    )  #runs and creates receptor .pdb file
                                    os.chdir(cd)  #back to challenge directory
                                    input = os.listdir(
                                        cd + '/' + x + '/' + y
                                    )  #lists files inside ligand in certain week
                                    for z in (input):
                                        if z.endswith(
                                                ".smi"
                                        ):  # changes .smi -> lig.sdf
                                            cd = str(os.getcwd())
                                            sts = str(" " + cd + '/' + x +
                                                      '/' + y + '/' + z +
                                                      " lig.sdf --maxconfs 1")
                                            os.chdir(cd + '/' + x + '/' + y)
                                            os.system(
                                                cd +
                                                '/rdkit-scripts/rdconf.py' +
                                                sts)
                                            os.chdir(cd)

                                    for z in (input):  # runs smina
                                        if z.endswith("lig.pdb"):
                                            # this first command string is
                                            # built but never executed (the
                                            # os.system call is commented out)
                                            sts = str(
                                                "smina -r lmcss_rec.pdb -l lig.sdf --autobox_ligand "
                                                + z + " -o " + id +
                                                "_docked.sdf")
                                            cd = str(
                                                os.getcwd())  #ligand directory
                                            os.chdir(cd + '/' + x + '/' + y)
                                            #os.system(sts)

                                            sts = str(
                                                "smina -r lmcss_rec.pdb -l lig.sdf --autobox_ligand "
                                                + z + " -o lmcss_docked.sdf")
                                            # NOTE(review): cd is re-read
                                            # after the chdir above, so the
                                            # next chdir appends x/y a second
                                            # time -- TODO confirm intent
                                            cd = str(
                                                os.getcwd())  #ligand directory
                                            os.chdir(cd + '/' + x + '/' + y)
                                            os.system(sts)
                                            os.chdir(cd)

                                    cur = str(os.getcwd() + '/answers/' + x +
                                              '/' + y)
                                    if (os.path.isdir(cur) == True):
                                        os.chdir(cd + '/' + x + '/' + y)
                                        os.getcwd()  ## result is discarded
                                        input = os.listdir(cd + '/' + x + '/' +
                                                           y)

                                        for i in (input):
                                            if i.endswith(
                                                    ".txt"
                                            ) and i != "center.txt" and i != "visual.txt":
                                                # NOTE(review): this file is
                                                # never closed
                                                f = open(i)
                                                lines = f.readlines()
                                                # NOTE(review): strip() takes
                                                # a character set, not a
                                                # prefix
                                                ligand = lines[2].strip(
                                                    'ligand, ')
                                                ligand = ligand.replace(
                                                    '\n', '')
                                                ligand = str(ligand)
                                                #gets the ligand from txt file
                                            if i.endswith("lig.pdb"):
                                                #see if pdb exists
                                                prody.fetchPDB(y)
                                                proteinPDB = prody.parsePDB(y)
                                                ourPDB = prody.parsePDB(
                                                    'lmcss_rec.pdb')
                                                a, b, seqid, overlap = prody.matchChains(
                                                    proteinPDB, ourPDB)[0]
                                                b, protein_sp = prody.superpose(
                                                    b, a, weights=None)
                                                # NOTE(review): the result of
                                                # select() is discarded, and
                                                # ligand is only bound if a
                                                # .txt file was listed earlier
                                                b.select(ligand +
                                                         '_ligand.pdb')
                                                sts = str("obrms -f " + i +
                                                          ' ' + id +
                                                          "_docked.sdf")
                                                #run obrms
                                                # parse results and output to the visualization txt file
                                                os.system(sts)
                                                f = open('visual.txt', 'ab+')
                                                f.write(x + '	smina	' + y +
                                                        '\n')
                                                # NOTE(review): f.close is
                                                # referenced but not called
                                                f.close
                                                curdir = str(cd + '/' + x +
                                                             '/' + y + '/' +
                                                             id +
                                                             '_docked.sdf')
                                        print(input)  ##
                                        for i in (input):
                                            if i.endswith("lig.pdb"):
                                                #see if pdb exists
                                                protein = prody.fetchPDB(y)
                                                #NEED NUMPY ARRAY
                                                # NOTE(review): `array` is not
                                                # assigned anywhere in this
                                                # function -- NameError unless
                                                # it exists in an outer scope
                                                prody.writeArray(
                                                    'lmcss_docked_array.sdf',
                                                    array)
                                                prody.superpose(
                                                    'lmcss_docked.sdf',
                                                    protein,
                                                    weights=None)
                                                sts = str("obrms -f " + i +
                                                          " lmcss_docked.sdf")
                                                #run obrms
                                                # parse results and output to the visualization txt file
                                                os.system(sts)
                                                os.chdir(wd +
                                                         '/challengedata/')
                                                f = open('visual.txt', 'ab+')
                                                f.write(x + '	smina	' + y +
                                                        '\n')
                                                # NOTE(review): f.close is
                                                # referenced but not called
                                                f.close
                                                curdir = str(
                                                    cd + '/' + x + '/' + y +
                                                    '/lmcss_docked.sdf')
                                                todir = str(cd + '/answers/' +
                                                            x + '/' + y + '/')
                                                shutil.copy(curdir, todir)
                                                print(curdir)
                                                break
                                        os.chdir(wd)
                                    else:
                                        # answers/<week>/<target> does not
                                        # exist yet: create it, then do the
                                        # same processing as the branch above
                                        os.mkdir(cur)
                                        os.chdir(cd + '/' + x + '/' + y)
                                        input = os.listdir(cd + '/' + x + '/' +
                                                           y)
                                        for i in (input):
                                            if i.endswith(
                                                    ".txt"
                                            ) and i != "center.txt" and i != "visual.txt":
                                                # NOTE(review): this file is
                                                # never closed
                                                f = open(i)
                                                lines = f.readlines()
                                                # NOTE(review): strip() takes
                                                # a character set, not a
                                                # prefix
                                                ligand = lines[2].strip(
                                                    "ligand, ")
                                                ligand = ligand.replace(
                                                    '\n', '')
                                                ligand = str(ligand)
                                                #gets ligand from txt file

                                            if i.endswith("lig.pdb"):
                                                prody.fetchPDB(y)
                                                proteinPDB = prody.parsePDB(y)
                                                ourPDB = prody.parsePDB(
                                                    'lmcss_rec.pdb')
                                                # NOTE(review): the match
                                                # result is discarded here
                                                prody.matchChains(
                                                    proteinPDB, ourPDB)
                                                # NOTE(review): the other
                                                # branch unpacks two values
                                                # from superpose(); here the
                                                # result is used as a single
                                                # object -- TODO confirm
                                                protein_sp = prody.superpose(
                                                    ourPDB,
                                                    proteinPDB,
                                                    weights=None)
                                                protein_sp.select(
                                                    ligand + '_ligand.pdb')
                                                sts = str("obrms -f " + i +
                                                          ' ' + id +
                                                          "_docked.sdf")
                                                os.system(sts)
                                                f = open('visual.txt', 'ab+')
                                                f.write(x + '	smina	' + y +
                                                        '\n')
                                                # NOTE(review): f.close is
                                                # referenced but not called
                                                f.close
                                                curdir = str(cd + '/' + x +
                                                             '/' + y + '/' +
                                                             id +
                                                             '_docked.sdf')
                                            if i.endswith("lig.pdb"):
                                                protein = prody.fetchPDB(y)
                                                # NOTE(review): `array` is not
                                                # assigned anywhere in this
                                                # function -- NameError unless
                                                # it exists in an outer scope
                                                prody.writeArray(
                                                    'lmcss_docked_array.sdf',
                                                    array)
                                                prody.superpose(
                                                    'lmcss_docked.sdf',
                                                    protein,
                                                    weights=None)

                                                sts = str("obrms -f " + i +
                                                          " lmcss_docked.sdf")
                                                os.system(sts)
                                                os.chdir(wd +
                                                         '/challengedata/')
                                                f = open('visual.txt', 'ab+')
                                                f.write(x + '	smina	' + y +
                                                        '\n')
                                                # NOTE(review): f.close is
                                                # referenced but not called
                                                f.close
                                                curdir = str(
                                                    cd + '/' + x + '/' + y +
                                                    '/lmcss_docked.sdf')
                                                todir = str(cd + '/answers/' +
                                                            x + '/' + y + '/')
                                                shutil.copy(curdir, todir)
                                                print(curdir)
                                                break
                                os.chdir(wd)