コード例 #1
0
ファイル: test2.py プロジェクト: tommycarstensen/sandbox
import sys
sys.path.append('/home/tc/svn/tc_sandbox/pdb')
import parse_mmCIF, mmCIF2coords
sys.path.append('/home/tc/svn/GoodVibes')
import NMA, visualization

## Normal mode pipeline for a single entry: parse the mmCIF data, extract the
## alpha carbon coordinates, build and diagonalize the Hessian, and hand the
## eigenvectors to the VMD trajectory writer.
pdb_id = '2lzm'
## cutoff passed to NMA.hessian_calculation
hessian_cutoff = 10

mmcif_data = parse_mmCIF.main(pdb_id)
all_coords, alpha_coords = mmCIF2coords.main(pdb_id, mmcif_data)

hessian = NMA.hessian_calculation(alpha_coords, hessian_cutoff)
modes, mode_eigenvalues = NMA.diagonalize_hessian(hessian)

visualization.vmd_trajectory(pdb_id, alpha_coords, modes)
コード例 #2
0
def parse_GoodVibes_exclude_flexible(
    pdb,
    path,
):
    """Return the coordinate of the EXT probe atom with the highest B-factor.

    The function first runs a normal mode calculation on the alpha carbons of
    the query chain and splits the residues into a "rigid" and a "flexible"
    group by the amplitude of the eigenvector at index 6. It then scans
    ``output/<path>/<id>_<chain>_probe.pdb`` for ATOM/HETATM records whose
    residue name is ``EXT`` and keeps the one with the largest value in the
    B-factor columns (``line[60:66]``).

    NOTE(review): the rigid/flexible groups are only consumed by the probe
    filters that are currently disabled (kept below as commented-out code), so
    at present the returned coordinate depends on the probe file alone.

    Args:
        pdb: String whose first four characters are used as the entry ID and
            whose last character is used as the query chain.
        path: Sub-directory of ``output/`` that holds the probe file.

    Returns:
        ``numpy.array([x, y, z])`` of the selected probe atom, or ``None`` if
        the file contains no EXT ATOM/HETATM record. On ties the first record
        wins (strict ``>`` comparison).
    """

    pdb_id = pdb[:4]
    chain = pdb[-1]

    ##
    ## calculate amplitudes
    ##
    d_mmCIF = parse_mmCIF.main(pdb_id, )
    d_coords, l_coords_alpha = mmCIF2coords.main(pdb_id,
                                                 d_mmCIF,
                                                 query_chain=chain)
    ## single-argument form prints identically under Python 2 and 3
    print(len(l_coords_alpha))

    ##
    ## eigenvector
    ##
    cutoff = 10
    matrix_hessian = NMA.hessian_calculation(
        l_coords_alpha,
        cutoff,
    )
    eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian)
    ## per-residue displacement magnitude: the eigenvector stores x,y,z
    ## components consecutively, hence the stride of 3
    eigenvector = eigenvectors[6]
    l_amplitudes = [
        math.sqrt(eigenvector[i]**2 + eigenvector[i + 1]**2 +
                  eigenvector[i + 2]**2)
        for i in range(0, len(eigenvector), 3)
    ]

    ## average amplitude
    ## (the hand-computed mean that used to precede this call was immediately
    ## overwritten by the value returned here, so it has been dropped)
    average, stddev = statistics.do_stddev(l_amplitudes)

    ## residues moving less than average are "rigid"; residues moving more
    ## than half a standard deviation above average are "flexible"
    l_coords_rigid = [
        coord_alpha for i, coord_alpha in enumerate(l_coords_alpha)
        if l_amplitudes[i] < average
    ]
    l_coords_flexible = [
        coord_alpha for i, coord_alpha in enumerate(l_coords_alpha)
        if l_amplitudes[i] > average + 0.5 * stddev
    ]

    ## parse output
    fn = 'output/%s/%s_%s_probe.pdb' % (
        path,
        pdb_id,
        chain,
    )
    ## context manager closes the file even if reading fails
    with open(fn, 'r') as fd:
        lines = fd.readlines()

    max_bfactor = None
    coord = None
    for line in lines:
        record = line[:6].strip()
        if record not in (
                'ATOM',
                'HETATM',
        ):
            continue
        res_name = line[17:20]
        if res_name != 'EXT':
            continue

        bfactor = float(line[60:66])

        ## explicit None test instead of relying on Python 2 ordering None
        ## below every number (the comparison is a TypeError in Python 3)
        if max_bfactor is None or bfactor > max_bfactor:
            x = float(line[30:38])
            y = float(line[38:46])
            z = float(line[46:54])

            ## --- disabled filters (kept for reference) ---
            ##            coord_tmp = numpy.array([x,y,z,])

            ##            bool_vicinal_to_rigid = False
            ##            for coord_rigid in l_coords_rigid:
            ##                dist_from_rigid = math.sqrt(sum((coord_rigid-coord_tmp)**2))
            ##                if dist_from_rigid < 6:
            ##                    bool_vicinal_to_rigid = True
            ##                    break
            ##            if bool_vicinal_to_rigid == False:
            ##                continue

            ##            bool_vicinal_to_flexible = False
            ##            for coord_flexible in l_coords_flexible:
            ##                dist_from_flexible = math.sqrt(sum((coord_flexible-coord_tmp)**2))
            ##                if dist_from_flexible < 6:
            ##                    bool_vicinal_to_flexible = True
            ##                    break
            ##            if bool_vicinal_to_flexible == True:
            ##                continue

            ##            min_dist = [1000.,None,]
            ##            for i_coord_alpha in range(len(l_coords_alpha)):
            ##                coord_alpha = l_coords_alpha[i_coord_alpha]
            ##                dist_from_alpha = math.sqrt(sum((coord_alpha-coord_tmp)**2))
            ##                if dist_from_alpha < min_dist[0]:
            ##                    min_dist = [dist_from_alpha,i_coord_alpha,]
            ##            if l_amplitudes[min_dist[1]] > average+stddev:
            ##                continue

            coord = numpy.array([
                x,
                y,
                z,
            ])
            max_bfactor = bfactor

    return coord
コード例 #3
0
def _read_probe_coordinates(fn):
    """Return the coordinates of all HETATM records named EXT in a PDB file."""
    l_coords_probe = []
    ## context manager closes the file even if a line fails to parse
    with open(fn) as fd:
        for line in fd:
            record = line[:6].strip()
            if record == 'HETATM' and line[17:20] == 'EXT':
                x = float(line[30:38])
                y = float(line[38:46])
                z = float(line[46:54])
                l_coords_probe += [numpy.array([x, y, z])]
    return l_coords_probe


def main(
    pdb,
    chain,
    dist_max,
    dist_min,
    mode='single',
    v_apoholo=None,
    l_coords_probe=None,
    l_coords_protein_alpha=None,
):
    """Score probe positions by how much they perturb the protein's modes.

    For every probe coordinate an extra node is appended to the alpha carbon
    coordinates, the Hessian is rebuilt with ``do_interactions`` and
    diagonalized, and an overlap value is recorded:

    * ``v_apoholo`` given: overlap between ``v_apoholo`` and the perturbed
      eigenvector at the index returned by ``find_max_mode_apo_holo`` (the
      last three components, i.e. the probe's, are cut off).
    * otherwise ``mode == 'single'``: best ``calc_overlap`` value over
      ``mode_holo`` 6..9, stopping early once a value exceeds 0.9.
    * otherwise ``mode == 'multiple'``: a single ``calc_overlap`` call using
      the eigenvalues and ``l_factors``.

    Experimental branches that were permanently disabled (``if 2 + 2 == 5``)
    and commented-out scratch code have been removed; they never executed.

    Args:
        pdb, chain: Passed to ``parse_pdb_coordinates`` and ``write_pdb`` and
            used to build the name of the cached probe file.
        dist_max, dist_min: Squared and passed to ``add_probe_atoms`` when
            probes have to be generated.
        mode: ``'single'`` or ``'multiple'``; only consulted when
            ``v_apoholo`` is None.
        v_apoholo: Optional vector to compare the perturbed modes against.
        l_coords_probe: Optional probe coordinates. When empty/None they are
            read from the cached probe file if it exists, else generated.
        l_coords_protein_alpha: Optional alpha carbon coordinates; parsed
            from ``pdb``/``chain`` when None.

    Returns:
        A dict with keys ``mode_max_apoholo``, ``overlap_max_apoholo``,
        ``l_overlaps``, ``l_factors`` and ``eigenvectors`` when ``v_apoholo``
        is given, otherwise the list ``l_overlaps``.

    Raises:
        ValueError: ``v_apoholo`` is None and ``mode`` is neither
            ``'single'`` nor ``'multiple'`` (this path used to crash with a
            NameError through the undefined name ``stop``).
    """

    ##
    ## settings
    ##
    dist_min_sq = dist_min**2
    dist_max_sq = dist_max**2

    ## parse coordinates
    d_coords = parse_pdb_coordinates(
        pdb,
        chain,
    )

    ## identity tests: "== None" on a numpy array compares elementwise
    if l_coords_protein_alpha is None:
        ## parse alpha carbon atoms
        l_coords_protein_alpha = parse_alpha_carbon_atoms(d_coords, )

    ## calculate hessian matrix
    matrix_hessian_protein = do_interactions(l_coords_protein_alpha, )
    ## diagonalize hessian matrix
    eigenvectors_protein, eigenvalues_protein = NMA.diagonalize_hessian(
        matrix_hessian_protein, )

    bool_apoholo = v_apoholo is not None
    if bool_apoholo:
        mode_max_apoholo, overlap_max_apoholo, l_factors = find_max_mode_apo_holo(
            pdb,
            eigenvectors_protein,
            v_apoholo,
            eigenvalues_protein,
        )

    ## determine dimensions of protein
    d_dimensions = determine_protein_dimensions(l_coords_protein_alpha, )

    ## add probe atoms: caller supplied > cached file > freshly generated
    fn = '/home/tc/UCD/GV_ligand_binding_site_identification/output/GoodVibes/distmax6_distmin3/%s_%s_probe.pdb' % (
        pdb,
        chain,
    )
    if l_coords_probe:
        print('a')
    elif os.path.isfile(fn):
        print('b')
        l_coords_probe = _read_probe_coordinates(fn)
    else:
        l_coords_probe = add_probe_atoms(
            d_coords,
            d_dimensions,
            dist_min_sq,
            dist_max_sq,
        )

    ## calculate overlaps
    n_probes = len(l_coords_probe)
    print('looping over %s probe coordinates' % (n_probes, ))
    l_overlaps = []

    for i, coord_holo in enumerate(l_coords_probe):

        print('%s %s' % (i, n_probes))

        ## protein network plus one extra node at the probe position
        l_coords = l_coords_protein_alpha + [coord_holo]
        matrix_hessian_holo = do_interactions(l_coords)

        try:
            eigenvectors_holo, eigenvalues_holo = NMA.diagonalize_hessian(
                matrix_hessian_holo)
        except Exception:
            ## failed diagonalization is scored as "no change" (overlap 1);
            ## narrowed from a bare except so Ctrl-C still interrupts
            print('exception')
            l_overlaps += [1.]
            continue

        if bool_apoholo:
            ## compare to x-ray motion; [:-3] drops the probe's components
            v1 = v_apoholo
            v2 = eigenvectors_holo[mode_max_apoholo][:-3]
            overlap = abs(numpy.dot(v1, v2)) / math.sqrt(
                numpy.dot(v1, v1) * numpy.dot(v2, v2))
            print('overlap %s' % (overlap, ))
            l_overlaps += [overlap]

        elif mode == 'single':
            eigenvectors_holo = eigenvectors_holo[:-3]
            l = []
            ## check modes 6-9 in case eigenvalues have swapped
            for mode_holo in range(6, 10):
                overlap = calc_overlap(
                    eigenvectors_protein,
                    eigenvectors_holo,
                    mode_holo=mode_holo,
                )
                l += [overlap]
                if overlap > 0.9:
                    break
            overlap_max = max(l)

            print('%s %s %s %s' % (pdb, i, n_probes, overlap_max))
            l_overlaps += [overlap_max]

        elif mode == 'multiple':
            ## NOTE(review): l_factors is only assigned when v_apoholo is
            ## given, but this branch is only reached when it is None, so
            ## this call raises NameError as written - confirm the intent.
            eigenvectors_holo = eigenvectors_holo[:-3]
            overlap = calc_overlap(
                eigenvectors_protein,
                eigenvectors_holo,
                eigenvalues_protein,
                eigenvalues_holo,
                l_factors=l_factors,
            )
            l_overlaps += [overlap]
            print('%s %s %s' % (overlap, i, n_probes))

        else:
            print(sys.argv)
            raise ValueError('unknown mode: %r' % (mode, ))

    if bool_apoholo and len(l_overlaps) > 1:
        print(l_overlaps)
        l_overlaps = fix_overlaps(l_overlaps)
        print('%s %s' % (max(l_overlaps), min(l_overlaps)))

    if not bool_apoholo or len(l_overlaps) > 1:
        write_pdb(
            l_overlaps,
            l_coords_probe,
            pdb,
            chain,
        )

    if bool_apoholo:
        return {
            'mode_max_apoholo': mode_max_apoholo,
            'overlap_max_apoholo': overlap_max_apoholo,
            'l_overlaps': l_overlaps,
            'l_factors': l_factors,
            'eigenvectors': eigenvectors_protein,
        }

    print('how much to return to function that called me? just l_overlaps?')
    return l_overlaps
コード例 #4
0
import sys
sys.path.append('/home/tc/svn/tc_sandbox/pdb')
import parse_mmCIF, mmCIF2coords
sys.path.append('/home/tc/svn/GoodVibes')
import NMA, visualization

## Flat script: parse one entry, run the normal mode calculation on its alpha
## carbons and export the eigenvectors as a VMD trajectory.
entry = '2lzm'
cutoff_hessian = 10

d_entry = parse_mmCIF.main(entry)
d_xyz, l_xyz_alpha = mmCIF2coords.main(entry, d_entry)

eigvecs, eigvals = NMA.diagonalize_hessian(
    NMA.hessian_calculation(l_xyz_alpha, cutoff_hessian))

visualization.vmd_trajectory(entry, l_xyz_alpha, eigvecs)
コード例 #5
0
    '1czfA', '1thgA', '1booA', '1iu4A', '1bqcA', '206lA', '1cdeA', '1snzA',
    '1gq8A', '1aqlA', '1ps1A', '1s95A', '1pylA', '1ra2A', '1b6bA', '1pntA',
    '1e1aA', '2f9rA', '1v04A', '2nlrA', '1n29A', '1pbgA', '5cpaA', '1agmA',
    '1byaA', '1r76A', '1u5uA', '1vidA', '1h4gA', '1akdA', '1fy2A', '1xqdA',
    '1d6oA', '1qv0A', '1qjeA', '1fvaA', '1bp2A', '1ah7A', '2pthA', '2engA',
    '2acyA', '1qazA', '2a0nA', '1dl2A', '1gp5A', '1onrA', '1cwyA', '1pudA',
    '1bs9A', '1dinA', '1xyzA', '1bwlA', '1eugA', '1idjA', '1g24A', '1oygA',
    '1hzfA', '9papA', '1eb6A', '1ghsA', '1rbnA', '1bixA', '1bs4A', '1celA',
    '1hkaA', '1b02A', '1qibA', '1u3fA', '1agyA', '1zioA', '1pa9A', '2tpsA',
    '2plcA', '1qk2A', '1j53A', '1m21A',
    ]

## cutoff passed to NMA.hessian_calculation
cutoff = 10

for entry in l_pdbs:

    ## entries are a 4-character ID followed by the chain ID (e.g. '1czfA').
    ## Split BEFORE truncating: the chain used to be taken from the already
    ## truncated ID, so query_chain was always the empty string.
    pdb = entry[:4]
    chain = entry[4:]

    d = parse_mmCIF.main(pdb,)

    d_coords, l_coords = mmCIF2coords.main(pdb, d, query_chain = chain)

    matrix_hessian = NMA.hessian_calculation(l_coords, cutoff, verbose = False)

    eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian, verbose = False)

    visualization.vmd_arrows(pdb, l_coords, eigenvectors)

    ## single-argument form prints identically under Python 2 and 3
    print(pdb)
    ## NOTE(review): `stop` is not defined in the visible source; if it is
    ## undefined at runtime this raises NameError after the first entry.
    ## It looks like a deliberate debugging halt - remove it to process the
    ## whole list.
    stop
コード例 #6
0
def main():
    """Compare pairs of structures within each cluster and log overlap vs RMSD.

    Reads ``<path_mmCIF>/bc-100.out`` (one cluster of IDs per line, the line
    index is used as cluster number), skips clusters with index below 4816,
    and for every pair (pdb1, pdb2) of a cluster that passes the filters
    below: superposes the alpha carbon coordinates, builds the difference
    vector, calculates normal modes of both structures and appends
    "rmsd overlap" lines to text files under ``rmsd_v_overlap2/``.

    NOTE(review): as written there is an unconditional ``stop`` right after
    the first accepted pdb1 is printed; see the note at that line.
    """

    ## IDs to work on; a hand-curated blacklist is removed from it below
    ## (.remove is called unconditionally for every blacklisted ID)
    set_pdbs = exclude_include()
    l_pdbs_remove = [
        '4a3h','2wf5','1arl','1ee3', ## incorrect _struct_ref_seq.pdbx_db_accession
        '1uyd','1uye','1uyf','2byh','2byi', ## remediation _struct_ref_seq_dif
        '2xdu','3dn8','3dna','1ps3','1ouf','1l35','2eun','1rtc','1zon', ## _struct_ref_seq_dif missing
        '1pwl','1pwm','2fz8','2fz9', ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
        ]
    set_pdbs.remove('1f92') ## remediation _struct_ref_seq_dif incorrect residue number
    set_pdbs.remove('2f6f') ## remediation _pdbx_poly_seq_scheme.auth_mon_id wrong
    set_pdbs.remove('3a5j') ## remediation _struct_ref_seq_dif.db_mon_id is ? but should be MET
    set_pdbs.remove('2rhx') ## remediation _struct_ref_seq_dif.db_mon_id is ? but should be SER
    set_pdbs.remove('2fzb') ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
    set_pdbs.remove('2fzd') ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
    set_pdbs.remove('3dn5') ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
    set_pdbs.remove('1x96') ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
    set_pdbs.remove('1x97') ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
    set_pdbs.remove('1x98') ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
    set_pdbs.remove('1z3n') ## GenBank DBref - not an error...
    set_pdbs.remove('1z8a') ## GenBank DBref - not an error...
    set_pdbs.remove('1z89') ## GenBank DBref - not an error...
    set_pdbs.remove('2pf8') ## stupid use of alt_ids (C for highest occupancy and only altloc)
    set_pdbs.remove('2pyr') ## stupid use of alt_ids (G and R)
    set_pdbs.remove('3pdn') ## stupid use of alt_ids (B and C)
    set_pdbs.remove('2v4c') ## alt_id B used for 100% occupancy atoms
    set_pdbs.remove('1jxt') ## weird alt_id microheterogeneity...
    set_pdbs.remove('1jxu') ## weird alt_id microheterogeneity...
    set_pdbs.remove('1jxw') ## weird alt_id microheterogeneity...
    set_pdbs.remove('1jxx') ## weird alt_id microheterogeneity...
    set_pdbs.remove('1jxy') ## weird alt_id microheterogeneity...
##    set_pdbs.remove('1ac4') ## multiple strains and taxonomy ids but all same organism (S. cerevisiae)...
##    set_pdbs.remove('1ac8') ## multiple strains and taxonomy ids but all same organism (S. cerevisiae)...
##    set_pdbs.remove('1aeb') ## multiple strains and taxonomy ids but all same organism (S. cerevisiae)...
##    set_pdbs.remove('2rbt') ## multiple strains and taxonomy ids but all same organism (S. cerevisiae)... UNP A7A026, TAX 307796, STRAIN YJM789
##    set_pdbs.remove('2rbu') ## multiple strains and taxonomy ids but all same organism (S. cerevisiae)... UNP A7A026, TAX 307796, STRAIN YJM789
##    set_pdbs.remove('2rbv') ## multiple strains and taxonomy ids but all same organism (S. cerevisiae)... UNP A7A026, TAX 307796, STRAIN YJM789
    for pdb in l_pdbs_remove:
        set_pdbs.remove(pdb)

    ## cluster file: one whitespace separated list of IDs per line
    fd = open('%s/bc-100.out' %(path_mmCIF),'r')
    lines = fd.readlines()
    fd.close()

    for i_line in range(len(lines)):
        ## the line index doubles as cluster number
        cluster = i_line
        ## skip clusters below index 4816
        ## (presumably a restart point of an earlier run - confirm)
        if cluster < 4816:
            continue
##        if cluster not in [5,]:
##            continue
        line = lines[i_line]
        l_pdbs = line.lower().split()
        l_pdbs.sort()
        ## keep only the first four characters of every (lowercased) token
        for i_pdb in range(len(l_pdbs)):
            l_pdbs[i_pdb] = l_pdbs[i_pdb][:4]

        ## all unordered pairs (pdb1, pdb2) of the cluster
        for i_pdb1 in range(0,len(l_pdbs)-1):

            pdb1 = l_pdbs[i_pdb1]

##            if pdb1 != '1t49': ## tmp!!!
##                continue

            if not pdb1 in set_pdbs:
                continue

            print pdb1
            ## NOTE(review): `stop` is not defined in the visible source; if
            ## it is undefined at runtime this raises NameError here, so
            ## nothing below runs. Looks like a debugging halt - confirm.
            stop

            d_mmCIF1 = parse_mmCIF.main(pdb1,)

            ## a non-monomeric first entry of the sorted cluster skips the
            ## whole cluster (break); any later one only skips this pdb1
            bool_monomeric = check_monomeric(d_mmCIF1)
            if bool_monomeric == False:
                if i_pdb1 == 0:
                    break
                else:
                    continue

            bool_remediation_modres = check_modres(d_mmCIF1,pdb1,)
            if bool_remediation_modres == True:
                continue

            ## skip entries annotated with a DELETION
            if '_struct_ref_seq_dif.details' in d_mmCIF1.keys():
                if 'DELETION' in d_mmCIF1['_struct_ref_seq_dif.details']:
                    continue

            ## every polymer entity must occur exactly once, otherwise halt
            for i_entity in range(len(d_mmCIF1['_entity.id'])):
                if d_mmCIF1['_entity.type'][i_entity] == 'polymer':
                    if int(d_mmCIF1['_entity.pdbx_number_of_molecules'][i_entity]) != 1:
                        print d_mmCIF1['_entity.pdbx_number_of_molecules']
                        print pdb1, cluster
                        stop

            ## space group of structure 1; pairs must share it (see below)
            SG1 = d_mmCIF1['_symmetry.space_group_name_H-M']

            for i_pdb2 in range(i_pdb1+1,len(l_pdbs)):

                pdb2 = l_pdbs[i_pdb2]

##                if pdb2 != '2pf8': ## tmp!!!
##                    continue

##                if pdb1 != '3fui' or pdb2 != '3fuj':
##                    continue

                if not pdb2 in set_pdbs:
                    continue

                d_mmCIF2 = parse_mmCIF.main(pdb2,)

                bool_monomeric = check_monomeric(d_mmCIF2)
                if bool_monomeric == False:
                    continue

                bool_remediation_modres = check_modres(d_mmCIF2,pdb2,)
                if bool_remediation_modres == True:
                    continue

                ## NOTE(review): the guard tests the '.seq_num' key but the
                ## lookup uses '.details' (the pdb1 check above tests
                ## '.details') - a KeyError is possible if only '.seq_num'
                ## is present; confirm which key was intended.
                if '_struct_ref_seq_dif.seq_num' in d_mmCIF2.keys():
                    if 'DELETION' in d_mmCIF2['_struct_ref_seq_dif.details']:
                        continue

                ## biounit monomeric?
                ## NOTE(review): this `continue` only advances the inner
                ## i_entity loop, so the check has no effect on pdb2.
                for i_entity in range(len(d_mmCIF2['_entity.id'])):
                    if d_mmCIF2['_entity.type'][i_entity] == 'polymer':
                        if int(d_mmCIF2['_entity.pdbx_number_of_molecules'][i_entity]) != 1:
                            continue

                SG2 = d_mmCIF2['_symmetry.space_group_name_H-M']

                ## only compare structures with identical space group
                if SG1 != SG2:
                    continue

                ## parse coordinates again after being shortened in previous loop
                ## NOTE(review): on failure the ID is logged but execution
                ## falls through, so the coordinates used below are stale
                ## (previous iteration) or undefined.
                try:
                    d_coords1, l_coords_alpha1 = mmCIF2coords.main(pdb1, d_mmCIF1)
                except:
                    fd = open('remediation_atom_site.label_alt_id.txt','a')
                    fd.write('%s\n' %(pdb1,))
                    fd.close()
                try:
                    d_coords2, l_coords_alpha2 = mmCIF2coords.main(pdb2, d_mmCIF2)
                except:
                    fd = open('remediation_atom_site.label_alt_id.txt','a')
                    fd.write('%s\n' %(pdb2,))
                    fd.close()

                ## align sequences/coordinates
                ## (any failure is logged with both accessions and the pair skipped)
                try:
                    l_coords_alpha1, l_coords_alpha2 = create_apo_holo_dataset.sequential_alignment_of_coordinates(
                        l_coords_alpha1, l_coords_alpha2,
                        d_mmCIF1, d_mmCIF2,
                        pdb1, pdb2,
                        )
                except:
                    fd = open('remediation_struct_ref_seq_dif.txt','a')
                    fd.write(
                        '%s %s %s %s\n' %(
                            pdb1,pdb2,
                            d_mmCIF1['_struct_ref_seq.pdbx_db_accession'],
                            d_mmCIF2['_struct_ref_seq.pdbx_db_accession'],
                            )
                        )
                    fd.close()
                    continue
                ## diagnostics for a length mismatch after alignment
                if len(l_coords_alpha1) != len(l_coords_alpha2):
                    print d_mmCIF1['_pdbx_poly_seq_scheme.pdb_mon_id']
                    print d_mmCIF2['_pdbx_poly_seq_scheme.pdb_mon_id']
                    print 'coords', len(l_coords_alpha1), len(l_coords_alpha2)
                    print 'seq', len(d_mmCIF1['_pdbx_poly_seq_scheme.pdb_mon_id'])
                    print 'seq', len(d_mmCIF2['_pdbx_poly_seq_scheme.pdb_mon_id'])
                    print pdb1, pdb2
                    d_coords1, l_coords_alpha1 = mmCIF2coords.main(pdb1, d_mmCIF1)
                    ## NOTE(review): passes pdb1 together with d_mmCIF2 and
                    ## overwrites d_coords1 - pdb2/d_coords2 probably intended.
                    d_coords1, l_coords_alpha2 = mmCIF2coords.main(pdb1, d_mmCIF2)
                    print len(l_coords_alpha1), len(l_coords_alpha2)
                    ## NOTE(review): if `stop` raises, the `continue` below
                    ## is unreachable.
                    stop
                    continue

                ##
                ## align structure 1 and 2
                ##
                instance_geometry = geometry.geometry()
                rmsd = instance_geometry.superpose(l_coords_alpha1,l_coords_alpha2)
                tv1 = instance_geometry.fitcenter
                rm = instance_geometry.rotation
                tv2 = instance_geometry.refcenter

                ## structural alignment
                ## (subtract fitcenter, rotate, add refcenter - applied to structure 2 in place)
                for i_coord in range(len(l_coords_alpha2)):
                    l_coords_alpha2[i_coord] = numpy.dot(l_coords_alpha2[i_coord]-tv1,rm)+tv2

                ##
                ## vector from structure 1 to 2
                ##
                ## flattened as x,y,z per position; each component is
                ## structure 1 minus (transformed) structure 2
                vector = []
                for i in range(len(l_coords_alpha1)):
                    vector += [
                        l_coords_alpha1[i][0]-l_coords_alpha2[i][0],
                        l_coords_alpha1[i][1]-l_coords_alpha2[i][1],
                        l_coords_alpha1[i][2]-l_coords_alpha2[i][2],
                        ]
                vector = numpy.array(vector)

                ##
                ## calculate normal modes of structure 1
                ##
                ## (and of structure 2; any failure silently skips the pair)
                cutoff = 10
                try:
                    matrix_hessian1 = NMA.hessian_calculation(l_coords_alpha1, cutoff, verbose = False)
                    eigenvectors1, eigenvalues1 = NMA.diagonalize_hessian(matrix_hessian1, verbose = False)
                    matrix_hessian2 = NMA.hessian_calculation(l_coords_alpha2, cutoff, verbose = False)
                    eigenvectors2, eigenvalues2 = NMA.diagonalize_hessian(matrix_hessian2, verbose = False)
                except:
                    continue

                ##
                ## calculate overlap between normal modes and difference vector
                ##
                eigenvector1 = eigenvectors1[6]
                eigenvector2 = eigenvectors2[6]

                overlap1 = calc_overlap(eigenvector1,vector)
                overlap2 = calc_overlap(eigenvector2,vector)
                ## mode vs mode; indices 6/7 are also cross-compared and the
                ## maximum of the three is kept as overlap3
                overlap3a = calc_overlap(eigenvector1,eigenvector2)
                overlap3b = calc_overlap(eigenvectors1[6],eigenvectors2[7])
                overlap3c = calc_overlap(eigenvectors1[7],eigenvectors2[6])
                overlap3 = max(overlap3a,overlap3b,overlap3c)

                ## overlap1 and overlap2 are appended to the same per-cluster
                ## file, one "rmsd overlap" line each
                fd = open('rmsd_v_overlap2/cluster%i.txt' %(i_line),'a')
                fd.write('%s %s\n' %(rmsd,overlap1))
                fd.close()
                fd = open('rmsd_v_overlap2/cluster%i.txt' %(i_line),'a')
                fd.write('%s %s\n' %(rmsd,overlap2))
                fd.close()
                fd = open('rmsd_v_overlap2/cluster%i_ev_v_ev.txt' %(i_line),'a')
                fd.write('%s %s\n' %(rmsd,overlap3a))
                fd.close()
                fd = open('rmsd_v_overlap2/cluster%i_ev_v_ev_max.txt' %(i_line),'a')
                fd.write('%s %s\n' %(rmsd,overlap3))
                fd.close()
                ## the trailing comma keeps both prints on one output line
                print pdb1, pdb2, 'cluster', i_line, 'size', len(l_pdbs),
                print 'overlap', '%4.2f' %(round(overlap1,2)), '%4.2f' %(round(overlap2,2)), '%4.2f' %(round(overlap3,2)), 'rmsd', '%4.2f' %(round(rmsd,2))

    return
コード例 #7
0
def main(
    pdb,chain,dist_max,dist_min,mode='single',v_apoholo=None,l_coords_probe=None,
    l_coords_protein_alpha=None,
    ):

    ##
    ## settings
    ##
    dist_min_sq = dist_min**2
    dist_max_sq = dist_max**2

    ## parse coordinates
    d_coords = parse_pdb_coordinates(pdb,chain,)

    if l_coords_protein_alpha == None:
        ## parse alpha carbon atoms
        l_coords_protein_alpha = parse_alpha_carbon_atoms(d_coords,)

    ## calulate hessian matrix
    matrix_hessian_protein = do_interactions(l_coords_protein_alpha,)
    ## diagonalize hessian matrix
    eigenvectors_protein, eigenvalues_protein = NMA.diagonalize_hessian(matrix_hessian_protein,)

    if v_apoholo != None:
        mode_max_apoholo, overlap_max_apoholo, l_factors = find_max_mode_apo_holo(
            pdb,eigenvectors_protein,v_apoholo,
            eigenvalues_protein,
            )
##    ## tmp!!!
##    mode_max_apoholo = 6
##    v1 = v_apoholo
##    v2 = eigenvectors_protein[mode_max_apoholo]
##    overlap_max_apoholo = abs(numpy.dot(v1,v2))/math.sqrt(numpy.dot(v1,v1)*numpy.dot(v2,v2))

##    if 2+2 == 4: ## tmp!!!
##        print 'tmp!!!'
##        return l_factors

    ## determine dimensions of protein
    d_dimensions = determine_protein_dimensions(l_coords_protein_alpha,)
    ## add probe atoms
    fn = '/home/tc/UCD/GV_ligand_binding_site_identification/output/GoodVibes/distmax6_distmin3/%s_%s_probe.pdb' %(pdb,chain,)
    if l_coords_probe:
        print 'a'
        pass
    elif os.path.isfile(fn):
        print 'b'
        l_coords_probe = []
        fd = open(fn)
        lines = fd.readlines()
        fd.close()
        for line in lines:
            record = line[:6].strip()
            if record == 'HETATM' and line[17:20] == 'EXT':
                x = float(line[30:38])
                y = float(line[38:46])
                z = float(line[46:54])
                coord = numpy.array([x,y,z,])
                l_coords_probe += [coord]
    else:
        l_coords_probe = add_probe_atoms(d_coords,d_dimensions,dist_min_sq,dist_max_sq,)

    ## calculate overlaps
    print 'looping over', len(l_coords_probe), 'probe coordinates'
    l_overlaps = []

    for i in range(len(l_coords_probe)):

        print i, len(l_coords_probe)

        coord_holo = l_coords_probe[i]

        l_coords = l_coords_protein_alpha+[coord_holo]

##        matrix_hessian_holo = do_interactions(l_coords,bool_extra=True,) ## tmp!!!
##        matrix_hessian_holo = do_interactions(l_coords,bool_strong=True) ## tmp!!!
        matrix_hessian_holo = do_interactions(l_coords)

        try:
            eigenvectors_holo, eigenvalues_holo = NMA.diagonalize_hessian(matrix_hessian_holo)
        except:
            print 'exception'
            l_overlaps += [1.]
            continue

        ## compare to x-ray motion
        if v_apoholo != None:
            v1 = v_apoholo
            v2 = eigenvectors_holo[mode_max_apoholo][:-3]
            overlap = abs(numpy.dot(v1,v2))/math.sqrt(numpy.dot(v1,v1)*numpy.dot(v2,v2))
            print 'overlap', overlap

            max_overlap = overlap
            max_mode = mode_max_apoholo

            ## check neigboring modes for max overlap...
            if 2+2 == 5:
                max_mode = mode_max_apoholo
                switch_max = 3
                for mode in range(max(6,mode_max_apoholo-switch_max),mode_max_apoholo+switch_max+1):
                    if mode == mode_max_apoholo:
                        continue
                    v2 = eigenvectors_holo[mode][:len(v_apoholo)]
                    overlap = abs(numpy.dot(v1,v2))/math.sqrt(numpy.dot(v1,v1)*numpy.dot(v2,v2))
                    if overlap > max_overlap:
                        print '********', mode, round(overlap,3), mode_max_apoholo, round(max_overlap,3), mode_max_apoholo, round(overlap_max_apoholo,3), pdb
                        max_overlap = overlap
                        max_mode = mode
                        if mode_max_apoholo < 12 and overlap > 1.2*overlap_max_apoholo:
                            print '******** induced fit?'
            l_overlaps += [max_overlap]

            ## perturb elastic netwrok and recalculate mode contribution
            if 2+2 == 5:
                eigenvectors_holo = numpy.transpose(eigenvectors_holo)
                vector = numpy.array([0.,0.,0.,])
                v_apoholo = numpy.array(list(v_apoholo)+[0.,0.,0.,])
                l_factors_holo = numpy.linalg.solve(eigenvectors_holo,v_apoholo,)
                l_factors_holo_abs = [abs(factor) for factor in l_factors_holo]

                if mode_max_apoholo != list(l_factors_holo_abs).index(max(l_factors_holo_abs)):
                    print mode_max_apoholo, list(l_factors_holo_abs).index(max(l_factors_holo_abs))
                    print mode_max_apoholo, overlap_max_apoholo, overlap
                    print l_factors_holo_abs[mode_max_apoholo], max(l_factors_holo)
                    s = '# mode factor absfactor eigenvalue\n'
                    for i in range(len(l_factors_holo)):
                        s += '%s %s %s\n' %(i+1, l_factors_holo[i],abs(l_factors_holo[i]),)
                    fd = open('facs_eigvals_%s_perturbed.txt' %(pdb),'w')
                    fd.write(s)
                    fd.close()
                    write_pdb(l_overlaps,l_coords_probe,pdb,chain,)
    ##                stop_mode

##            ## tmp!!!
##            v2 = eigenvectors_holo[6][:-3]
##            overlap6 = abs(numpy.dot(v1,v2))/math.sqrt(numpy.dot(v1,v1)*numpy.dot(v2,v2))
##            if overlap6 > 1.1*overlap:
##                print mode_max_apoholo, overlap
##                print 6, overlap6
##                v2 = eigenvectors_holo[7][:-3]
##                overlap7 = abs(numpy.dot(v1,v2))/math.sqrt(numpy.dot(v1,v1)*numpy.dot(v2,v2))
##                print 7, overlap7
##                v2 = eigenvectors_holo[8][:-3]
##                overlap8 = abs(numpy.dot(v1,v2))/math.sqrt(numpy.dot(v1,v1)*numpy.dot(v2,v2))
##                print 8, overlap8
##                stop
            
        elif mode == 'single':
            eigenvectors_holo = eigenvectors_holo[:-3]
            l = []
            ## check first 3 modes in case eigenvalues have swapped
            for mode_holo in range(6,10,):
                overlap = calc_overlap(
                    eigenvectors_protein, eigenvectors_holo,
##                    eigenvalues_protein, eigenvalues_holo,
                    mode_holo = mode_holo,
                    )
                l += [overlap]
                if overlap > 0.9:
                    break
            overlap_max = max(l)

            print pdb, i, len(l_coords_probe), overlap_max

##            ## go for mode 7
##            if overlap_max < 0.9:
##                overlap_max = l[0]
            l_overlaps += [overlap_max]
##            if overlap_max < 0.90:
##                print pdb, i+1, len(l_coords_probe), overlap
##                print calc_overlap(
##                    eigenvectors_protein,eigenvectors_holo,
##                    eigenvalues_protein, eigenvalues_holo,
##                    mode_holo = 6,
##                    )
##                stop
        elif mode == 'multiple':
            eigenvectors_holo = eigenvectors_holo[:-3]
            overlap = calc_overlap(
                eigenvectors_protein,eigenvectors_holo,
                eigenvalues_protein, eigenvalues_holo,
                l_factors = l_factors,
                )
            l_overlaps += [overlap]
            print overlap, i, len(l_coords_probe)
        else:
            print sys.argv
            stop
            
##    fd = open('l_overlaps.txt','r')
##    s = fd.read()
##    fd.close()
##    l_overlaps = s.split()
##    l_overlaps = l_overlaps[1::2]

    ##
    ## combine protein and probe coordinates and add bfactors
    ##

##    d_coords_holo = parse_pdb_coordinates(pdb_holo,chain_holo,)
##    if len(d_coords.keys()) != len(d_coords_holo.keys()):
##        print len(d_coords.keys())
##        print len(d_coords_holo.keys())
##        stop
##    l_coords_protein_alpha_holo = parse_alpha_carbon_atoms(d_coords_holo,)
##    instance_geometry = geometry.geometry()
##    rmsd = instance_geometry.superpose(l_coords_protein_alpha,l_coords_protein_alpha_holo,)
##    tv1 = instance_geometry.fitcenter
##    rm = instance_geometry.rotation
##    tv2 = instance_geometry.refcenter
##    parse_ligand_coordinates(pdb_holo,chain_holo,ligand_ID,)

    if v_apoholo != None and len(l_overlaps) > 1:
        print l_overlaps
        l_overlaps = fix_overlaps(l_overlaps)
        print max(l_overlaps), min(l_overlaps)

    if (
        v_apoholo == None
        or
        (v_apoholo != None and len(l_overlaps) > 1)
        ):
        write_pdb(l_overlaps,l_coords_probe,pdb,chain,)

    if v_apoholo != None:
        d = {
            'mode_max_apoholo':mode_max_apoholo,
            'overlap_max_apoholo':overlap_max_apoholo,
            'l_overlaps':l_overlaps,
            'l_factors':l_factors,
            'eigenvectors':eigenvectors_protein,
            }
        if 2+2 == 5:
            d['l_factors_probe'] = l_factors_holo
            d['max_mode'] = max_mode
            
        return d
    else:
        print 'how much to return to function that called me? just l_overlaps?'
        return l_overlaps
コード例 #8
0
def parse_GoodVibes_exclude_flexible(pdb,path,):

    ##
    ## calculate amplitudes
    ##
    d_mmCIF = parse_mmCIF.main(pdb[:4],)
    d_coords, l_coords_alpha = mmCIF2coords.main(pdb[:4],d_mmCIF,query_chain=pdb[-1])
    print len(l_coords_alpha)
    ##
    ## eigenvector
    ##
    cutoff = 10
    matrix_hessian = NMA.hessian_calculation(l_coords_alpha,cutoff,)
    eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian)
    l_amplitudes = [
        math.sqrt(
            eigenvectors[6][i]**2+eigenvectors[6][i+1]**2+eigenvectors[6][i+2]**2
            )
        for i in range(0,len(eigenvectors[6]),3)
        ]

##    ## write pdb (color by bfactor)
##    l_bfactors = [100*(l_amplitudes[i]-min(l_amplitudes))/(max(l_amplitudes)-min(l_amplitudes)) for i in range(len(l_amplitudes))]
##    fd = open('output/%s/%s_%s_probe.pdb' %(path,pdb[:4],pdb[-1],),'r')
##    lines = fd.readlines()
##    fd.close()
##    index = [-1,None,]
##    lines_out = []
##    for line in lines:
##        record = line[:6].strip()
##        if record != 'ATOM':
##            lines_out += [line]
##        else:
##            res_no = int(line[22:26])
##            if res_no != index[1]:
##                index = [index[0]+1,res_no,]
##                bfactor = l_bfactors[index[0]]
##            line_out = '%s%6.2f%s' %(line[:60],bfactor,line[66:],)
##            lines_out += [line_out]
##    fd = open('output/%s/%s_%s_probe_color_by_amplitude.pdb' %(path,pdb[:4],pdb[-1],),'w')
##    fd.writelines(lines_out)
##    fd.close()

    ## average amplitude
    average = sum(l_amplitudes)/len(l_amplitudes)
    average,stddev = statistics.do_stddev(l_amplitudes)
    ##
    l_coords_rigid = []
    for i in range(len(l_coords_alpha)):
        if l_amplitudes[i] < average:
            l_coords_rigid += [l_coords_alpha[i]]
    l_coords_flexible = []
    for i in range(len(l_coords_alpha)):
        if l_amplitudes[i] > average+0.5*stddev:
            l_coords_flexible += [l_coords_alpha[i]]

    ## parse output
    fd = open('output/%s/%s_%s_probe.pdb' %(path,pdb[:4],pdb[-1],),'r')
    lines = fd.readlines()
    fd.close()

    max_bfactor = None
    coord = None
    for line in lines:
        record = line[:6].strip()
        if record not in ['ATOM','HETATM',]:
            continue
        res_name = line[17:20]
        if res_name != 'EXT':
            continue

        bfactor = float(line[60:66])

        if bfactor > max_bfactor:
            x = float(line[30:38])
            y = float(line[38:46])
            z = float(line[46:54])

##            coord_tmp = numpy.array([x,y,z,])

##            bool_vicinal_to_rigid = False
##            for coord_rigid in l_coords_rigid:
##                dist_from_rigid = math.sqrt(sum((coord_rigid-coord_tmp)**2))
##                if dist_from_rigid < 6:
##                    bool_vicinal_to_rigid = True
##                    break
##            if bool_vicinal_to_rigid == False:
##                continue

##            bool_vicinal_to_flexible = False
##            for coord_flexible in l_coords_flexible:
##                dist_from_flexible = math.sqrt(sum((coord_flexible-coord_tmp)**2))
##                if dist_from_flexible < 6:
##                    bool_vicinal_to_flexible = True
##                    break
##            if bool_vicinal_to_flexible == True:
##                continue

##            min_dist = [1000.,None,]
##            for i_coord_alpha in range(len(l_coords_alpha)):
##                coord_alpha = l_coords_alpha[i_coord_alpha]
##                dist_from_alpha = math.sqrt(sum((coord_alpha-coord_tmp)**2))
##                if dist_from_alpha < min_dist[0]:
##                    min_dist = [dist_from_alpha,i_coord_alpha,]
##            if l_amplitudes[min_dist[1]] > average+stddev:
##                continue

            coord = numpy.array([x,y,z,])
            max_bfactor = bfactor

    return coord
コード例 #9
0
    '1u3fA',
    '1agyA',
    '1zioA',
    '1pa9A',
    '2tpsA',
    '2plcA',
    '1qk2A',
    '1j53A',
    '1m21A',
]

cutoff = 10

for pdb in l_pdbs:

    pdb = pdb[:4]

    d = parse_mmCIF.main(pdb, )

    d_coords, l_coords = mmCIF2coords.main(pdb, d, query_chain=pdb[4:])

    matrix_hessian = NMA.hessian_calculation(l_coords, cutoff, verbose=False)

    eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian,
                                                        verbose=False)

    visualization.vmd_arrows(pdb, l_coords, eigenvectors)

    print pdb
    stop