Beispiel #1
0
def parse_GoodVibes_exclude_flexible(
    pdb,
    path,
):

    ##
    ## calculate amplitudes
    ##
    d_mmCIF = parse_mmCIF.main(pdb[:4], )
    d_coords, l_coords_alpha = mmCIF2coords.main(pdb[:4],
                                                 d_mmCIF,
                                                 query_chain=pdb[-1])
    print len(l_coords_alpha)
    ##
    ## eigenvector
    ##
    cutoff = 10
    matrix_hessian = NMA.hessian_calculation(
        l_coords_alpha,
        cutoff,
    )
    eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian)
    l_amplitudes = [
        math.sqrt(eigenvectors[6][i]**2 + eigenvectors[6][i + 1]**2 +
                  eigenvectors[6][i + 2]**2)
        for i in range(0, len(eigenvectors[6]), 3)
    ]

    ##    ## write pdb (color by bfactor)
    ##    l_bfactors = [100*(l_amplitudes[i]-min(l_amplitudes))/(max(l_amplitudes)-min(l_amplitudes)) for i in range(len(l_amplitudes))]
    ##    fd = open('output/%s/%s_%s_probe.pdb' %(path,pdb[:4],pdb[-1],),'r')
    ##    lines = fd.readlines()
    ##    fd.close()
    ##    index = [-1,None,]
    ##    lines_out = []
    ##    for line in lines:
    ##        record = line[:6].strip()
    ##        if record != 'ATOM':
    ##            lines_out += [line]
    ##        else:
    ##            res_no = int(line[22:26])
    ##            if res_no != index[1]:
    ##                index = [index[0]+1,res_no,]
    ##                bfactor = l_bfactors[index[0]]
    ##            line_out = '%s%6.2f%s' %(line[:60],bfactor,line[66:],)
    ##            lines_out += [line_out]
    ##    fd = open('output/%s/%s_%s_probe_color_by_amplitude.pdb' %(path,pdb[:4],pdb[-1],),'w')
    ##    fd.writelines(lines_out)
    ##    fd.close()

    ## average amplitude
    average = sum(l_amplitudes) / len(l_amplitudes)
    average, stddev = statistics.do_stddev(l_amplitudes)
    ##
    l_coords_rigid = []
    for i in range(len(l_coords_alpha)):
        if l_amplitudes[i] < average:
            l_coords_rigid += [l_coords_alpha[i]]
    l_coords_flexible = []
    for i in range(len(l_coords_alpha)):
        if l_amplitudes[i] > average + 0.5 * stddev:
            l_coords_flexible += [l_coords_alpha[i]]

    ## parse output
    fd = open('output/%s/%s_%s_probe.pdb' % (
        path,
        pdb[:4],
        pdb[-1],
    ), 'r')
    lines = fd.readlines()
    fd.close()

    max_bfactor = None
    coord = None
    for line in lines:
        record = line[:6].strip()
        if record not in [
                'ATOM',
                'HETATM',
        ]:
            continue
        res_name = line[17:20]
        if res_name != 'EXT':
            continue

        bfactor = float(line[60:66])

        if bfactor > max_bfactor:
            x = float(line[30:38])
            y = float(line[38:46])
            z = float(line[46:54])

            ##            coord_tmp = numpy.array([x,y,z,])

            ##            bool_vicinal_to_rigid = False
            ##            for coord_rigid in l_coords_rigid:
            ##                dist_from_rigid = math.sqrt(sum((coord_rigid-coord_tmp)**2))
            ##                if dist_from_rigid < 6:
            ##                    bool_vicinal_to_rigid = True
            ##                    break
            ##            if bool_vicinal_to_rigid == False:
            ##                continue

            ##            bool_vicinal_to_flexible = False
            ##            for coord_flexible in l_coords_flexible:
            ##                dist_from_flexible = math.sqrt(sum((coord_flexible-coord_tmp)**2))
            ##                if dist_from_flexible < 6:
            ##                    bool_vicinal_to_flexible = True
            ##                    break
            ##            if bool_vicinal_to_flexible == True:
            ##                continue

            ##            min_dist = [1000.,None,]
            ##            for i_coord_alpha in range(len(l_coords_alpha)):
            ##                coord_alpha = l_coords_alpha[i_coord_alpha]
            ##                dist_from_alpha = math.sqrt(sum((coord_alpha-coord_tmp)**2))
            ##                if dist_from_alpha < min_dist[0]:
            ##                    min_dist = [dist_from_alpha,i_coord_alpha,]
            ##            if l_amplitudes[min_dist[1]] > average+stddev:
            ##                continue

            coord = numpy.array([
                x,
                y,
                z,
            ])
            max_bfactor = bfactor

    return coord
Beispiel #2
0
import sys
sys.path.append('/home/tc/svn/tc_sandbox/pdb')
import parse_mmCIF, mmCIF2coords
sys.path.append('/home/tc/svn/GoodVibes')
import NMA, visualization

d_mmCIF = parse_mmCIF.main('2lzm',)
d_coords, l_coords_alpha = mmCIF2coords.main('2lzm',d_mmCIF)

cutoff = 10
matrix_hessian = NMA.hessian_calculation(l_coords_alpha, cutoff)
eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian,)
visualization.vmd_trajectory('2lzm',l_coords_alpha,eigenvectors)
def parse_coords(pdb):

    d_mmCIF = parse_mmCIF.main(pdb,)
    d_coords, l_coords_alpha = mmCIF2coords.main(pdb,d_mmCIF)

    return d_mmCIF, l_coords_alpha
Beispiel #4
0
import sys
sys.path.append('/home/tc/svn/tc_sandbox/pdb')
import parse_mmCIF, mmCIF2coords
sys.path.append('/home/tc/svn/GoodVibes')
import NMA, visualization

d_mmCIF = parse_mmCIF.main('2lzm', )
d_coords, l_coords_alpha = mmCIF2coords.main('2lzm', d_mmCIF)

cutoff = 10
matrix_hessian = NMA.hessian_calculation(l_coords_alpha, cutoff)
eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian, )
visualization.vmd_trajectory('2lzm', l_coords_alpha, eigenvectors)
def parse_coords(pdb):

    d_mmCIF = parse_mmCIF.main(pdb, )
    d_coords, l_coords_alpha = mmCIF2coords.main(pdb, d_mmCIF)

    return d_mmCIF, l_coords_alpha
    '1czfA', '1thgA', '1booA', '1iu4A', '1bqcA', '206lA', '1cdeA', '1snzA',
    '1gq8A', '1aqlA', '1ps1A', '1s95A', '1pylA', '1ra2A', '1b6bA', '1pntA',
    '1e1aA', '2f9rA', '1v04A', '2nlrA', '1n29A', '1pbgA', '5cpaA', '1agmA',
    '1byaA', '1r76A', '1u5uA', '1vidA', '1h4gA', '1akdA', '1fy2A', '1xqdA',
    '1d6oA', '1qv0A', '1qjeA', '1fvaA', '1bp2A', '1ah7A', '2pthA', '2engA',
    '2acyA', '1qazA', '2a0nA', '1dl2A', '1gp5A', '1onrA', '1cwyA', '1pudA',
    '1bs9A', '1dinA', '1xyzA', '1bwlA', '1eugA', '1idjA', '1g24A', '1oygA',
    '1hzfA', '9papA', '1eb6A', '1ghsA', '1rbnA', '1bixA', '1bs4A', '1celA',
    '1hkaA', '1b02A', '1qibA', '1u3fA', '1agyA', '1zioA', '1pa9A', '2tpsA',
    '2plcA', '1qk2A', '1j53A', '1m21A',
    ]

cutoff = 10

for pdb in l_pdbs:

    pdb = pdb[:4]

    d = parse_mmCIF.main(pdb,)

    d_coords, l_coords = mmCIF2coords.main(pdb, d, query_chain = pdb[4:])

    matrix_hessian = NMA.hessian_calculation(l_coords, cutoff, verbose = False)

    eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian, verbose = False)

    visualization.vmd_arrows(pdb, l_coords, eigenvectors)

    print pdb
    stop
def main():

    set_pdbs = exclude_include()
    l_pdbs_remove = [
        '4a3h','2wf5','1arl','1ee3', ## incorrect _struct_ref_seq.pdbx_db_accession
        '1uyd','1uye','1uyf','2byh','2byi', ## remediation _struct_ref_seq_dif
        '2xdu','3dn8','3dna','1ps3','1ouf','1l35','2eun','1rtc','1zon', ## _struct_ref_seq_dif missing
        '1pwl','1pwm','2fz8','2fz9', ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
        ]
    set_pdbs.remove('1f92') ## remediation _struct_ref_seq_dif incorrect residue number
    set_pdbs.remove('2f6f') ## remediation _pdbx_poly_seq_scheme.auth_mon_id wrong
    set_pdbs.remove('3a5j') ## remediation _struct_ref_seq_dif.db_mon_id is ? but should be MET
    set_pdbs.remove('2rhx') ## remediation _struct_ref_seq_dif.db_mon_id is ? but should be SER
    set_pdbs.remove('2fzb') ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
    set_pdbs.remove('2fzd') ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
    set_pdbs.remove('3dn5') ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
    set_pdbs.remove('1x96') ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
    set_pdbs.remove('1x97') ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
    set_pdbs.remove('1x98') ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
    set_pdbs.remove('1z3n') ## GenBank DBref - not an error...
    set_pdbs.remove('1z8a') ## GenBank DBref - not an error...
    set_pdbs.remove('1z89') ## GenBank DBref - not an error...
    set_pdbs.remove('2pf8') ## stupid use of alt_ids (C for highest occupancy and only altloc)
    set_pdbs.remove('2pyr') ## stupid use of alt_ids (G and R)
    set_pdbs.remove('3pdn') ## stupid use of alt_ids (B and C)
    set_pdbs.remove('2v4c') ## alt_id B used for 100% occupancy atoms
    set_pdbs.remove('1jxt') ## weird alt_id microheterogeneity...
    set_pdbs.remove('1jxu') ## weird alt_id microheterogeneity...
    set_pdbs.remove('1jxw') ## weird alt_id microheterogeneity...
    set_pdbs.remove('1jxx') ## weird alt_id microheterogeneity...
    set_pdbs.remove('1jxy') ## weird alt_id microheterogeneity...
##    set_pdbs.remove('1ac4') ## multiple strains and taxonomy ids but all same organism (S. cerevisiae)...
##    set_pdbs.remove('1ac8') ## multiple strains and taxonomy ids but all same organism (S. cerevisiae)...
##    set_pdbs.remove('1aeb') ## multiple strains and taxonomy ids but all same organism (S. cerevisiae)...
##    set_pdbs.remove('2rbt') ## multiple strains and taxonomy ids but all same organism (S. cerevisiae)... UNP A7A026, TAX 307796, STRAIN YJM789
##    set_pdbs.remove('2rbu') ## multiple strains and taxonomy ids but all same organism (S. cerevisiae)... UNP A7A026, TAX 307796, STRAIN YJM789
##    set_pdbs.remove('2rbv') ## multiple strains and taxonomy ids but all same organism (S. cerevisiae)... UNP A7A026, TAX 307796, STRAIN YJM789
    for pdb in l_pdbs_remove:
        set_pdbs.remove(pdb)

    fd = open('%s/bc-100.out' %(path_mmCIF),'r')
    lines = fd.readlines()
    fd.close()

    for i_line in range(len(lines)):
        cluster = i_line
        if cluster < 4816:
            continue
##        if cluster not in [5,]:
##            continue
        line = lines[i_line]
        l_pdbs = line.lower().split()
        l_pdbs.sort()
        for i_pdb in range(len(l_pdbs)):
            l_pdbs[i_pdb] = l_pdbs[i_pdb][:4]

        for i_pdb1 in range(0,len(l_pdbs)-1):

            pdb1 = l_pdbs[i_pdb1]

##            if pdb1 != '1t49': ## tmp!!!
##                continue

            if not pdb1 in set_pdbs:
                continue

            print pdb1
            stop

            d_mmCIF1 = parse_mmCIF.main(pdb1,)

            bool_monomeric = check_monomeric(d_mmCIF1)
            if bool_monomeric == False:
                if i_pdb1 == 0:
                    break
                else:
                    continue

            bool_remediation_modres = check_modres(d_mmCIF1,pdb1,)
            if bool_remediation_modres == True:
                continue

            if '_struct_ref_seq_dif.details' in d_mmCIF1.keys():
                if 'DELETION' in d_mmCIF1['_struct_ref_seq_dif.details']:
                    continue

            for i_entity in range(len(d_mmCIF1['_entity.id'])):
                if d_mmCIF1['_entity.type'][i_entity] == 'polymer':
                    if int(d_mmCIF1['_entity.pdbx_number_of_molecules'][i_entity]) != 1:
                        print d_mmCIF1['_entity.pdbx_number_of_molecules']
                        print pdb1, cluster
                        stop

            SG1 = d_mmCIF1['_symmetry.space_group_name_H-M']

            for i_pdb2 in range(i_pdb1+1,len(l_pdbs)):

                pdb2 = l_pdbs[i_pdb2]

##                if pdb2 != '2pf8': ## tmp!!!
##                    continue

##                if pdb1 != '3fui' or pdb2 != '3fuj':
##                    continue

                if not pdb2 in set_pdbs:
                    continue

                d_mmCIF2 = parse_mmCIF.main(pdb2,)

                bool_monomeric = check_monomeric(d_mmCIF2)
                if bool_monomeric == False:
                    continue

                bool_remediation_modres = check_modres(d_mmCIF2,pdb2,)
                if bool_remediation_modres == True:
                    continue

                if '_struct_ref_seq_dif.seq_num' in d_mmCIF2.keys():
                    if 'DELETION' in d_mmCIF2['_struct_ref_seq_dif.details']:
                        continue

                ## biounit monomeric?
                for i_entity in range(len(d_mmCIF2['_entity.id'])):
                    if d_mmCIF2['_entity.type'][i_entity] == 'polymer':
                        if int(d_mmCIF2['_entity.pdbx_number_of_molecules'][i_entity]) != 1:
                            continue

                SG2 = d_mmCIF2['_symmetry.space_group_name_H-M']

                if SG1 != SG2:
                    continue

                ## parse coordinates again after being shortened in previous loop
                try:
                    d_coords1, l_coords_alpha1 = mmCIF2coords.main(pdb1, d_mmCIF1)
                except:
                    fd = open('remediation_atom_site.label_alt_id.txt','a')
                    fd.write('%s\n' %(pdb1,))
                    fd.close()
                try:
                    d_coords2, l_coords_alpha2 = mmCIF2coords.main(pdb2, d_mmCIF2)
                except:
                    fd = open('remediation_atom_site.label_alt_id.txt','a')
                    fd.write('%s\n' %(pdb2,))
                    fd.close()

                ## align sequences/coordinates
                try:
                    l_coords_alpha1, l_coords_alpha2 = create_apo_holo_dataset.sequential_alignment_of_coordinates(
                        l_coords_alpha1, l_coords_alpha2,
                        d_mmCIF1, d_mmCIF2,
                        pdb1, pdb2,
                        )
                except:
                    fd = open('remediation_struct_ref_seq_dif.txt','a')
                    fd.write(
                        '%s %s %s %s\n' %(
                            pdb1,pdb2,
                            d_mmCIF1['_struct_ref_seq.pdbx_db_accession'],
                            d_mmCIF2['_struct_ref_seq.pdbx_db_accession'],
                            )
                        )
                    fd.close()
                    continue
                if len(l_coords_alpha1) != len(l_coords_alpha2):
                    print d_mmCIF1['_pdbx_poly_seq_scheme.pdb_mon_id']
                    print d_mmCIF2['_pdbx_poly_seq_scheme.pdb_mon_id']
                    print 'coords', len(l_coords_alpha1), len(l_coords_alpha2)
                    print 'seq', len(d_mmCIF1['_pdbx_poly_seq_scheme.pdb_mon_id'])
                    print 'seq', len(d_mmCIF2['_pdbx_poly_seq_scheme.pdb_mon_id'])
                    print pdb1, pdb2
                    d_coords1, l_coords_alpha1 = mmCIF2coords.main(pdb1, d_mmCIF1)
                    d_coords1, l_coords_alpha2 = mmCIF2coords.main(pdb1, d_mmCIF2)
                    print len(l_coords_alpha1), len(l_coords_alpha2)
                    stop
                    continue

                ##
                ## align structure 1 and 2
                ##
                instance_geometry = geometry.geometry()
                rmsd = instance_geometry.superpose(l_coords_alpha1,l_coords_alpha2)
                tv1 = instance_geometry.fitcenter
                rm = instance_geometry.rotation
                tv2 = instance_geometry.refcenter

                ## structural alignment
                for i_coord in range(len(l_coords_alpha2)):
                    l_coords_alpha2[i_coord] = numpy.dot(l_coords_alpha2[i_coord]-tv1,rm)+tv2

                ##
                ## vector from structure 1 to 2
                ##
                vector = []
                for i in range(len(l_coords_alpha1)):
                    vector += [
                        l_coords_alpha1[i][0]-l_coords_alpha2[i][0],
                        l_coords_alpha1[i][1]-l_coords_alpha2[i][1],
                        l_coords_alpha1[i][2]-l_coords_alpha2[i][2],
                        ]
                vector = numpy.array(vector)

                ##
                ## calculate normal modes of structure 1
                ##
                cutoff = 10
                try:
                    matrix_hessian1 = NMA.hessian_calculation(l_coords_alpha1, cutoff, verbose = False)
                    eigenvectors1, eigenvalues1 = NMA.diagonalize_hessian(matrix_hessian1, verbose = False)
                    matrix_hessian2 = NMA.hessian_calculation(l_coords_alpha2, cutoff, verbose = False)
                    eigenvectors2, eigenvalues2 = NMA.diagonalize_hessian(matrix_hessian2, verbose = False)
                except:
                    continue

                ##
                ## calculate overlap between normal modes and difference vector
                ##
                eigenvector1 = eigenvectors1[6]
                eigenvector2 = eigenvectors2[6]

                overlap1 = calc_overlap(eigenvector1,vector)
                overlap2 = calc_overlap(eigenvector2,vector)
                overlap3a = calc_overlap(eigenvector1,eigenvector2)
                overlap3b = calc_overlap(eigenvectors1[6],eigenvectors2[7])
                overlap3c = calc_overlap(eigenvectors1[7],eigenvectors2[6])
                overlap3 = max(overlap3a,overlap3b,overlap3c)

                fd = open('rmsd_v_overlap2/cluster%i.txt' %(i_line),'a')
                fd.write('%s %s\n' %(rmsd,overlap1))
                fd.close()
                fd = open('rmsd_v_overlap2/cluster%i.txt' %(i_line),'a')
                fd.write('%s %s\n' %(rmsd,overlap2))
                fd.close()
                fd = open('rmsd_v_overlap2/cluster%i_ev_v_ev.txt' %(i_line),'a')
                fd.write('%s %s\n' %(rmsd,overlap3a))
                fd.close()
                fd = open('rmsd_v_overlap2/cluster%i_ev_v_ev_max.txt' %(i_line),'a')
                fd.write('%s %s\n' %(rmsd,overlap3))
                fd.close()
                print pdb1, pdb2, 'cluster', i_line, 'size', len(l_pdbs),
                print 'overlap', '%4.2f' %(round(overlap1,2)), '%4.2f' %(round(overlap2,2)), '%4.2f' %(round(overlap3,2)), 'rmsd', '%4.2f' %(round(rmsd,2))

    return
def get_position_ligand(pdb,pdb_apo,d_apo2holo,):

    pdb_holo = d_apo2holo[pdb_apo]['holo']
    d_mmCIF_holo = parse_mmCIF.main(pdb_holo,)
    d_coords, l_coords_alpha_holo = mmCIF2coords.main(pdb_holo,d_mmCIF_holo)

    ##
    ##
    ##
    ligand = d_apo2holo[pdb_apo]['ligand']

    l_residues = []
    for i in range(len(d_mmCIF_holo['_struct_site.id'])):
        if not 'BINDING SITE FOR RESIDUE %s' %(ligand) in d_mmCIF_holo['_struct_site.details'][i]:
            continue
        if len(l_residues) > 0:
            print pdb, pdb_apo, pdb_holo
            print l_residues
            print d_mmCIF_holo['_struct_site.details'][i]
            stop
        struct_site_ID = d_mmCIF_holo['_struct_site.id'][i]
        for j in range(len(d_mmCIF_holo['_struct_site_gen.site_id'])):
            struct_site_gen_ID = d_mmCIF_holo['_struct_site_gen.site_id'][j]
            if struct_site_ID == struct_site_gen_ID:
                residue = int(d_mmCIF_holo['_struct_site_gen.auth_seq_id'][j])
##                l_residues += [residue]
                ## include neighboring residues
                l_residues += [residue-1,residue,residue+1]
    l_residues = list(set(l_residues))
    if len(l_residues) == 0:
        print pdb
        stop

    ## 
    l_coords_ligand = []
    for i in range(len(d_mmCIF_holo['_atom_site.id'])):
        if (
            d_mmCIF_holo['_atom_site.group_PDB'][i] == 'HETATM'
            and
            d_mmCIF_holo['_atom_site.label_comp_id'][i] == ligand
            ):
            x = float(d_mmCIF_holo['_atom_site.Cartn_x'][i])
            y = float(d_mmCIF_holo['_atom_site.Cartn_y'][i])
            z = float(d_mmCIF_holo['_atom_site.Cartn_z'][i])
            coord = numpy.array([x,y,z,])
            l_coords_ligand += [coord]


    d_mmCIF_apo = parse_mmCIF.main(pdb_apo,)
    d_coords, l_coords_alpha_apo = mmCIF2coords.main(pdb_apo,d_mmCIF_apo)   

    ## structural alignment
    ## solution that works in all cases
    ## also for 2d59 and 2d5a, which have residues missing at the Nterm and Cterm, respectively
    ## first non-?
    index1_seq_apo = next((i for i,v in enumerate(d_mmCIF_apo['_pdbx_poly_seq_scheme.pdb_mon_id']) if v != '?'))
    index1_seq_holo = next((i for i,v in enumerate(d_mmCIF_holo['_pdbx_poly_seq_scheme.pdb_mon_id']) if v != '?'))
    ## last non-?
    index2_seq_apo = len(d_mmCIF_apo['_pdbx_poly_seq_scheme.pdb_mon_id'])-next((i for i,v in enumerate(reversed(d_mmCIF_apo['_pdbx_poly_seq_scheme.pdb_mon_id'])) if v != '?'))
    index2_seq_holo = len(d_mmCIF_holo['_pdbx_poly_seq_scheme.pdb_mon_id'])-next((i for i,v in enumerate(reversed(d_mmCIF_holo['_pdbx_poly_seq_scheme.pdb_mon_id'])) if v != '?'))
    ## first common non-?
    index1_coord_apo = max(0,index1_seq_holo-index1_seq_apo)
    index1_coord_holo = max(0,index1_seq_apo-index1_seq_holo)
    ## last common non-?
    index2_coord_apo = len(l_coords_alpha_apo)+min(0,index2_seq_holo-index2_seq_apo)
    index2_coord_holo = len(l_coords_alpha_holo)+min(0,index2_seq_apo-index2_seq_holo)
    l_coords_alpha_apo = l_coords_alpha_apo[index1_coord_apo:index2_coord_apo]
    l_coords_alpha_holo = l_coords_alpha_holo[index1_coord_holo:index2_coord_holo]


    if pdb == pdb_apo:
        l_seq_num = d_mmCIF_apo['_pdbx_poly_seq_scheme.pdb_seq_num'][index1_coord_apo:index2_coord_apo]
        chain = ''.join(d_mmCIF_apo['_entity_poly.pdbx_strand_id'])
        n_residues = len(l_coords_alpha_apo)
        l_coords_alpha = l_coords_alpha_apo
    else:
        l_seq_num = d_mmCIF_holo['_pdbx_poly_seq_scheme.pdb_seq_num'][index1_coord_holo:index2_coord_holo]
        chain = ''.join(d_mmCIF_holo['_entity_poly.pdbx_strand_id'])
        n_residues = len(l_coords_alpha_holo)
        l_coords_alpha = l_coords_alpha_holo

    overlap_site = 1.
##    ##
##    ## eigenvector
##    ##
##    cutoff = 10
##    matrix_hessian = NMA.hessian_calculation(l_coords_alpha,cutoff,)
##    eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian)
##
##    ## apply transformation matrix
##    if pdb == pdb_apo:
##        instance_geometry = geometry.geometry()
##        rmsd = instance_geometry.superpose(l_coords_alpha_apo,l_coords_alpha_holo,)
##        tv1 = instance_geometry.fitcenter
##        rm = instance_geometry.rotation
##        tv2 = instance_geometry.refcenter
##        for i_coord in range(len(l_coords_ligand)):
##            l_coords_ligand[i_coord] = numpy.dot(l_coords_ligand[i_coord]-tv1,rm)+tv2
##
##    ##
##    ## apo/holo eigenvector
##    ##
##    vector_apo2holo = []
##    for i in range(len(l_coords_alpha_holo)):
##        vector_apo2holo += [
##            l_coords_alpha_holo[i][0]-l_coords_alpha_apo[i][0],
##            l_coords_alpha_holo[i][1]-l_coords_alpha_apo[i][1],
##            l_coords_alpha_holo[i][2]-l_coords_alpha_apo[i][2],
##            ]
##    vector_apo2holo = numpy.array(vector_apo2holo)
##
##    ##
##    ## calculate overlap between normal modes and difference vector
##    ## in the ligand binding site!!!
##    ##
##    vector_apo2holo_site = []
##    eigenvector_site = []
##    ## exclude coordinate not at the ligand binding site
##    for i_seq_num in range(len(l_seq_num)):
##        seq_num = int(l_seq_num[i_seq_num])
##        if seq_num in l_residues:
##            eigenvector_site += list(eigenvectors[6][3*i_seq_num:3*i_seq_num+3])
##            vector_apo2holo_site += list(vector_apo2holo[3*i_seq_num:3*i_seq_num+3])
##    ## calculate overlap
##    vector_apo2holo_site = numpy.array(vector_apo2holo_site)
##    eigenvector_site = numpy.array(eigenvector_site)
##    overlap_site = abs(
##        numpy.dot(eigenvector_site,vector_apo2holo_site)
##        /
##        math.sqrt(
##            numpy.dot(eigenvector_site,eigenvector_site)
##            *
##            numpy.dot(vector_apo2holo_site,vector_apo2holo_site)
##            )
##        )
##    if overlap_site > 0.8:
##        print vector_apo2holo_site
##        print eigenvector_site
##        print pdb
##        print l_residues

    position_ligand = sum(l_coords_ligand)/len(l_coords_ligand)

    n_atoms = len(l_coords_ligand)

    return position_ligand, chain, n_residues, n_atoms, ligand, overlap_site
def parse_GoodVibes_exclude_flexible(pdb,path,):

    ##
    ## calculate amplitudes
    ##
    d_mmCIF = parse_mmCIF.main(pdb[:4],)
    d_coords, l_coords_alpha = mmCIF2coords.main(pdb[:4],d_mmCIF,query_chain=pdb[-1])
    print len(l_coords_alpha)
    ##
    ## eigenvector
    ##
    cutoff = 10
    matrix_hessian = NMA.hessian_calculation(l_coords_alpha,cutoff,)
    eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian)
    l_amplitudes = [
        math.sqrt(
            eigenvectors[6][i]**2+eigenvectors[6][i+1]**2+eigenvectors[6][i+2]**2
            )
        for i in range(0,len(eigenvectors[6]),3)
        ]

##    ## write pdb (color by bfactor)
##    l_bfactors = [100*(l_amplitudes[i]-min(l_amplitudes))/(max(l_amplitudes)-min(l_amplitudes)) for i in range(len(l_amplitudes))]
##    fd = open('output/%s/%s_%s_probe.pdb' %(path,pdb[:4],pdb[-1],),'r')
##    lines = fd.readlines()
##    fd.close()
##    index = [-1,None,]
##    lines_out = []
##    for line in lines:
##        record = line[:6].strip()
##        if record != 'ATOM':
##            lines_out += [line]
##        else:
##            res_no = int(line[22:26])
##            if res_no != index[1]:
##                index = [index[0]+1,res_no,]
##                bfactor = l_bfactors[index[0]]
##            line_out = '%s%6.2f%s' %(line[:60],bfactor,line[66:],)
##            lines_out += [line_out]
##    fd = open('output/%s/%s_%s_probe_color_by_amplitude.pdb' %(path,pdb[:4],pdb[-1],),'w')
##    fd.writelines(lines_out)
##    fd.close()

    ## average amplitude
    average = sum(l_amplitudes)/len(l_amplitudes)
    average,stddev = statistics.do_stddev(l_amplitudes)
    ##
    l_coords_rigid = []
    for i in range(len(l_coords_alpha)):
        if l_amplitudes[i] < average:
            l_coords_rigid += [l_coords_alpha[i]]
    l_coords_flexible = []
    for i in range(len(l_coords_alpha)):
        if l_amplitudes[i] > average+0.5*stddev:
            l_coords_flexible += [l_coords_alpha[i]]

    ## parse output
    fd = open('output/%s/%s_%s_probe.pdb' %(path,pdb[:4],pdb[-1],),'r')
    lines = fd.readlines()
    fd.close()

    max_bfactor = None
    coord = None
    for line in lines:
        record = line[:6].strip()
        if record not in ['ATOM','HETATM',]:
            continue
        res_name = line[17:20]
        if res_name != 'EXT':
            continue

        bfactor = float(line[60:66])

        if bfactor > max_bfactor:
            x = float(line[30:38])
            y = float(line[38:46])
            z = float(line[46:54])

##            coord_tmp = numpy.array([x,y,z,])

##            bool_vicinal_to_rigid = False
##            for coord_rigid in l_coords_rigid:
##                dist_from_rigid = math.sqrt(sum((coord_rigid-coord_tmp)**2))
##                if dist_from_rigid < 6:
##                    bool_vicinal_to_rigid = True
##                    break
##            if bool_vicinal_to_rigid == False:
##                continue

##            bool_vicinal_to_flexible = False
##            for coord_flexible in l_coords_flexible:
##                dist_from_flexible = math.sqrt(sum((coord_flexible-coord_tmp)**2))
##                if dist_from_flexible < 6:
##                    bool_vicinal_to_flexible = True
##                    break
##            if bool_vicinal_to_flexible == True:
##                continue

##            min_dist = [1000.,None,]
##            for i_coord_alpha in range(len(l_coords_alpha)):
##                coord_alpha = l_coords_alpha[i_coord_alpha]
##                dist_from_alpha = math.sqrt(sum((coord_alpha-coord_tmp)**2))
##                if dist_from_alpha < min_dist[0]:
##                    min_dist = [dist_from_alpha,i_coord_alpha,]
##            if l_amplitudes[min_dist[1]] > average+stddev:
##                continue

            coord = numpy.array([x,y,z,])
            max_bfactor = bfactor

    return coord
Beispiel #10
0
    '1u3fA',
    '1agyA',
    '1zioA',
    '1pa9A',
    '2tpsA',
    '2plcA',
    '1qk2A',
    '1j53A',
    '1m21A',
]

cutoff = 10

for pdb in l_pdbs:

    pdb = pdb[:4]

    d = parse_mmCIF.main(pdb, )

    d_coords, l_coords = mmCIF2coords.main(pdb, d, query_chain=pdb[4:])

    matrix_hessian = NMA.hessian_calculation(l_coords, cutoff, verbose=False)

    eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian,
                                                        verbose=False)

    visualization.vmd_arrows(pdb, l_coords, eigenvectors)

    print pdb
    stop