コード例 #1
0
ファイル: test2.py プロジェクト: tommycarstensen/sandbox
import sys
sys.path.append('/home/tc/svn/tc_sandbox/pdb')
import parse_mmCIF, mmCIF2coords
sys.path.append('/home/tc/svn/GoodVibes')
import NMA, visualization

## Run a normal mode calculation on PDB entry 2lzm and hand the result to
## visualization.vmd_trajectory.
pdb_id = '2lzm'
cutoff = 10  ## second argument of NMA.hessian_calculation

## Parse the mmCIF entry, then extract its coordinates.
d_mmCIF = parse_mmCIF.main(pdb_id)
d_coords, l_coords_alpha = mmCIF2coords.main(pdb_id, d_mmCIF)

## Build the Hessian from the alpha coordinates and diagonalize it.
matrix_hessian = NMA.hessian_calculation(l_coords_alpha, cutoff)
eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian)

visualization.vmd_trajectory(pdb_id, l_coords_alpha, eigenvectors)
コード例 #2
0
def parse_GoodVibes_exclude_flexible(
    pdb,
    path,
):
    """Return the position of the highest-B-factor 'EXT' atom of a probe file.

    Arguments:
        pdb:  identifier string; the first four characters are used as the
              PDB id and the last character as the chain id.
        path: sub-directory of 'output/' that holds '<id>_<chain>_probe.pdb'.

    Returns:
        A numpy array [x, y, z] taken from the ATOM/HETATM record with residue
        name 'EXT' whose B-factor column (columns 61-66) is largest, or None
        if the file has no such record. On ties the first record wins.

    The function also runs a normal mode calculation on the alpha coordinates
    and prints their count. The rigid/flexible classification derived from it
    is NOT consulted by the selection at the moment (see the note below).
    """

    pdb_id = pdb[:4]
    chain = pdb[-1]

    ##
    ## calculate amplitudes
    ##
    d_mmCIF = parse_mmCIF.main(pdb_id)
    d_coords, l_coords_alpha = mmCIF2coords.main(pdb_id,
                                                 d_mmCIF,
                                                 query_chain=chain)
    print(len(l_coords_alpha))

    ##
    ## eigenvector
    ##
    cutoff = 10
    matrix_hessian = NMA.hessian_calculation(l_coords_alpha, cutoff)
    eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian)
    ## NOTE(review): index 6 is presumably the first non-rigid-body mode -
    ## confirm against NMA.diagonalize_hessian.
    mode = eigenvectors[6]
    ## Norm of each consecutive (x, y, z) triple of the mode.
    l_amplitudes = [
        math.sqrt(mode[i]**2 + mode[i + 1]**2 + mode[i + 2]**2)
        for i in range(0, len(mode), 3)
    ]

    ## average amplitude (the hand-computed mean that used to precede this
    ## call was immediately overwritten by it and has been dropped)
    average, stddev = statistics.do_stddev(l_amplitudes)

    ## Classify alpha coordinates by amplitude.
    ## NOTE: these two lists are not used below. The filters that consumed
    ## them were disabled in the original code:
    ##   1) require the probe atom to be within 6 of a rigid coordinate,
    ##   2) reject it if within 6 of a flexible coordinate,
    ##   3) reject it if the nearest alpha coordinate has an amplitude
    ##      above average+stddev.
    l_coords_rigid = [
        l_coords_alpha[i] for i in range(len(l_coords_alpha))
        if l_amplitudes[i] < average
    ]
    l_coords_flexible = [
        l_coords_alpha[i] for i in range(len(l_coords_alpha))
        if l_amplitudes[i] > average + 0.5 * stddev
    ]

    ## parse output
    max_bfactor = None
    coord = None
    fn_probe = 'output/%s/%s_%s_probe.pdb' % (
        path,
        pdb_id,
        chain,
    )
    ## 'with' closes the file even if a malformed record raises below.
    with open(fn_probe, 'r') as fd:
        for line in fd:
            if line[:6].strip() not in ('ATOM', 'HETATM'):
                continue
            if line[17:20] != 'EXT':
                continue

            bfactor = float(line[60:66])

            ## Explicit None test: the original relied on Python 2 ordering
            ## None below every number, which is a TypeError in Python 3.
            if max_bfactor is None or bfactor > max_bfactor:
                coord = numpy.array([
                    float(line[30:38]),
                    float(line[38:46]),
                    float(line[46:54]),
                ])
                max_bfactor = bfactor

    return coord
コード例 #3
0
import sys
sys.path.append('/home/tc/svn/tc_sandbox/pdb')
import parse_mmCIF, mmCIF2coords
sys.path.append('/home/tc/svn/GoodVibes')
import NMA, visualization

## Run a normal mode calculation on PDB entry 2lzm and hand the result to
## visualization.vmd_trajectory.
pdb_id = '2lzm'
cutoff = 10  ## second argument of NMA.hessian_calculation

## Parse the mmCIF entry, then extract its coordinates.
d_mmCIF = parse_mmCIF.main(pdb_id)
d_coords, l_coords_alpha = mmCIF2coords.main(pdb_id, d_mmCIF)

## Build the Hessian from the alpha coordinates and diagonalize it.
matrix_hessian = NMA.hessian_calculation(l_coords_alpha, cutoff)
eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian)

visualization.vmd_trajectory(pdb_id, l_coords_alpha, eigenvectors)
コード例 #4
0
    '1czfA', '1thgA', '1booA', '1iu4A', '1bqcA', '206lA', '1cdeA', '1snzA',
    '1gq8A', '1aqlA', '1ps1A', '1s95A', '1pylA', '1ra2A', '1b6bA', '1pntA',
    '1e1aA', '2f9rA', '1v04A', '2nlrA', '1n29A', '1pbgA', '5cpaA', '1agmA',
    '1byaA', '1r76A', '1u5uA', '1vidA', '1h4gA', '1akdA', '1fy2A', '1xqdA',
    '1d6oA', '1qv0A', '1qjeA', '1fvaA', '1bp2A', '1ah7A', '2pthA', '2engA',
    '2acyA', '1qazA', '2a0nA', '1dl2A', '1gp5A', '1onrA', '1cwyA', '1pudA',
    '1bs9A', '1dinA', '1xyzA', '1bwlA', '1eugA', '1idjA', '1g24A', '1oygA',
    '1hzfA', '9papA', '1eb6A', '1ghsA', '1rbnA', '1bixA', '1bs4A', '1celA',
    '1hkaA', '1b02A', '1qibA', '1u3fA', '1agyA', '1zioA', '1pa9A', '2tpsA',
    '2plcA', '1qk2A', '1j53A', '1m21A',
    ]

cutoff = 10  ## second argument of NMA.hessian_calculation

## For every entry of l_pdbs: parse the structure, run the normal mode
## calculation and pass the result to visualization.vmd_arrows.
for pdb in l_pdbs:

    ## Entries are a 4-character PDB id followed by a chain id (e.g. '1m21A').
    ## Take the chain BEFORE truncating: the original truncated first, so
    ## pdb[4:] was always '' and the chain id never reached mmCIF2coords.
    chain = pdb[4:]
    pdb = pdb[:4]

    d = parse_mmCIF.main(pdb)

    d_coords, l_coords = mmCIF2coords.main(pdb, d, query_chain=chain)

    matrix_hessian = NMA.hessian_calculation(l_coords, cutoff, verbose=False)

    eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian,
                                                        verbose=False)

    visualization.vmd_arrows(pdb, l_coords, eigenvectors)

    print(pdb)
    ## NOTE(review): 'stop' is an undefined name; evaluating it raises
    ## NameError and halts the script after the first entry. Kept as in the
    ## original - remove it to process the whole list.
    stop
コード例 #5
0
def _append_text(filename, text):
    """Append *text* to *filename*; the file is closed even if the write fails."""
    with open(filename, 'a') as fd:
        fd.write(text)


def _has_deletion(d_mmCIF):
    """Return True if '_struct_ref_seq_dif.details' is present and contains 'DELETION'."""
    key = '_struct_ref_seq_dif.details'
    return key in d_mmCIF and 'DELETION' in d_mmCIF[key]


def _has_multicopy_polymer(d_mmCIF):
    """Return True if any 'polymer' entity has pdbx_number_of_molecules != 1."""
    for i_entity in range(len(d_mmCIF['_entity.id'])):
        if d_mmCIF['_entity.type'][i_entity] != 'polymer':
            continue
        if int(d_mmCIF['_entity.pdbx_number_of_molecules'][i_entity]) != 1:
            return True
    return False


def main():
    """Compare normal modes with the displacement between structure pairs.

    Reads '<path_mmCIF>/bc-100.out' and treats every line as one cluster of
    PDB ids (the 0-based line number is the cluster number; presumably this
    is the 100%-identity sequence cluster file - confirm). For every pair of
    accepted ids within a cluster the two structures are aligned and
    superposed, normal modes are calculated for both, and the overlap of
    eigenvectors[6] with the difference vector and with each other is
    appended to text files under 'rmsd_v_overlap2/'.

    Pairs are skipped when either entry is excluded, fails check_monomeric,
    is flagged by check_modres, has a DELETION in _struct_ref_seq_dif.details,
    or when the space groups differ. Parsing/alignment failures are logged to
    'remediation_*.txt' files and the pair is skipped.

    NOTE(review): the bare name 'stop' is undefined; reaching it raises
    NameError and halts the run. The halts of the original are kept. The one
    directly after 'print(pdb1)' stops the run at the first accepted id, so
    everything below it is currently unreachable until it is removed.
    """

    cutoff = 10  ## second argument of NMA.hessian_calculation
    first_cluster = 4816  ## clusters with a lower line number are skipped

    set_pdbs = exclude_include()

    ## Ids excluded by hand. Each must be present in set_pdbs; .remove()
    ## raises otherwise, exactly as in the original.
    l_pdbs_remove = [
        '1f92',  ## remediation _struct_ref_seq_dif incorrect residue number
        '2f6f',  ## remediation _pdbx_poly_seq_scheme.auth_mon_id wrong
        '3a5j',  ## remediation _struct_ref_seq_dif.db_mon_id is ? but should be MET
        '2rhx',  ## remediation _struct_ref_seq_dif.db_mon_id is ? but should be SER
        ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
        '2fzb', '2fzd', '3dn5', '1x96', '1x97', '1x98',
        ## GenBank DBref - not an error...
        '1z3n', '1z8a', '1z89',
        '2pf8',  ## odd use of alt_ids (C for highest occupancy and only altloc)
        '2pyr',  ## odd use of alt_ids (G and R)
        '3pdn',  ## odd use of alt_ids (B and C)
        '2v4c',  ## alt_id B used for 100% occupancy atoms
        ## weird alt_id microheterogeneity...
        '1jxt', '1jxu', '1jxw', '1jxx', '1jxy',
        ## incorrect _struct_ref_seq.pdbx_db_accession
        '4a3h', '2wf5', '1arl', '1ee3',
        ## remediation _struct_ref_seq_dif
        '1uyd', '1uye', '1uyf', '2byh', '2byi',
        ## _struct_ref_seq_dif missing
        '2xdu', '3dn8', '3dna', '1ps3', '1ouf', '1l35', '2eun', '1rtc', '1zon',
        ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
        '1pwl', '1pwm', '2fz8', '2fz9',
    ]
    ## Not excluded (their removal was commented out in the original):
    ## 1ac4, 1ac8, 1aeb, 2rbt, 2rbu, 2rbv - multiple strains and taxonomy ids
    ## but all same organism (S. cerevisiae).
    for pdb in l_pdbs_remove:
        set_pdbs.remove(pdb)

    with open('%s/bc-100.out' % (path_mmCIF), 'r') as fd:
        lines = fd.readlines()

    for cluster, line in enumerate(lines):
        if cluster < first_cluster:
            continue

        ## Sort the full ids first, then keep only the 4-character PDB id.
        l_pdbs = [entry[:4] for entry in sorted(line.lower().split())]

        for i_pdb1 in range(0, len(l_pdbs) - 1):

            pdb1 = l_pdbs[i_pdb1]

            if pdb1 not in set_pdbs:
                continue

            print(pdb1)
            stop  ## NOTE(review): debugging halt (NameError), see docstring

            d_mmCIF1 = parse_mmCIF.main(pdb1)

            bool_monomeric = check_monomeric(d_mmCIF1)
            ## Explicit comparisons are kept because the helpers' return
            ## types are not visible from here.
            if bool_monomeric == False:
                ## A non-monomeric first member abandons the whole cluster.
                if i_pdb1 == 0:
                    break
                continue

            bool_remediation_modres = check_modres(d_mmCIF1, pdb1)
            if bool_remediation_modres == True:
                continue

            if _has_deletion(d_mmCIF1):
                continue

            if _has_multicopy_polymer(d_mmCIF1):
                print(d_mmCIF1['_entity.pdbx_number_of_molecules'])
                print('%s %s' % (pdb1, cluster))
                stop  ## halt for inspection (NameError)

            SG1 = d_mmCIF1['_symmetry.space_group_name_H-M']

            for i_pdb2 in range(i_pdb1 + 1, len(l_pdbs)):

                pdb2 = l_pdbs[i_pdb2]

                if pdb2 not in set_pdbs:
                    continue

                d_mmCIF2 = parse_mmCIF.main(pdb2)

                bool_monomeric = check_monomeric(d_mmCIF2)
                if bool_monomeric == False:
                    continue

                bool_remediation_modres = check_modres(d_mmCIF2, pdb2)
                if bool_remediation_modres == True:
                    continue

                ## Fix: the original tested for '_struct_ref_seq_dif.seq_num'
                ## but then read '.details' (possible KeyError).
                if _has_deletion(d_mmCIF2):
                    continue

                ## biounit monomeric?
                ## Fix: the original 'continue' sat inside the entity loop
                ## and therefore never skipped pdb2.
                if _has_multicopy_polymer(d_mmCIF2):
                    continue

                SG2 = d_mmCIF2['_symmetry.space_group_name_H-M']
                if SG1 != SG2:
                    continue

                ## parse coordinates again after being shortened in previous loop
                ## Fix: after a failure the pair is now skipped instead of
                ## falling through to stale or undefined coordinates.
                try:
                    d_coords1, l_coords_alpha1 = mmCIF2coords.main(pdb1, d_mmCIF1)
                except Exception:
                    _append_text('remediation_atom_site.label_alt_id.txt',
                                 '%s\n' % (pdb1,))
                    continue
                try:
                    d_coords2, l_coords_alpha2 = mmCIF2coords.main(pdb2, d_mmCIF2)
                except Exception:
                    _append_text('remediation_atom_site.label_alt_id.txt',
                                 '%s\n' % (pdb2,))
                    continue

                ## align sequences/coordinates
                try:
                    l_coords_alpha1, l_coords_alpha2 = create_apo_holo_dataset.sequential_alignment_of_coordinates(
                        l_coords_alpha1, l_coords_alpha2,
                        d_mmCIF1, d_mmCIF2,
                        pdb1, pdb2,
                        )
                except Exception:
                    _append_text(
                        'remediation_struct_ref_seq_dif.txt',
                        '%s %s %s %s\n' % (
                            pdb1, pdb2,
                            d_mmCIF1['_struct_ref_seq.pdbx_db_accession'],
                            d_mmCIF2['_struct_ref_seq.pdbx_db_accession'],
                            ),
                        )
                    continue

                if len(l_coords_alpha1) != len(l_coords_alpha2):
                    ## Dump diagnostics, then halt for inspection.
                    print(d_mmCIF1['_pdbx_poly_seq_scheme.pdb_mon_id'])
                    print(d_mmCIF2['_pdbx_poly_seq_scheme.pdb_mon_id'])
                    print('coords %s %s' % (len(l_coords_alpha1), len(l_coords_alpha2)))
                    print('seq %s' % (len(d_mmCIF1['_pdbx_poly_seq_scheme.pdb_mon_id'])))
                    print('seq %s' % (len(d_mmCIF2['_pdbx_poly_seq_scheme.pdb_mon_id'])))
                    print('%s %s' % (pdb1, pdb2))
                    d_coords1, l_coords_alpha1 = mmCIF2coords.main(pdb1, d_mmCIF1)
                    ## Fix: the original passed pdb1 together with d_mmCIF2.
                    d_coords2, l_coords_alpha2 = mmCIF2coords.main(pdb2, d_mmCIF2)
                    print('%s %s' % (len(l_coords_alpha1), len(l_coords_alpha2)))
                    stop  ## halt for inspection (NameError)
                    continue

                ##
                ## align structure 1 and 2
                ##
                instance_geometry = geometry.geometry()
                rmsd = instance_geometry.superpose(l_coords_alpha1, l_coords_alpha2)
                tv1 = instance_geometry.fitcenter
                rm = instance_geometry.rotation
                tv2 = instance_geometry.refcenter

                ## structural alignment: transform structure 2 in place
                for i_coord, coord in enumerate(l_coords_alpha2):
                    l_coords_alpha2[i_coord] = numpy.dot(coord - tv1, rm) + tv2

                ##
                ## vector from structure 1 to 2
                ## (flattened x, y, z differences; lengths are equal here)
                ##
                vector = numpy.array([
                    coord1[k] - coord2[k]
                    for coord1, coord2 in zip(l_coords_alpha1, l_coords_alpha2)
                    for k in range(3)
                    ])

                ##
                ## calculate normal modes of structure 1 and 2
                ##
                try:
                    matrix_hessian1 = NMA.hessian_calculation(l_coords_alpha1, cutoff, verbose=False)
                    eigenvectors1, eigenvalues1 = NMA.diagonalize_hessian(matrix_hessian1, verbose=False)
                    matrix_hessian2 = NMA.hessian_calculation(l_coords_alpha2, cutoff, verbose=False)
                    eigenvectors2, eigenvalues2 = NMA.diagonalize_hessian(matrix_hessian2, verbose=False)
                except Exception:
                    ## Best effort, as in the original: a pair whose modes
                    ## cannot be calculated is skipped silently.
                    continue

                ##
                ## calculate overlap between normal modes and difference vector
                ##
                overlap1 = calc_overlap(eigenvectors1[6], vector)
                overlap2 = calc_overlap(eigenvectors2[6], vector)
                overlap3a = calc_overlap(eigenvectors1[6], eigenvectors2[6])
                overlap3b = calc_overlap(eigenvectors1[6], eigenvectors2[7])
                overlap3c = calc_overlap(eigenvectors1[7], eigenvectors2[6])
                overlap3 = max(overlap3a, overlap3b, overlap3c)

                prefix = 'rmsd_v_overlap2/cluster%i' % (cluster)
                ## Both mode-vs-displacement overlaps go to the same file.
                _append_text(
                    prefix + '.txt',
                    '%s %s\n%s %s\n' % (rmsd, overlap1, rmsd, overlap2),
                    )
                _append_text(prefix + '_ev_v_ev.txt', '%s %s\n' % (rmsd, overlap3a))
                _append_text(prefix + '_ev_v_ev_max.txt', '%s %s\n' % (rmsd, overlap3))

                print(
                    '%s %s cluster %s size %s overlap %4.2f %4.2f %4.2f rmsd %4.2f' % (
                        pdb1, pdb2, cluster, len(l_pdbs),
                        round(overlap1, 2), round(overlap2, 2), round(overlap3, 2),
                        round(rmsd, 2),
                        )
                    )
コード例 #6
0
def parse_GoodVibes_exclude_flexible(pdb,path,):
    """Return the position of the highest-B-factor 'EXT' atom of a probe file.

    Arguments:
        pdb:  identifier string; the first four characters are used as the
              PDB id and the last character as the chain id.
        path: sub-directory of 'output/' that holds '<id>_<chain>_probe.pdb'.

    Returns:
        A numpy array [x, y, z] taken from the ATOM/HETATM record with residue
        name 'EXT' whose B-factor column (columns 61-66) is largest, or None
        if the file has no such record. On ties the first record wins.

    The function also runs a normal mode calculation on the alpha coordinates
    and prints their count. The rigid/flexible classification derived from it
    is NOT consulted by the selection at the moment (see the note below).
    """

    pdb_id = pdb[:4]
    chain = pdb[-1]

    ##
    ## calculate amplitudes
    ##
    d_mmCIF = parse_mmCIF.main(pdb_id)
    d_coords, l_coords_alpha = mmCIF2coords.main(pdb_id,d_mmCIF,query_chain=chain)
    print(len(l_coords_alpha))

    ##
    ## eigenvector
    ##
    cutoff = 10
    matrix_hessian = NMA.hessian_calculation(l_coords_alpha,cutoff)
    eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian)
    ## NOTE(review): index 6 is presumably the first non-rigid-body mode -
    ## confirm against NMA.diagonalize_hessian.
    mode = eigenvectors[6]
    ## Norm of each consecutive (x, y, z) triple of the mode.
    l_amplitudes = [
        math.sqrt(mode[i]**2+mode[i+1]**2+mode[i+2]**2)
        for i in range(0,len(mode),3)
        ]

    ## average amplitude (the hand-computed mean that used to precede this
    ## call was immediately overwritten by it and has been dropped)
    average,stddev = statistics.do_stddev(l_amplitudes)

    ## Classify alpha coordinates by amplitude.
    ## NOTE: these two lists are not used below. The filters that consumed
    ## them were disabled in the original code:
    ##   1) require the probe atom to be within 6 of a rigid coordinate,
    ##   2) reject it if within 6 of a flexible coordinate,
    ##   3) reject it if the nearest alpha coordinate has an amplitude
    ##      above average+stddev.
    l_coords_rigid = [
        l_coords_alpha[i] for i in range(len(l_coords_alpha))
        if l_amplitudes[i] < average
        ]
    l_coords_flexible = [
        l_coords_alpha[i] for i in range(len(l_coords_alpha))
        if l_amplitudes[i] > average+0.5*stddev
        ]

    ## parse output
    max_bfactor = None
    coord = None
    fn_probe = 'output/%s/%s_%s_probe.pdb' %(path,pdb_id,chain,)
    ## 'with' closes the file even if a malformed record raises below.
    with open(fn_probe,'r') as fd:
        for line in fd:
            if line[:6].strip() not in ('ATOM','HETATM',):
                continue
            if line[17:20] != 'EXT':
                continue

            bfactor = float(line[60:66])

            ## Explicit None test: the original relied on Python 2 ordering
            ## None below every number, which is a TypeError in Python 3.
            if max_bfactor is None or bfactor > max_bfactor:
                coord = numpy.array([
                    float(line[30:38]),
                    float(line[38:46]),
                    float(line[46:54]),
                    ])
                max_bfactor = bfactor

    return coord
コード例 #7
0
    '1u3fA',
    '1agyA',
    '1zioA',
    '1pa9A',
    '2tpsA',
    '2plcA',
    '1qk2A',
    '1j53A',
    '1m21A',
]

cutoff = 10  ## second argument of NMA.hessian_calculation

## For every entry of l_pdbs: parse the structure, run the normal mode
## calculation and pass the result to visualization.vmd_arrows.
for pdb in l_pdbs:

    ## Entries are a 4-character PDB id followed by a chain id (e.g. '1m21A').
    ## Take the chain BEFORE truncating: the original truncated first, so
    ## pdb[4:] was always '' and the chain id never reached mmCIF2coords.
    chain = pdb[4:]
    pdb = pdb[:4]

    d = parse_mmCIF.main(pdb)

    d_coords, l_coords = mmCIF2coords.main(pdb, d, query_chain=chain)

    matrix_hessian = NMA.hessian_calculation(l_coords, cutoff, verbose=False)

    eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian,
                                                        verbose=False)

    visualization.vmd_arrows(pdb, l_coords, eigenvectors)

    print(pdb)
    ## NOTE(review): 'stop' is an undefined name; evaluating it raises
    ## NameError and halts the script after the first entry. Kept as in the
    ## original - remove it to process the whole list.
    stop