# Stand-alone script: parses entry '2lzm', builds and diagonalizes a Hessian
# from its coordinates, and hands the resulting eigenvectors to
# visualization.vmd_trajectory.
import sys
# The project modules are not on the default search path, so the path is
# extended before each group of imports (order matters here).
sys.path.append('/home/tc/svn/tc_sandbox/pdb')
import parse_mmCIF, mmCIF2coords
sys.path.append('/home/tc/svn/GoodVibes')
import NMA, visualization

# Parse the mmCIF data and extract coordinates from it.
d_mmCIF = parse_mmCIF.main('2lzm',)
d_coords, l_coords_alpha = mmCIF2coords.main('2lzm',d_mmCIF)

# NOTE(review): presumably a distance cutoff in Angstrom for the elastic
# network; the unit is not visible here - confirm against NMA.
cutoff = 10
matrix_hessian = NMA.hessian_calculation(l_coords_alpha, cutoff)
eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian,)

# eigenvalues is not used below; only the eigenvectors are visualized.
visualization.vmd_trajectory('2lzm',l_coords_alpha,eigenvectors)
def _max_bfactor_probe_coord(lines):
    """Return the x/y/z of the 'EXT' record with the largest B-factor field.

    Only ATOM/HETATM records whose residue name (columns 18-20) is 'EXT' are
    considered. The value compared is the float in columns 61-66.
    Returns a numpy array, or None when no such record exists.
    """
    max_bfactor = None
    coord = None
    for line in lines:
        if line[:6].strip() not in ('ATOM', 'HETATM'):
            continue
        if line[17:20] != 'EXT':
            continue
        bfactor = float(line[60:66])
        # Explicit None check: the original relied on Python 2 ordering
        # None below every number, which raises TypeError on Python 3.
        if max_bfactor is None or bfactor > max_bfactor:
            coord = numpy.array([
                float(line[30:38]),
                float(line[38:46]),
                float(line[46:54]),
            ])
            max_bfactor = bfactor
    return coord


def parse_GoodVibes_exclude_flexible(pdb, path,):
    """Return the coordinate of the highest-scoring probe atom of a structure.

    The function first runs the normal mode calculation for the requested
    chain and derives one amplitude per coordinate triplet from eigenvector
    index 6. It then reads ``output/<path>/<id>_<chain>_probe.pdb`` and
    returns the position of the 'EXT' record with the largest B-factor field.

    Parameters
    ----------
    pdb : str
        The first four characters are used as the entry id and the last
        character as the chain id.
    path : str
        Sub-directory of ``output/`` that holds the probe file.

    Returns
    -------
    numpy.ndarray or None
        ``[x, y, z]`` of the selected probe atom, or None if the probe file
        contains no 'EXT' ATOM/HETATM record.

    Notes
    -----
    The rigid/flexible filters that used the amplitudes (distance of a probe
    to rigid or flexible residues) were disabled in the original code, so the
    amplitude statistics are computed but do not influence the result.
    """
    pdb_id = pdb[:4]
    chain = pdb[-1]

    ## calculate amplitudes
    d_mmCIF = parse_mmCIF.main(pdb_id,)
    d_coords, l_coords_alpha = mmCIF2coords.main(
        pdb_id, d_mmCIF, query_chain=chain)
    print(len(l_coords_alpha))

    ## eigenvector
    cutoff = 10
    matrix_hessian = NMA.hessian_calculation(l_coords_alpha, cutoff,)
    eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian)
    # NOTE(review): index 6 is presumably the first non-rigid-body mode;
    # confirm against NMA.diagonalize_hessian.
    eigenvector = eigenvectors[6]
    l_amplitudes = [
        math.sqrt(
            eigenvector[i]**2 + eigenvector[i + 1]**2 + eigenvector[i + 2]**2)
        for i in range(0, len(eigenvector), 3)
    ]

    ## average amplitude
    # The plain mean that used to be computed here was overwritten on the
    # very next line, so only the do_stddev result is kept.
    average, stddev = statistics.do_stddev(l_amplitudes)

    # Kept for the (currently disabled) vicinity filters; not used further.
    l_coords_rigid = [
        coord for coord, amplitude in zip(l_coords_alpha, l_amplitudes)
        if amplitude < average
    ]
    l_coords_flexible = [
        coord for coord, amplitude in zip(l_coords_alpha, l_amplitudes)
        if amplitude > average + 0.5 * stddev
    ]

    ## parse output
    fn = 'output/%s/%s_%s_probe.pdb' % (path, pdb_id, chain,)
    with open(fn, 'r') as fd:
        lines = fd.readlines()

    return _max_bfactor_probe_coord(lines)
def main(
    pdb, chain, dist_max, dist_min,
    mode='single', v_apoholo=None, l_coords_probe=None,
    l_coords_protein_alpha=None,
    ):
    """Score probe positions by their effect on the protein's normal modes.

    The protein Hessian is built with do_interactions and diagonalized. Then,
    for every probe coordinate, the probe is appended to the alpha carbon
    coordinates, the modes are recalculated and an overlap value is stored.
    The overlaps are finally written out with write_pdb.

    Parameters
    ----------
    pdb, chain :
        Identify the structure; passed to parse_pdb_coordinates and used in
        the name of the cached probe file.
    dist_max, dist_min :
        Distance limits; their squares are passed to add_probe_atoms.
    mode : str
        'single' or 'multiple'. Only consulted when v_apoholo is None.
    v_apoholo : sequence or None
        Apo-to-holo difference vector. When given, each overlap is the
        normalized dot product between this vector and the perturbed mode
        selected by find_max_mode_apo_holo.
    l_coords_probe : sequence or None
        Probe coordinates. When empty or None they are read from the cached
        probe file if it exists, otherwise generated with add_probe_atoms.
    l_coords_protein_alpha : list or None
        Alpha carbon coordinates; parsed from the structure when None.

    Returns
    -------
    dict or list
        With v_apoholo: a dict with the keys 'mode_max_apoholo',
        'overlap_max_apoholo', 'l_overlaps', 'l_factors' and 'eigenvectors'.
        Without v_apoholo: the list of overlaps.

    Raises
    ------
    ValueError
        If v_apoholo is None and mode is neither 'single' nor 'multiple'
        (the original aborted here through an undefined name).
    """

    ## settings
    dist_min_sq = dist_min**2
    dist_max_sq = dist_max**2

    ## parse coordinates
    d_coords = parse_pdb_coordinates(pdb, chain,)

    ## parse alpha carbon atoms
    if l_coords_protein_alpha is None:
        l_coords_protein_alpha = parse_alpha_carbon_atoms(d_coords,)

    ## calculate and diagonalize hessian matrix
    matrix_hessian_protein = do_interactions(l_coords_protein_alpha,)
    eigenvectors_protein, eigenvalues_protein = NMA.diagonalize_hessian(
        matrix_hessian_protein,)

    # Identity test instead of "!= None": on a numpy vector the latter is an
    # element-wise comparison and cannot be used as a condition.
    bool_apoholo = v_apoholo is not None

    if bool_apoholo:
        mode_max_apoholo, overlap_max_apoholo, l_factors = find_max_mode_apo_holo(
            pdb, eigenvectors_protein, v_apoholo, eigenvalues_protein,
            )

    ## determine dimensions of protein
    d_dimensions = determine_protein_dimensions(l_coords_protein_alpha,)

    ## add probe atoms
    fn = '/home/tc/UCD/GV_ligand_binding_site_identification/output/GoodVibes/distmax6_distmin3/%s_%s_probe.pdb' % (pdb, chain,)
    if l_coords_probe is not None and len(l_coords_probe) > 0:
        print('a')
    elif os.path.isfile(fn):
        print('b')
        l_coords_probe = []
        with open(fn) as fd:
            lines = fd.readlines()
        for line in lines:
            record = line[:6].strip()
            if record == 'HETATM' and line[17:20] == 'EXT':
                coord = numpy.array([
                    float(line[30:38]),
                    float(line[38:46]),
                    float(line[46:54]),
                ])
                l_coords_probe += [coord]
    else:
        l_coords_probe = add_probe_atoms(
            d_coords, d_dimensions, dist_min_sq, dist_max_sq,)

    ## calculate overlaps
    n_probes = len(l_coords_probe)
    print('looping over %s probe coordinates' % (n_probes,))
    l_overlaps = []
    for i, coord_holo in enumerate(l_coords_probe):

        print('%s %s' % (i, n_probes))

        l_coords = l_coords_protein_alpha + [coord_holo]
        matrix_hessian_holo = do_interactions(l_coords)
        try:
            eigenvectors_holo, eigenvalues_holo = NMA.diagonalize_hessian(
                matrix_hessian_holo)
        except Exception:
            # Best effort, as before: a failed diagonalization scores 1.
            # Unlike a bare except, this no longer swallows Ctrl-C.
            print('exception')
            l_overlaps += [1.]
            continue

        if bool_apoholo:
            ## compare to x-ray motion
            v1 = v_apoholo
            # drop the three components that belong to the probe atom
            v2 = eigenvectors_holo[mode_max_apoholo][:-3]
            overlap = abs(numpy.dot(v1, v2)) / math.sqrt(
                numpy.dot(v1, v1) * numpy.dot(v2, v2))
            print('overlap %s' % (overlap,))
            # NOTE: the search over neighbouring modes and the re-solve of
            # the mode contributions were guarded by "if 2+2 == 5" and could
            # never run; they have been removed.
            l_overlaps += [overlap]

        elif mode == 'single':
            eigenvectors_holo = eigenvectors_holo[:-3]
            l = []
            ## check the lowest modes in case eigenvalues have swapped
            ## (modes 6-9; stop at the first overlap above 0.9)
            for mode_holo in range(6, 10):
                overlap = calc_overlap(
                    eigenvectors_protein, eigenvectors_holo,
                    mode_holo=mode_holo,
                    )
                l += [overlap]
                if overlap > 0.9:
                    break
            overlap_max = max(l)
            print('%s %s %s %s' % (pdb, i, n_probes, overlap_max))
            l_overlaps += [overlap_max]

        elif mode == 'multiple':
            eigenvectors_holo = eigenvectors_holo[:-3]
            # NOTE(review): l_factors is only assigned when v_apoholo is
            # given, but this branch is only reached when it is not, so this
            # call raises NameError as written - confirm the intended source
            # of l_factors.
            overlap = calc_overlap(
                eigenvectors_protein, eigenvectors_holo,
                eigenvalues_protein, eigenvalues_holo,
                l_factors=l_factors,
                )
            l_overlaps += [overlap]
            print('%s %s %s' % (overlap, i, n_probes))

        else:
            print(sys.argv)
            raise ValueError('unknown mode: %r' % (mode,))

    if bool_apoholo and len(l_overlaps) > 1:
        print(l_overlaps)
        l_overlaps = fix_overlaps(l_overlaps)
        print('%s %s' % (max(l_overlaps), min(l_overlaps)))

    if not bool_apoholo or len(l_overlaps) > 1:
        write_pdb(l_overlaps, l_coords_probe, pdb, chain,)

    if bool_apoholo:
        return {
            'mode_max_apoholo': mode_max_apoholo,
            'overlap_max_apoholo': overlap_max_apoholo,
            'l_overlaps': l_overlaps,
            'l_factors': l_factors,
            'eigenvectors': eigenvectors_protein,
        }

    print('how much to return to function that called me? just l_overlaps?')
    return l_overlaps
# Stand-alone script: parses entry '2lzm', builds and diagonalizes a Hessian
# from its coordinates, and hands the resulting eigenvectors to
# visualization.vmd_trajectory.
import sys
# The project modules are not on the default search path, so the path is
# extended before each group of imports (order matters here).
sys.path.append('/home/tc/svn/tc_sandbox/pdb')
import parse_mmCIF, mmCIF2coords
sys.path.append('/home/tc/svn/GoodVibes')
import NMA, visualization

# Parse the mmCIF data and extract coordinates from it.
d_mmCIF = parse_mmCIF.main('2lzm', )
d_coords, l_coords_alpha = mmCIF2coords.main('2lzm', d_mmCIF)

# NOTE(review): presumably a distance cutoff in Angstrom for the elastic
# network; the unit is not visible here - confirm against NMA.
cutoff = 10
matrix_hessian = NMA.hessian_calculation(l_coords_alpha, cutoff)
eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian, )

# eigenvalues is not used below; only the eigenvectors are visualized.
visualization.vmd_trajectory('2lzm', l_coords_alpha, eigenvectors)
'1czfA', '1thgA', '1booA', '1iu4A', '1bqcA', '206lA', '1cdeA', '1snzA', '1gq8A', '1aqlA', '1ps1A', '1s95A', '1pylA', '1ra2A', '1b6bA', '1pntA', '1e1aA', '2f9rA', '1v04A', '2nlrA', '1n29A', '1pbgA', '5cpaA', '1agmA', '1byaA', '1r76A', '1u5uA', '1vidA', '1h4gA', '1akdA', '1fy2A', '1xqdA', '1d6oA', '1qv0A', '1qjeA', '1fvaA', '1bp2A', '1ah7A', '2pthA', '2engA', '2acyA', '1qazA', '2a0nA', '1dl2A', '1gp5A', '1onrA', '1cwyA', '1pudA', '1bs9A', '1dinA', '1xyzA', '1bwlA', '1eugA', '1idjA', '1g24A', '1oygA', '1hzfA', '9papA', '1eb6A', '1ghsA', '1rbnA', '1bixA', '1bs4A', '1celA', '1hkaA', '1b02A', '1qibA', '1u3fA', '1agyA', '1zioA', '1pa9A', '2tpsA', '2plcA', '1qk2A', '1j53A', '1m21A', ] cutoff = 10 for pdb in l_pdbs: pdb = pdb[:4] d = parse_mmCIF.main(pdb,) d_coords, l_coords = mmCIF2coords.main(pdb, d, query_chain = pdb[4:]) matrix_hessian = NMA.hessian_calculation(l_coords, cutoff, verbose = False) eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian, verbose = False) visualization.vmd_arrows(pdb, l_coords, eigenvectors) print pdb stop
def main():
    """Compare pairs of structures within each sequence cluster.

    Reads '<path_mmCIF>/bc-100.out' (one cluster of ids per line) and, for
    every pair of entries of a cluster that passes the filters below,
    superposes the two coordinate sets, calculates normal modes for both and
    appends RMSD/overlap pairs to text files under 'rmsd_v_overlap2/'.

    Takes no arguments and returns None; results are only written to files
    and printed.

    NOTE(review): as written, the function prints the first accepted pdb1 and
    then hits the bare name ``stop``, which is not defined in the visible
    code and therefore presumably aborts with NameError. Everything after
    that statement looks like it is currently not executed - confirm whether
    the halt is a debugging leftover.
    """

    set_pdbs = exclude_include()

    ## entries excluded because of annotation problems in the mmCIF files
    l_pdbs_remove = [
        '4a3h','2wf5','1arl','1ee3', ## incorrect _struct_ref_seq.pdbx_db_accession
        '1uyd','1uye','1uyf','2byh','2byi', ## remediation _struct_ref_seq_dif
        '2xdu','3dn8','3dna','1ps3','1ouf','1l35','2eun','1rtc','1zon', ## _struct_ref_seq_dif missing
        '1pwl','1pwm','2fz8','2fz9', ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
        ]
    ## each remove() raises if the id is not present in set_pdbs
    set_pdbs.remove('1f92') ## remediation _struct_ref_seq_dif incorrect residue number
    set_pdbs.remove('2f6f') ## remediation _pdbx_poly_seq_scheme.auth_mon_id wrong
    set_pdbs.remove('3a5j') ## remediation _struct_ref_seq_dif.db_mon_id is ? but should be MET
    set_pdbs.remove('2rhx') ## remediation _struct_ref_seq_dif.db_mon_id is ? but should be SER
    set_pdbs.remove('2fzb') ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
    set_pdbs.remove('2fzd') ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
    set_pdbs.remove('3dn5') ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
    set_pdbs.remove('1x96') ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
    set_pdbs.remove('1x97') ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
    set_pdbs.remove('1x98') ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
    set_pdbs.remove('1z3n') ## GenBank DBref - not an error...
    set_pdbs.remove('1z8a') ## GenBank DBref - not an error...
    set_pdbs.remove('1z89') ## GenBank DBref - not an error...
    set_pdbs.remove('2pf8') ## stupid use of alt_ids (C for highest occupancy and only altloc)
    set_pdbs.remove('2pyr') ## stupid use of alt_ids (G and R)
    set_pdbs.remove('3pdn') ## stupid use of alt_ids (B and C)
    set_pdbs.remove('2v4c') ## alt_id B used for 100% occupancy atoms
    set_pdbs.remove('1jxt') ## weird alt_id microheterogeneity...
    set_pdbs.remove('1jxu') ## weird alt_id microheterogeneity...
    set_pdbs.remove('1jxw') ## weird alt_id microheterogeneity...
    set_pdbs.remove('1jxx') ## weird alt_id microheterogeneity...
    set_pdbs.remove('1jxy') ## weird alt_id microheterogeneity...
##    set_pdbs.remove('1ac4') ## multiple strains and taxonomy ids but all same organism (S. cerevisiae)...
##    set_pdbs.remove('1ac8') ## multiple strains and taxonomy ids but all same organism (S. cerevisiae)...
##    set_pdbs.remove('1aeb') ## multiple strains and taxonomy ids but all same organism (S. cerevisiae)...
##    set_pdbs.remove('2rbt') ## multiple strains and taxonomy ids but all same organism (S. cerevisiae)... UNP A7A026, TAX 307796, STRAIN YJM789
##    set_pdbs.remove('2rbu') ## multiple strains and taxonomy ids but all same organism (S. cerevisiae)... UNP A7A026, TAX 307796, STRAIN YJM789
##    set_pdbs.remove('2rbv') ## multiple strains and taxonomy ids but all same organism (S. cerevisiae)... UNP A7A026, TAX 307796, STRAIN YJM789

    for pdb in l_pdbs_remove:
        set_pdbs.remove(pdb)

    ## path_mmCIF is not defined in this function; it must come from module scope
    fd = open('%s/bc-100.out' %(path_mmCIF),'r')
    lines = fd.readlines()
    fd.close()

    ## one cluster per line; the line index doubles as the cluster number
    for i_line in range(len(lines)):

        cluster = i_line
        ## NOTE(review): hard-coded start cluster; looks like a manual
        ## "resume from here" setting - confirm before a full run
        if cluster < 4816:
            continue
##        if cluster not in [5,]:
##            continue

        line = lines[i_line]
        l_pdbs = line.lower().split()
        l_pdbs.sort()
        ## keep only the first four characters of each id
        for i_pdb in range(len(l_pdbs)):
            l_pdbs[i_pdb] = l_pdbs[i_pdb][:4]

        for i_pdb1 in range(0,len(l_pdbs)-1):

            pdb1 = l_pdbs[i_pdb1]

##            if pdb1 != '1t49': ## tmp!!!
##                continue

            if not pdb1 in set_pdbs:
                continue

            ## NOTE(review): "stop" is an undefined name in the visible code;
            ## evaluating it presumably raises NameError and halts the run
            ## right after the first accepted pdb1 is printed
            print pdb1
            stop

            d_mmCIF1 = parse_mmCIF.main(pdb1,)
            bool_monomeric = check_monomeric(d_mmCIF1)
            if bool_monomeric == False:
                ## if the first entry of the cluster is not monomeric the
                ## whole cluster is abandoned, otherwise only this entry
                if i_pdb1 == 0:
                    break
                else:
                    continue
            bool_remediation_modres = check_modres(d_mmCIF1,pdb1,)
            if bool_remediation_modres == True:
                continue

            ## skip entries annotated with a deletion
            if '_struct_ref_seq_dif.details' in d_mmCIF1.keys():
                if 'DELETION' in d_mmCIF1['_struct_ref_seq_dif.details']:
                    continue

            ## halt (via the undefined name) if a polymer entity does not
            ## have exactly one molecule
            for i_entity in range(len(d_mmCIF1['_entity.id'])):
                if d_mmCIF1['_entity.type'][i_entity] == 'polymer':
                    if int(d_mmCIF1['_entity.pdbx_number_of_molecules'][i_entity]) != 1:
                        print d_mmCIF1['_entity.pdbx_number_of_molecules']
                        print pdb1, cluster
                        stop

            SG1 = d_mmCIF1['_symmetry.space_group_name_H-M']

            for i_pdb2 in range(i_pdb1+1,len(l_pdbs)):

                pdb2 = l_pdbs[i_pdb2]

##                if pdb2 != '2pf8': ## tmp!!!
##                    continue

##                if pdb1 != '3fui' or pdb2 != '3fuj':
##                    continue

                if not pdb2 in set_pdbs:
                    continue

                d_mmCIF2 = parse_mmCIF.main(pdb2,)
                bool_monomeric = check_monomeric(d_mmCIF2)
                if bool_monomeric == False:
                    continue
                bool_remediation_modres = check_modres(d_mmCIF2,pdb2,)
                if bool_remediation_modres == True:
                    continue

                ## NOTE(review): the key tested here ('.seq_num') differs from
                ## the key read on the next line ('.details') and from the
                ## corresponding check for pdb1 - confirm this is intended
                if '_struct_ref_seq_dif.seq_num' in d_mmCIF2.keys():
                    if 'DELETION' in d_mmCIF2['_struct_ref_seq_dif.details']:
                        continue

                ## biounit monomeric?
                ## NOTE(review): this "continue" only advances the i_entity
                ## loop, so the check does not skip pdb2 - confirm intent
                for i_entity in range(len(d_mmCIF2['_entity.id'])):
                    if d_mmCIF2['_entity.type'][i_entity] == 'polymer':
                        if int(d_mmCIF2['_entity.pdbx_number_of_molecules'][i_entity]) != 1:
                            continue

                ## only compare structures with the same space group
                SG2 = d_mmCIF2['_symmetry.space_group_name_H-M']
                if SG1 != SG2:
                    continue

                ## parse coordinates again after being shortened in previous loop
                ## NOTE(review): on failure the id is logged but execution
                ## continues with whatever coordinates are left from an
                ## earlier iteration (or none at all) - confirm intent
                try:
                    d_coords1, l_coords_alpha1 = mmCIF2coords.main(pdb1, d_mmCIF1)
                except:
                    fd = open('remediation_atom_site.label_alt_id.txt','a')
                    fd.write('%s\n' %(pdb1,))
                    fd.close()
                try:
                    d_coords2, l_coords_alpha2 = mmCIF2coords.main(pdb2, d_mmCIF2)
                except:
                    fd = open('remediation_atom_site.label_alt_id.txt','a')
                    fd.write('%s\n' %(pdb2,))
                    fd.close()

                ## align sequences/coordinates
                ## (a failing alignment is logged and the pair is skipped)
                try:
                    l_coords_alpha1, l_coords_alpha2 = create_apo_holo_dataset.sequential_alignment_of_coordinates(
                        l_coords_alpha1, l_coords_alpha2,
                        d_mmCIF1, d_mmCIF2,
                        pdb1, pdb2,
                        )
                except:
                    fd = open('remediation_struct_ref_seq_dif.txt','a')
                    fd.write(
                        '%s %s %s %s\n' %(
                            pdb1,pdb2,
                            d_mmCIF1['_struct_ref_seq.pdbx_db_accession'],
                            d_mmCIF2['_struct_ref_seq.pdbx_db_accession'],
                            )
                        )
                    fd.close()
                    continue
                ## diagnostic dump and halt if the aligned coordinate lists
                ## still differ in length
                if len(l_coords_alpha1) != len(l_coords_alpha2):
                    print d_mmCIF1['_pdbx_poly_seq_scheme.pdb_mon_id']
                    print d_mmCIF2['_pdbx_poly_seq_scheme.pdb_mon_id']
                    print 'coords', len(l_coords_alpha1), len(l_coords_alpha2)
                    print 'seq', len(d_mmCIF1['_pdbx_poly_seq_scheme.pdb_mon_id'])
                    print 'seq', len(d_mmCIF2['_pdbx_poly_seq_scheme.pdb_mon_id'])
                    print pdb1, pdb2
                    d_coords1, l_coords_alpha1 = mmCIF2coords.main(pdb1, d_mmCIF1)
                    ## NOTE(review): pdb1 is passed together with d_mmCIF2 and
                    ## the result is stored in d_coords1 - possibly a typo
                    d_coords1, l_coords_alpha2 = mmCIF2coords.main(pdb1, d_mmCIF2)
                    print len(l_coords_alpha1), len(l_coords_alpha2)
                    stop
                    continue

                ##
                ## align structure 1 and 2
                ##
                instance_geometry = geometry.geometry()
                rmsd = instance_geometry.superpose(l_coords_alpha1,l_coords_alpha2)
                tv1 = instance_geometry.fitcenter
                rm = instance_geometry.rotation
                tv2 = instance_geometry.refcenter

                ## structural alignment
                ## (translate by -tv1, rotate by rm, translate by +tv2)
                for i_coord in range(len(l_coords_alpha2)):
                    l_coords_alpha2[i_coord] = numpy.dot(l_coords_alpha2[i_coord]-tv1,rm)+tv2

                ##
                ## vector from structure 1 to 2
                ## (computed as coordinates of 1 minus coordinates of 2,
                ## flattened to x1,y1,z1,x2,...)
                ##
                vector = []
                for i in range(len(l_coords_alpha1)):
                    vector += [
                        l_coords_alpha1[i][0]-l_coords_alpha2[i][0],
                        l_coords_alpha1[i][1]-l_coords_alpha2[i][1],
                        l_coords_alpha1[i][2]-l_coords_alpha2[i][2],
                        ]
                vector = numpy.array(vector)

                ##
                ## calculate normal modes of structure 1
                ## (and of structure 2; any failure skips the pair silently)
                ##
                cutoff = 10
                try:
                    matrix_hessian1 = NMA.hessian_calculation(l_coords_alpha1, cutoff, verbose = False)
                    eigenvectors1, eigenvalues1 = NMA.diagonalize_hessian(matrix_hessian1, verbose = False)
                    matrix_hessian2 = NMA.hessian_calculation(l_coords_alpha2, cutoff, verbose = False)
                    eigenvectors2, eigenvalues2 = NMA.diagonalize_hessian(matrix_hessian2, verbose = False)
                except:
                    continue

                ##
                ## calculate overlap between normal modes and difference vector
                ##
                eigenvector1 = eigenvectors1[6]
                eigenvector2 = eigenvectors2[6]
                overlap1 = calc_overlap(eigenvector1,vector)
                overlap2 = calc_overlap(eigenvector2,vector)
                ## mode-to-mode overlaps, including the 6/7 cross terms
                overlap3a = calc_overlap(eigenvector1,eigenvector2)
                overlap3b = calc_overlap(eigenvectors1[6],eigenvectors2[7])
                overlap3c = calc_overlap(eigenvectors1[7],eigenvectors2[6])
                overlap3 = max(overlap3a,overlap3b,overlap3c)

                ## overlap1 and overlap2 are appended to the same file
                fd = open('rmsd_v_overlap2/cluster%i.txt' %(i_line),'a')
                fd.write('%s %s\n' %(rmsd,overlap1))
                fd.close()
                fd = open('rmsd_v_overlap2/cluster%i.txt' %(i_line),'a')
                fd.write('%s %s\n' %(rmsd,overlap2))
                fd.close()
                fd = open('rmsd_v_overlap2/cluster%i_ev_v_ev.txt' %(i_line),'a')
                fd.write('%s %s\n' %(rmsd,overlap3a))
                fd.close()
                fd = open('rmsd_v_overlap2/cluster%i_ev_v_ev_max.txt' %(i_line),'a')
                fd.write('%s %s\n' %(rmsd,overlap3))
                fd.close()

                ## trailing comma: both prints end up on one output line
                print pdb1, pdb2, 'cluster', i_line, 'size', len(l_pdbs),
                print 'overlap', '%4.2f' %(round(overlap1,2)), '%4.2f' %(round(overlap2,2)), '%4.2f' %(round(overlap3,2)), 'rmsd', '%4.2f' %(round(rmsd,2))

    return
def main(
    pdb,chain,dist_max,dist_min,
    mode='single',v_apoholo=None,l_coords_probe=None,
    l_coords_protein_alpha=None,
    ):
    """Score probe positions by their effect on the protein's normal modes.

    The protein Hessian is built with do_interactions and diagonalized. Then,
    for every probe coordinate, the probe is appended to the alpha carbon
    coordinates, the modes are recalculated and an overlap value is stored.
    The overlaps are finally written out with write_pdb.

    Parameters
    ----------
    pdb, chain :
        Identify the structure; passed to parse_pdb_coordinates and used in
        the name of the cached probe file.
    dist_max, dist_min :
        Distance limits; their squares are passed to add_probe_atoms.
    mode : str
        'single' or 'multiple'. Only consulted when v_apoholo is None.
    v_apoholo : sequence or None
        Apo-to-holo difference vector. When given, each overlap is the
        normalized dot product between this vector and the perturbed mode
        selected by find_max_mode_apo_holo.
    l_coords_probe : sequence or None
        Probe coordinates. When empty or None they are read from the cached
        probe file if it exists, otherwise generated with add_probe_atoms.
    l_coords_protein_alpha : list or None
        Alpha carbon coordinates; parsed from the structure when None.

    Returns
    -------
    dict or list
        With v_apoholo: a dict with the keys 'mode_max_apoholo',
        'overlap_max_apoholo', 'l_overlaps', 'l_factors' and 'eigenvectors'.
        Without v_apoholo: the list of overlaps.

    Raises
    ------
    ValueError
        If v_apoholo is None and mode is neither 'single' nor 'multiple'
        (the original aborted here through an undefined name).
    """

    ## settings
    dist_min_sq = dist_min**2
    dist_max_sq = dist_max**2

    ## parse coordinates
    d_coords = parse_pdb_coordinates(pdb,chain,)

    ## parse alpha carbon atoms
    if l_coords_protein_alpha is None:
        l_coords_protein_alpha = parse_alpha_carbon_atoms(d_coords,)

    ## calculate and diagonalize hessian matrix
    matrix_hessian_protein = do_interactions(l_coords_protein_alpha,)
    eigenvectors_protein, eigenvalues_protein = NMA.diagonalize_hessian(
        matrix_hessian_protein,)

    # Identity test instead of "!= None": on a numpy vector the latter is an
    # element-wise comparison and cannot be used as a condition.
    bool_apoholo = v_apoholo is not None

    if bool_apoholo:
        mode_max_apoholo, overlap_max_apoholo, l_factors = find_max_mode_apo_holo(
            pdb,eigenvectors_protein,v_apoholo,eigenvalues_protein,
            )

    ## determine dimensions of protein
    d_dimensions = determine_protein_dimensions(l_coords_protein_alpha,)

    ## add probe atoms
    fn = '/home/tc/UCD/GV_ligand_binding_site_identification/output/GoodVibes/distmax6_distmin3/%s_%s_probe.pdb' %(pdb,chain,)
    if l_coords_probe is not None and len(l_coords_probe) > 0:
        print('a')
    elif os.path.isfile(fn):
        print('b')
        l_coords_probe = []
        with open(fn) as fd:
            lines = fd.readlines()
        for line in lines:
            record = line[:6].strip()
            if record == 'HETATM' and line[17:20] == 'EXT':
                coord = numpy.array([
                    float(line[30:38]),
                    float(line[38:46]),
                    float(line[46:54]),
                    ])
                l_coords_probe += [coord]
    else:
        l_coords_probe = add_probe_atoms(
            d_coords,d_dimensions,dist_min_sq,dist_max_sq,)

    ## calculate overlaps
    n_probes = len(l_coords_probe)
    print('looping over %s probe coordinates' %(n_probes,))
    l_overlaps = []
    for i, coord_holo in enumerate(l_coords_probe):

        print('%s %s' %(i,n_probes))

        l_coords = l_coords_protein_alpha+[coord_holo]
        matrix_hessian_holo = do_interactions(l_coords)
        try:
            eigenvectors_holo, eigenvalues_holo = NMA.diagonalize_hessian(
                matrix_hessian_holo)
        except Exception:
            # Best effort, as before: a failed diagonalization scores 1.
            # Unlike a bare except, this no longer swallows Ctrl-C.
            print('exception')
            l_overlaps += [1.]
            continue

        if bool_apoholo:
            ## compare to x-ray motion
            v1 = v_apoholo
            # drop the three components that belong to the probe atom
            v2 = eigenvectors_holo[mode_max_apoholo][:-3]
            overlap = abs(numpy.dot(v1,v2))/math.sqrt(
                numpy.dot(v1,v1)*numpy.dot(v2,v2))
            print('overlap %s' %(overlap,))
            # NOTE: the search over neighbouring modes and the re-solve of
            # the mode contributions were guarded by "if 2+2 == 5" and could
            # never run; they have been removed.
            l_overlaps += [overlap]

        elif mode == 'single':
            eigenvectors_holo = eigenvectors_holo[:-3]
            l = []
            ## check the lowest modes in case eigenvalues have swapped
            ## (modes 6-9; stop at the first overlap above 0.9)
            for mode_holo in range(6,10):
                overlap = calc_overlap(
                    eigenvectors_protein, eigenvectors_holo,
                    mode_holo = mode_holo,
                    )
                l += [overlap]
                if overlap > 0.9:
                    break
            overlap_max = max(l)
            print('%s %s %s %s' %(pdb,i,n_probes,overlap_max))
            l_overlaps += [overlap_max]

        elif mode == 'multiple':
            eigenvectors_holo = eigenvectors_holo[:-3]
            # NOTE(review): l_factors is only assigned when v_apoholo is
            # given, but this branch is only reached when it is not, so this
            # call raises NameError as written - confirm the intended source
            # of l_factors.
            overlap = calc_overlap(
                eigenvectors_protein,eigenvectors_holo,
                eigenvalues_protein, eigenvalues_holo,
                l_factors = l_factors,
                )
            l_overlaps += [overlap]
            print('%s %s %s' %(overlap,i,n_probes))

        else:
            print(sys.argv)
            raise ValueError('unknown mode: %r' %(mode,))

    if bool_apoholo and len(l_overlaps) > 1:
        print(l_overlaps)
        l_overlaps = fix_overlaps(l_overlaps)
        print('%s %s' %(max(l_overlaps),min(l_overlaps)))

    if not bool_apoholo or len(l_overlaps) > 1:
        write_pdb(l_overlaps,l_coords_probe,pdb,chain,)

    if bool_apoholo:
        return {
            'mode_max_apoholo':mode_max_apoholo,
            'overlap_max_apoholo':overlap_max_apoholo,
            'l_overlaps':l_overlaps,
            'l_factors':l_factors,
            'eigenvectors':eigenvectors_protein,
            }

    print('how much to return to function that called me? just l_overlaps?')
    return l_overlaps
def _max_bfactor_probe_coord(lines):
    """Return the x/y/z of the 'EXT' record with the largest B-factor field.

    Only ATOM/HETATM records whose residue name (columns 18-20) is 'EXT' are
    considered. The value compared is the float in columns 61-66.
    Returns a numpy array, or None when no such record exists.
    """
    max_bfactor = None
    coord = None
    for line in lines:
        if line[:6].strip() not in ('ATOM','HETATM'):
            continue
        if line[17:20] != 'EXT':
            continue
        bfactor = float(line[60:66])
        # Explicit None check: the original relied on Python 2 ordering
        # None below every number, which raises TypeError on Python 3.
        if max_bfactor is None or bfactor > max_bfactor:
            coord = numpy.array([
                float(line[30:38]),
                float(line[38:46]),
                float(line[46:54]),
                ])
            max_bfactor = bfactor
    return coord


def parse_GoodVibes_exclude_flexible(pdb,path,):
    """Return the coordinate of the highest-scoring probe atom of a structure.

    The function first runs the normal mode calculation for the requested
    chain and derives one amplitude per coordinate triplet from eigenvector
    index 6. It then reads ``output/<path>/<id>_<chain>_probe.pdb`` and
    returns the position of the 'EXT' record with the largest B-factor field.

    Parameters
    ----------
    pdb : str
        The first four characters are used as the entry id and the last
        character as the chain id.
    path : str
        Sub-directory of ``output/`` that holds the probe file.

    Returns
    -------
    numpy.ndarray or None
        ``[x, y, z]`` of the selected probe atom, or None if the probe file
        contains no 'EXT' ATOM/HETATM record.

    Notes
    -----
    The rigid/flexible filters that used the amplitudes (distance of a probe
    to rigid or flexible residues) were disabled in the original code, so the
    amplitude statistics are computed but do not influence the result.
    """
    pdb_id = pdb[:4]
    chain = pdb[-1]

    ## calculate amplitudes
    d_mmCIF = parse_mmCIF.main(pdb_id,)
    d_coords, l_coords_alpha = mmCIF2coords.main(
        pdb_id,d_mmCIF,query_chain=chain)
    print(len(l_coords_alpha))

    ## eigenvector
    cutoff = 10
    matrix_hessian = NMA.hessian_calculation(l_coords_alpha,cutoff,)
    eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian)
    # NOTE(review): index 6 is presumably the first non-rigid-body mode;
    # confirm against NMA.diagonalize_hessian.
    eigenvector = eigenvectors[6]
    l_amplitudes = [
        math.sqrt(
            eigenvector[i]**2+eigenvector[i+1]**2+eigenvector[i+2]**2
            )
        for i in range(0,len(eigenvector),3)
        ]

    ## average amplitude
    # The plain mean that used to be computed here was overwritten on the
    # very next line, so only the do_stddev result is kept.
    average,stddev = statistics.do_stddev(l_amplitudes)

    # Kept for the (currently disabled) vicinity filters; not used further.
    l_coords_rigid = [
        coord for coord, amplitude in zip(l_coords_alpha,l_amplitudes)
        if amplitude < average
        ]
    l_coords_flexible = [
        coord for coord, amplitude in zip(l_coords_alpha,l_amplitudes)
        if amplitude > average+0.5*stddev
        ]

    ## parse output
    fn = 'output/%s/%s_%s_probe.pdb' %(path,pdb_id,chain,)
    with open(fn,'r') as fd:
        lines = fd.readlines()

    return _max_bfactor_probe_coord(lines)
'1u3fA', '1agyA', '1zioA', '1pa9A', '2tpsA', '2plcA', '1qk2A', '1j53A', '1m21A', ] cutoff = 10 for pdb in l_pdbs: pdb = pdb[:4] d = parse_mmCIF.main(pdb, ) d_coords, l_coords = mmCIF2coords.main(pdb, d, query_chain=pdb[4:]) matrix_hessian = NMA.hessian_calculation(l_coords, cutoff, verbose=False) eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian, verbose=False) visualization.vmd_arrows(pdb, l_coords, eigenvectors) print pdb stop