def parse_GoodVibes_exclude_flexible(pdb, path,):

    ##
    ## calculate amplitudes
    ##
    d_mmCIF = parse_mmCIF.main(pdb[:4],)
    d_coords, l_coords_alpha = mmCIF2coords.main(pdb[:4], d_mmCIF, query_chain=pdb[-1])
    print len(l_coords_alpha)

    ##
    ## eigenvector
    ##
    cutoff = 10
    matrix_hessian = NMA.hessian_calculation(l_coords_alpha, cutoff,)
    eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian)
    ## per-residue amplitude of the first non-trivial mode (index 6)
    l_amplitudes = [
        math.sqrt(
            eigenvectors[6][i]**2+eigenvectors[6][i+1]**2+eigenvectors[6][i+2]**2
            )
        for i in range(0, len(eigenvectors[6]), 3)
        ]

    ##
    ## write pdb (color by bfactor)
    ##
    l_bfactors = [
        100*(l_amplitudes[i]-min(l_amplitudes))/(max(l_amplitudes)-min(l_amplitudes))
        for i in range(len(l_amplitudes))
        ]
##    fd = open('output/%s/%s_%s_probe.pdb' %(path,pdb[:4],pdb[-1],),'r')
##    lines = fd.readlines()
##    fd.close()
##    index = [-1,None,]
##    lines_out = []
##    for line in lines:
##        record = line[:6].strip()
##        if record != 'ATOM':
##            lines_out += [line]
##        else:
##            res_no = int(line[22:26])
##            if res_no != index[1]:
##                index = [index[0]+1,res_no,]
##            bfactor = l_bfactors[index[0]]
##            line_out = '%s%6.2f%s' %(line[:60],bfactor,line[66:],)
##            lines_out += [line_out]
##    fd = open('output/%s/%s_%s_probe_color_by_amplitude.pdb' %(path,pdb[:4],pdb[-1],),'w')
##    fd.writelines(lines_out)
##    fd.close()

    ## average amplitude
    average = sum(l_amplitudes)/len(l_amplitudes)
    average, stddev = statistics.do_stddev(l_amplitudes)

    ##
    l_coords_rigid = []
    for i in range(len(l_coords_alpha)):
        if l_amplitudes[i] < average:
            l_coords_rigid += [l_coords_alpha[i]]
    l_coords_flexible = []
    for i in range(len(l_coords_alpha)):
        if l_amplitudes[i] > average+0.5*stddev:
            l_coords_flexible += [l_coords_alpha[i]]

    ## parse output
    fd = open('output/%s/%s_%s_probe.pdb' %(path, pdb[:4], pdb[-1],), 'r')
    lines = fd.readlines()
    fd.close()
    max_bfactor = None
    coord = None
    for line in lines:
        record = line[:6].strip()
        if record not in ['ATOM', 'HETATM',]:
            continue
        res_name = line[17:20]
        if res_name != 'EXT':
            continue
        bfactor = float(line[60:66])
        ## in Python 2 any float compares greater than None,
        ## so the first EXT probe atom initializes the maximum
        if bfactor > max_bfactor:
            x = float(line[30:38])
            y = float(line[38:46])
            z = float(line[46:54])
##            coord_tmp = numpy.array([x,y,z,])
##            bool_vicinal_to_rigid = False
##            for coord_rigid in l_coords_rigid:
##                dist_from_rigid = math.sqrt(sum((coord_rigid-coord_tmp)**2))
##                if dist_from_rigid < 6:
##                    bool_vicinal_to_rigid = True
##                    break
##            if bool_vicinal_to_rigid == False:
##                continue
##            bool_vicinal_to_flexible = False
##            for coord_flexible in l_coords_flexible:
##                dist_from_flexible = math.sqrt(sum((coord_flexible-coord_tmp)**2))
##                if dist_from_flexible < 6:
##                    bool_vicinal_to_flexible = True
##                    break
##            if bool_vicinal_to_flexible == True:
##                continue
##            min_dist = [1000.,None,]
##            for i_coord_alpha in range(len(l_coords_alpha)):
##                coord_alpha = l_coords_alpha[i_coord_alpha]
##                dist_from_alpha = math.sqrt(sum((coord_alpha-coord_tmp)**2))
##                if dist_from_alpha < min_dist[0]:
##                    min_dist = [dist_from_alpha,i_coord_alpha,]
##            if l_amplitudes[min_dist[1]] > average+stddev:
##                continue
            coord = numpy.array([x, y, z,])
            max_bfactor = bfactor

    return coord
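## The statistics.do_stddev helper called above is not included in this snippet.
## A minimal sketch of what it is assumed to do -- return the mean and the
## (population) standard deviation of a list of numbers; the real helper may
## use the sample standard deviation instead.
import math

def do_stddev(l_values):
    n = len(l_values)
    average = sum(l_values)/float(n)
    ## sum of squared deviations from the mean
    SS = sum((value-average)**2 for value in l_values)
    stddev = math.sqrt(SS/float(n))
    return average, stddev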
import sys
sys.path.append('/home/tc/svn/tc_sandbox/pdb')
import parse_mmCIF, mmCIF2coords
sys.path.append('/home/tc/svn/GoodVibes')
import NMA, visualization

d_mmCIF = parse_mmCIF.main('2lzm',)
d_coords, l_coords_alpha = mmCIF2coords.main('2lzm', d_mmCIF)

cutoff = 10
matrix_hessian = NMA.hessian_calculation(l_coords_alpha, cutoff)
eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian,)

visualization.vmd_trajectory('2lzm', l_coords_alpha, eigenvectors)
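## For reference, a minimal sketch of the distance-cutoff elastic network (ANM)
## Hessian that NMA.hessian_calculation presumably builds from the CA
## coordinates. This is a generic textbook version, not the GoodVibes code;
## details such as spring constants and normalization may differ.
import numpy

def hessian_calculation_sketch(l_coords, cutoff):
    N = len(l_coords)
    hessian = numpy.zeros((3*N, 3*N))
    for i in range(N):
        for j in range(i+1, N):
            d = numpy.array(l_coords[j])-numpy.array(l_coords[i])
            dist_sq = numpy.dot(d, d)
            if dist_sq > cutoff**2:
                continue
            ## off-diagonal 3x3 super-element: -outer(d,d)/|d|^2 (unit spring constant)
            block = -numpy.outer(d, d)/dist_sq
            hessian[3*i:3*i+3, 3*j:3*j+3] = block
            hessian[3*j:3*j+3, 3*i:3*i+3] = block
            ## diagonal super-elements are minus the sum of the off-diagonal blocks
            hessian[3*i:3*i+3, 3*i:3*i+3] -= block
            hessian[3*j:3*j+3, 3*j:3*j+3] -= block
    ## the six lowest eigenvalues are zero (rigid-body modes), which is why the
    ## scripts in this file use eigenvectors[6] as the first non-trivial mode
    return hessian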
def parse_coords(pdb):
    d_mmCIF = parse_mmCIF.main(pdb,)
    d_coords, l_coords_alpha = mmCIF2coords.main(pdb, d_mmCIF)
    return d_mmCIF, l_coords_alpha
l_pdbs = [
    '1czfA', '1thgA', '1booA', '1iu4A', '1bqcA', '206lA', '1cdeA', '1snzA',
    '1gq8A', '1aqlA', '1ps1A', '1s95A', '1pylA', '1ra2A', '1b6bA', '1pntA',
    '1e1aA', '2f9rA', '1v04A', '2nlrA', '1n29A', '1pbgA', '5cpaA', '1agmA',
    '1byaA', '1r76A', '1u5uA', '1vidA', '1h4gA', '1akdA', '1fy2A', '1xqdA',
    '1d6oA', '1qv0A', '1qjeA', '1fvaA', '1bp2A', '1ah7A', '2pthA', '2engA',
    '2acyA', '1qazA', '2a0nA', '1dl2A', '1gp5A', '1onrA', '1cwyA', '1pudA',
    '1bs9A', '1dinA', '1xyzA', '1bwlA', '1eugA', '1idjA', '1g24A', '1oygA',
    '1hzfA', '9papA', '1eb6A', '1ghsA', '1rbnA', '1bixA', '1bs4A', '1celA',
    '1hkaA', '1b02A', '1qibA', '1u3fA', '1agyA', '1zioA', '1pa9A', '2tpsA',
    '2plcA', '1qk2A', '1j53A', '1m21A',
    ]

cutoff = 10
for pdb in l_pdbs:
    ## keep the chain ID before truncating the entry to the 4-character PDB ID
    chain = pdb[4:]
    pdb = pdb[:4]
    d = parse_mmCIF.main(pdb,)
    d_coords, l_coords = mmCIF2coords.main(pdb, d, query_chain=chain)
    matrix_hessian = NMA.hessian_calculation(l_coords, cutoff, verbose=False)
    eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian, verbose=False)
    visualization.vmd_arrows(pdb, l_coords, eigenvectors)
    print pdb

stop
def main():

    set_pdbs = exclude_include()

    l_pdbs_remove = [
        '4a3h','2wf5','1arl','1ee3', ## incorrect _struct_ref_seq.pdbx_db_accession
        '1uyd','1uye','1uyf','2byh','2byi', ## remediation _struct_ref_seq_dif
        '2xdu','3dn8','3dna','1ps3','1ouf','1l35','2eun','1rtc','1zon', ## _struct_ref_seq_dif missing
        '1pwl','1pwm','2fz8','2fz9', ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
        ]
    set_pdbs.remove('1f92') ## remediation _struct_ref_seq_dif incorrect residue number
    set_pdbs.remove('2f6f') ## remediation _pdbx_poly_seq_scheme.auth_mon_id wrong
    set_pdbs.remove('3a5j') ## remediation _struct_ref_seq_dif.db_mon_id is ? but should be MET
    set_pdbs.remove('2rhx') ## remediation _struct_ref_seq_dif.db_mon_id is ? but should be SER
    set_pdbs.remove('2fzb') ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
    set_pdbs.remove('2fzd') ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
    set_pdbs.remove('3dn5') ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
    set_pdbs.remove('1x96') ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
    set_pdbs.remove('1x97') ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
    set_pdbs.remove('1x98') ## remediation incorrect _struct_ref.pdbx_seq_one_letter_code
    set_pdbs.remove('1z3n') ## GenBank DBref - not an error...
    set_pdbs.remove('1z8a') ## GenBank DBref - not an error...
    set_pdbs.remove('1z89') ## GenBank DBref - not an error...
    set_pdbs.remove('2pf8') ## stupid use of alt_ids (C for highest occupancy and only altloc)
    set_pdbs.remove('2pyr') ## stupid use of alt_ids (G and R)
    set_pdbs.remove('3pdn') ## stupid use of alt_ids (B and C)
    set_pdbs.remove('2v4c') ## alt_id B used for 100% occupancy atoms
    set_pdbs.remove('1jxt') ## weird alt_id microheterogeneity...
    set_pdbs.remove('1jxu') ## weird alt_id microheterogeneity...
    set_pdbs.remove('1jxw') ## weird alt_id microheterogeneity...
    set_pdbs.remove('1jxx') ## weird alt_id microheterogeneity...
    set_pdbs.remove('1jxy') ## weird alt_id microheterogeneity...
##    set_pdbs.remove('1ac4') ## multiple strains and taxonomy ids but all same organism (S. cerevisiae)...
##    set_pdbs.remove('1ac8') ## multiple strains and taxonomy ids but all same organism (S. cerevisiae)...
##    set_pdbs.remove('1aeb') ## multiple strains and taxonomy ids but all same organism (S. cerevisiae)...
##    set_pdbs.remove('2rbt') ## multiple strains and taxonomy ids but all same organism (S. cerevisiae)... UNP A7A026, TAX 307796, STRAIN YJM789
##    set_pdbs.remove('2rbu') ## multiple strains and taxonomy ids but all same organism (S. cerevisiae)... UNP A7A026, TAX 307796, STRAIN YJM789
##    set_pdbs.remove('2rbv') ## multiple strains and taxonomy ids but all same organism (S. cerevisiae)... UNP A7A026, TAX 307796, STRAIN YJM789
    for pdb in l_pdbs_remove:
        set_pdbs.remove(pdb)

    fd = open('%s/bc-100.out' %(path_mmCIF),'r')
    lines = fd.readlines()
    fd.close()

    for i_line in range(len(lines)):

        cluster = i_line
        if cluster < 4816:
            continue
##        if cluster not in [5,]:
##            continue

        line = lines[i_line]
        l_pdbs = line.lower().split()
        l_pdbs.sort()
        for i_pdb in range(len(l_pdbs)):
            l_pdbs[i_pdb] = l_pdbs[i_pdb][:4]

        for i_pdb1 in range(0,len(l_pdbs)-1):

            pdb1 = l_pdbs[i_pdb1]
##            if pdb1 != '1t49': ## tmp!!!
##                continue
            if not pdb1 in set_pdbs:
                continue
            print pdb1
            stop

            d_mmCIF1 = parse_mmCIF.main(pdb1,)

            bool_monomeric = check_monomeric(d_mmCIF1)
            if bool_monomeric == False:
                if i_pdb1 == 0:
                    break
                else:
                    continue

            bool_remediation_modres = check_modres(d_mmCIF1,pdb1,)
            if bool_remediation_modres == True:
                continue

            if '_struct_ref_seq_dif.details' in d_mmCIF1.keys():
                if 'DELETION' in d_mmCIF1['_struct_ref_seq_dif.details']:
                    continue

            for i_entity in range(len(d_mmCIF1['_entity.id'])):
                if d_mmCIF1['_entity.type'][i_entity] == 'polymer':
                    if int(d_mmCIF1['_entity.pdbx_number_of_molecules'][i_entity]) != 1:
                        print d_mmCIF1['_entity.pdbx_number_of_molecules']
                        print pdb1, cluster
                        stop

            SG1 = d_mmCIF1['_symmetry.space_group_name_H-M']

            for i_pdb2 in range(i_pdb1+1,len(l_pdbs)):

                pdb2 = l_pdbs[i_pdb2]
##                if pdb2 != '2pf8': ## tmp!!!
##                    continue
##                if pdb1 != '3fui' or pdb2 != '3fuj':
##                    continue
                if not pdb2 in set_pdbs:
                    continue

                d_mmCIF2 = parse_mmCIF.main(pdb2,)

                bool_monomeric = check_monomeric(d_mmCIF2)
                if bool_monomeric == False:
                    continue

                bool_remediation_modres = check_modres(d_mmCIF2,pdb2,)
                if bool_remediation_modres == True:
                    continue

                if '_struct_ref_seq_dif.seq_num' in d_mmCIF2.keys():
                    if 'DELETION' in d_mmCIF2['_struct_ref_seq_dif.details']:
                        continue

                ## biounit monomeric?
                for i_entity in range(len(d_mmCIF2['_entity.id'])):
                    if d_mmCIF2['_entity.type'][i_entity] == 'polymer':
                        if int(d_mmCIF2['_entity.pdbx_number_of_molecules'][i_entity]) != 1:
                            continue

                SG2 = d_mmCIF2['_symmetry.space_group_name_H-M']
                if SG1 != SG2:
                    continue

                ## parse coordinates again after being shortened in previous loop
                try:
                    d_coords1, l_coords_alpha1 = mmCIF2coords.main(pdb1, d_mmCIF1)
                except:
                    fd = open('remediation_atom_site.label_alt_id.txt','a')
                    fd.write('%s\n' %(pdb1,))
                    fd.close()
                try:
                    d_coords2, l_coords_alpha2 = mmCIF2coords.main(pdb2, d_mmCIF2)
                except:
                    fd = open('remediation_atom_site.label_alt_id.txt','a')
                    fd.write('%s\n' %(pdb2,))
                    fd.close()

                ## align sequences/coordinates
                try:
                    l_coords_alpha1, l_coords_alpha2 = create_apo_holo_dataset.sequential_alignment_of_coordinates(
                        l_coords_alpha1, l_coords_alpha2, d_mmCIF1, d_mmCIF2, pdb1, pdb2,
                        )
                except:
                    fd = open('remediation_struct_ref_seq_dif.txt','a')
                    fd.write(
                        '%s %s %s %s\n' %(
                            pdb1, pdb2,
                            d_mmCIF1['_struct_ref_seq.pdbx_db_accession'],
                            d_mmCIF2['_struct_ref_seq.pdbx_db_accession'],
                            )
                        )
                    fd.close()
                    continue

                if len(l_coords_alpha1) != len(l_coords_alpha2):
                    print d_mmCIF1['_pdbx_poly_seq_scheme.pdb_mon_id']
                    print d_mmCIF2['_pdbx_poly_seq_scheme.pdb_mon_id']
                    print 'coords', len(l_coords_alpha1), len(l_coords_alpha2)
                    print 'seq', len(d_mmCIF1['_pdbx_poly_seq_scheme.pdb_mon_id'])
                    print 'seq', len(d_mmCIF2['_pdbx_poly_seq_scheme.pdb_mon_id'])
                    print pdb1, pdb2
                    d_coords1, l_coords_alpha1 = mmCIF2coords.main(pdb1, d_mmCIF1)
                    d_coords2, l_coords_alpha2 = mmCIF2coords.main(pdb2, d_mmCIF2)
                    print len(l_coords_alpha1), len(l_coords_alpha2)
                    stop
                    continue

                ##
                ## align structure 1 and 2
                ##
                instance_geometry = geometry.geometry()
                rmsd = instance_geometry.superpose(l_coords_alpha1,l_coords_alpha2)
                tv1 = instance_geometry.fitcenter
                rm = instance_geometry.rotation
                tv2 = instance_geometry.refcenter
                ## structural alignment
                for i_coord in range(len(l_coords_alpha2)):
                    l_coords_alpha2[i_coord] = numpy.dot(l_coords_alpha2[i_coord]-tv1,rm)+tv2

                ##
                ## vector from structure 1 to 2
                ##
                vector = []
                for i in range(len(l_coords_alpha1)):
                    vector += [
                        l_coords_alpha1[i][0]-l_coords_alpha2[i][0],
                        l_coords_alpha1[i][1]-l_coords_alpha2[i][1],
                        l_coords_alpha1[i][2]-l_coords_alpha2[i][2],
                        ]
                vector = numpy.array(vector)

                ##
                ## calculate normal modes of structure 1
                ##
                cutoff = 10
                try:
                    matrix_hessian1 = NMA.hessian_calculation(l_coords_alpha1, cutoff, verbose=False)
                    eigenvectors1, eigenvalues1 = NMA.diagonalize_hessian(matrix_hessian1, verbose=False)
                    matrix_hessian2 = NMA.hessian_calculation(l_coords_alpha2, cutoff, verbose=False)
                    eigenvectors2, eigenvalues2 = NMA.diagonalize_hessian(matrix_hessian2, verbose=False)
                except:
                    continue

                ##
                ## calculate overlap between normal modes and difference vector
                ##
                eigenvector1 = eigenvectors1[6]
                eigenvector2 = eigenvectors2[6]
                overlap1 = calc_overlap(eigenvector1,vector)
                overlap2 = calc_overlap(eigenvector2,vector)
                overlap3a = calc_overlap(eigenvector1,eigenvector2)
                overlap3b = calc_overlap(eigenvectors1[6],eigenvectors2[7])
                overlap3c = calc_overlap(eigenvectors1[7],eigenvectors2[6])
                overlap3 = max(overlap3a,overlap3b,overlap3c)

                fd = open('rmsd_v_overlap2/cluster%i.txt' %(i_line),'a')
                fd.write('%s %s\n' %(rmsd,overlap1))
                fd.close()
                fd = open('rmsd_v_overlap2/cluster%i.txt' %(i_line),'a')
                fd.write('%s %s\n' %(rmsd,overlap2))
                fd.close()
                fd = open('rmsd_v_overlap2/cluster%i_ev_v_ev.txt' %(i_line),'a')
                fd.write('%s %s\n' %(rmsd,overlap3a))
                fd.close()
                fd = open('rmsd_v_overlap2/cluster%i_ev_v_ev_max.txt' %(i_line),'a')
                fd.write('%s %s\n' %(rmsd,overlap3))
                fd.close()

                print pdb1, pdb2, 'cluster', i_line, 'size', len(l_pdbs),
                print 'overlap', '%4.2f' %(round(overlap1,2)), '%4.2f' %(round(overlap2,2)), '%4.2f' %(round(overlap3,2)), 'rmsd', '%4.2f' %(round(rmsd,2))

    return
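## calc_overlap is not defined in this snippet. A sketch of what it is assumed
## to compute -- the absolute normalized dot product (overlap) between two
## 3N-dimensional vectors -- matching the commented-out overlap_site
## calculation in get_position_ligand below.
import math
import numpy

def calc_overlap(v1, v2):
    overlap = abs(
        numpy.dot(v1, v2)
        /
        math.sqrt(numpy.dot(v1, v1)*numpy.dot(v2, v2))
        )
    return overlap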
def get_position_ligand(pdb, pdb_apo, d_apo2holo,):

    pdb_holo = d_apo2holo[pdb_apo]['holo']
    d_mmCIF_holo = parse_mmCIF.main(pdb_holo,)
    d_coords, l_coords_alpha_holo = mmCIF2coords.main(pdb_holo, d_mmCIF_holo)

    ##
    ##
    ##
    ligand = d_apo2holo[pdb_apo]['ligand']
    l_residues = []
    for i in range(len(d_mmCIF_holo['_struct_site.id'])):
        if not 'BINDING SITE FOR RESIDUE %s' %(ligand) in d_mmCIF_holo['_struct_site.details'][i]:
            continue
        if len(l_residues) > 0:
            print pdb, pdb_apo, pdb_holo
            print l_residues
            print d_mmCIF_holo['_struct_site.details'][i]
            stop
        struct_site_ID = d_mmCIF_holo['_struct_site.id'][i]
        for j in range(len(d_mmCIF_holo['_struct_site_gen.site_id'])):
            struct_site_gen_ID = d_mmCIF_holo['_struct_site_gen.site_id'][j]
            if struct_site_ID == struct_site_gen_ID:
                residue = int(d_mmCIF_holo['_struct_site_gen.auth_seq_id'][j])
##                l_residues += [residue]
                ## include neighboring residues
                l_residues += [residue-1, residue, residue+1]
    l_residues = list(set(l_residues))
    if len(l_residues) == 0:
        print pdb
        stop

    ##
    l_coords_ligand = []
    for i in range(len(d_mmCIF_holo['_atom_site.id'])):
        if (
            d_mmCIF_holo['_atom_site.group_PDB'][i] == 'HETATM'
            and
            d_mmCIF_holo['_atom_site.label_comp_id'][i] == ligand
            ):
            x = float(d_mmCIF_holo['_atom_site.Cartn_x'][i])
            y = float(d_mmCIF_holo['_atom_site.Cartn_y'][i])
            z = float(d_mmCIF_holo['_atom_site.Cartn_z'][i])
            coord = numpy.array([x,y,z,])
            l_coords_ligand += [coord]

    d_mmCIF_apo = parse_mmCIF.main(pdb_apo,)
    d_coords, l_coords_alpha_apo = mmCIF2coords.main(pdb_apo, d_mmCIF_apo)

    ## structural alignment
    ## solution that works in all cases
    ## also for 2d59 and 2d5a, which have residues missing at the Nterm and Cterm, respectively
    ## first non-?
    index1_seq_apo = next((i for i,v in enumerate(d_mmCIF_apo['_pdbx_poly_seq_scheme.pdb_mon_id']) if v != '?'))
    index1_seq_holo = next((i for i,v in enumerate(d_mmCIF_holo['_pdbx_poly_seq_scheme.pdb_mon_id']) if v != '?'))
    ## last non-?
    index2_seq_apo = len(d_mmCIF_apo['_pdbx_poly_seq_scheme.pdb_mon_id'])-next((i for i,v in enumerate(reversed(d_mmCIF_apo['_pdbx_poly_seq_scheme.pdb_mon_id'])) if v != '?'))
    index2_seq_holo = len(d_mmCIF_holo['_pdbx_poly_seq_scheme.pdb_mon_id'])-next((i for i,v in enumerate(reversed(d_mmCIF_holo['_pdbx_poly_seq_scheme.pdb_mon_id'])) if v != '?'))
    ## first common non-?
    index1_coord_apo = max(0, index1_seq_holo-index1_seq_apo)
    index1_coord_holo = max(0, index1_seq_apo-index1_seq_holo)
    ## last common non-?
    index2_coord_apo = len(l_coords_alpha_apo)+min(0, index2_seq_holo-index2_seq_apo)
    index2_coord_holo = len(l_coords_alpha_holo)+min(0, index2_seq_apo-index2_seq_holo)
    l_coords_alpha_apo = l_coords_alpha_apo[index1_coord_apo:index2_coord_apo]
    l_coords_alpha_holo = l_coords_alpha_holo[index1_coord_holo:index2_coord_holo]

    if pdb == pdb_apo:
        l_seq_num = d_mmCIF_apo['_pdbx_poly_seq_scheme.pdb_seq_num'][index1_coord_apo:index2_coord_apo]
        chain = ''.join(d_mmCIF_apo['_entity_poly.pdbx_strand_id'])
        n_residues = len(l_coords_alpha_apo)
        l_coords_alpha = l_coords_alpha_apo
    else:
        l_seq_num = d_mmCIF_holo['_pdbx_poly_seq_scheme.pdb_seq_num'][index1_coord_holo:index2_coord_holo]
        chain = ''.join(d_mmCIF_holo['_entity_poly.pdbx_strand_id'])
        n_residues = len(l_coords_alpha_holo)
        l_coords_alpha = l_coords_alpha_holo

    overlap_site = 1.

##    ##
##    ## eigenvector
##    ##
##    cutoff = 10
##    matrix_hessian = NMA.hessian_calculation(l_coords_alpha,cutoff,)
##    eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian)
##
##    ## apply transformation matrix
##    if pdb == pdb_apo:
##        instance_geometry = geometry.geometry()
##        rmsd = instance_geometry.superpose(l_coords_alpha_apo,l_coords_alpha_holo,)
##        tv1 = instance_geometry.fitcenter
##        rm = instance_geometry.rotation
##        tv2 = instance_geometry.refcenter
##        for i_coord in range(len(l_coords_ligand)):
##            l_coords_ligand[i_coord] = numpy.dot(l_coords_ligand[i_coord]-tv1,rm)+tv2
##
##    ##
##    ## apo/holo eigenvector
##    ##
##    vector_apo2holo = []
##    for i in range(len(l_coords_alpha_holo)):
##        vector_apo2holo += [
##            l_coords_alpha_holo[i][0]-l_coords_alpha_apo[i][0],
##            l_coords_alpha_holo[i][1]-l_coords_alpha_apo[i][1],
##            l_coords_alpha_holo[i][2]-l_coords_alpha_apo[i][2],
##            ]
##    vector_apo2holo = numpy.array(vector_apo2holo)
##
##    ##
##    ## calculate overlap between normal modes and difference vector
##    ## in the ligand binding site!!!
##    ##
##    vector_apo2holo_site = []
##    eigenvector_site = []
##    ## exclude coordinate not at the ligand binding site
##    for i_seq_num in range(len(l_seq_num)):
##        seq_num = int(l_seq_num[i_seq_num])
##        if seq_num in l_residues:
##            eigenvector_site += list(eigenvectors[6][3*i_seq_num:3*i_seq_num+3])
##            vector_apo2holo_site += list(vector_apo2holo[3*i_seq_num:3*i_seq_num+3])
##    ## calculate overlap
##    vector_apo2holo_site = numpy.array(vector_apo2holo_site)
##    eigenvector_site = numpy.array(eigenvector_site)
##    overlap_site = abs(
##        numpy.dot(eigenvector_site,vector_apo2holo_site)
##        /
##        math.sqrt(
##            numpy.dot(eigenvector_site,eigenvector_site)
##            *
##            numpy.dot(vector_apo2holo_site,vector_apo2holo_site)
##            )
##        )
##    if overlap_site > 0.8:
##        print vector_apo2holo_site
##        print eigenvector_site
##        print pdb
##        print l_residues

    position_ligand = sum(l_coords_ligand)/len(l_coords_ligand)

    n_atoms = len(l_coords_ligand)

    return position_ligand, chain, n_residues, n_atoms, ligand, overlap_site