def parse_GoodVibes_exclude_flexible(pdb, path,):
    """Return the coordinate of the highest-scoring probe atom of a chain.

    Parameters:
      pdb  -- identifier whose first four characters are used as the PDB ID
              and whose last character is used as the chain ID.
      path -- subdirectory of ``output/`` holding ``<pdbid>_<chain>_probe.pdb``.

    Returns:
      numpy.array([x, y, z]) of the ATOM/HETATM record with residue name
      ``EXT`` that has the largest value in the B-factor columns (the first
      one wins on ties), or None if the probe file has no such record.

    The per-residue amplitudes of one normal mode are also calculated and the
    alpha carbons are split into a rigid and a flexible set.
    NOTE(review): those sets are not used for anything at the moment, because
    the filters that consumed them are disabled (see below) -- despite the
    function name, no probe is currently excluded.
    """

    pdb_id = pdb[:4]
    chain_id = pdb[-1]

    ##
    ## calculate amplitudes
    ##
    d_mmCIF = parse_mmCIF.main(pdb_id,)
    d_coords, l_coords_alpha = mmCIF2coords.main(
        pdb_id, d_mmCIF, query_chain=chain_id,
        )
    ## single argument in parentheses prints identically under Python 2 and 3
    print(len(l_coords_alpha))

    ##
    ## eigenvector
    ##
    cutoff = 10
    matrix_hessian = NMA.hessian_calculation(l_coords_alpha, cutoff,)
    eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian)
    ## index 6 is presumably the first mode after the six rigid-body modes
    ## -- TODO confirm against the ordering returned by NMA.diagonalize_hessian
    mode = eigenvectors[6]
    ## one amplitude per coordinate triplet (x,y,z components of the mode)
    l_amplitudes = [
        math.sqrt(mode[i]**2 + mode[i + 1]**2 + mode[i + 2]**2)
        for i in range(0, len(mode), 3)
        ]

    ##
    ## average amplitude
    ## (do_stddev returns the average as well, so it is not calculated twice)
    ##
    average, stddev = statistics.do_stddev(l_amplitudes)

    ##
    ## rigid = below average; flexible = more than half a stddev above average
    ## NOTE(review): only consumed by the disabled filters described below
    ##
    l_coords_rigid = [
        coord_alpha
        for coord_alpha, amplitude in zip(l_coords_alpha, l_amplitudes)
        if amplitude < average
        ]
    l_coords_flexible = [
        coord_alpha
        for coord_alpha, amplitude in zip(l_coords_alpha, l_amplitudes)
        if amplitude > average + 0.5 * stddev
        ]

    ##
    ## parse output
    ##
    fn_probe = 'output/%s/%s_%s_probe.pdb' % (path, pdb_id, chain_id,)
    with open(fn_probe, 'r') as fd:
        lines = fd.readlines()

    max_bfactor = None
    coord = None
    for line in lines:
        record = line[:6].strip()
        if record not in ('ATOM', 'HETATM',):
            continue
        res_name = line[17:20]
        if res_name != 'EXT':
            continue
        bfactor = float(line[60:66])
        ## explicit None test instead of relying on the Python 2 rule that
        ## None orders before every number (a TypeError under Python 3)
        if max_bfactor is None or bfactor > max_bfactor:
            x = float(line[30:38])
            y = float(line[38:46])
            z = float(line[46:54])
            ## Disabled filters (previously commented-out code); a probe was
            ## skipped here if
            ##   - no rigid alpha carbon was within 6 of it, or
            ##   - a flexible alpha carbon was within 6 of it, or
            ##   - the amplitude of the nearest alpha carbon exceeded
            ##     average+stddev
            coord = numpy.array([x, y, z,])
            max_bfactor = bfactor

    return coord
def main():
    """Tabulate the Matthews coefficient (MV) per space group.

    Loops over the subdirectories of /data/mmCIF whose names lie between
    sys.argv[-2] and sys.argv[-1] (inclusive, plain string comparison) and
    parses every file in them with parse_mmCIF. An entry contributes an MV
    value only if

      - _exptl.method is exactly ['X-RAY DIFFRACTION'],
      - every _entity_poly.type is 'polypeptide(L)',
      - _pdbx_struct_assembly.oligomeric_count is present, not all '?',
        its first value is not a multiple of 60 and all values are '1',
      - it is not flagged as split,
      - _cell.Z_PDB is present,
      - it is not in the hard-coded skip list,
      - its space group is one of the three currently selected (temporary),
      - MV is not above 10 and deviates by at most 1 from
        _exptl_crystal.density_Matthews when that value is given.

    For every space group with more than one value a line
    "average stderr n spacegroup" is written to MV_v_spacegroup.txt in the
    current working directory.

    Raises:
      RuntimeError -- if an entry carries an upper-case 'SPLIT' flag, which
                      the filters do not handle (this used to crash through a
                      NameError on the undefined name ``stop``).
    """

    ## local import kept from the original; hoisted out of the inner loop
    import math

    d_MV = {}

    path = '/data/mmCIF'
    l_dn = os.listdir(path)
    l_dn.sort()
    dn_first = sys.argv[-2]
    dn_last = sys.argv[-1]

    l_data_categories = [
        '_cell', '_entity', '_exptl', '_exptl_crystal',
        '_entity_poly',
        '_symmetry',
        ## virus
        '_pdbx_struct_assembly',
        ## split structure
        '_pdbx_database_related',
        ]

    l_pdbs_skip = (
        ## threshold
        '1e54', '1e9i',
        ## difference between calculated MV and MV in mmCIF
        '3eiq',
        ## The crystals diffracted to 1.7Angstrom and appeared to be I centered tetragonal with
        ## unit cell dimension a=198.42Angstrom and c=396.6Angstrom, however the data only merged successfully in P1
        ## unit cell a=196.61 b=196.48 c=240.63 alpha=65.91 beta=65.91 gamma=90.01.
        ## Toscana has published with Hellinga...
        '2cjf', '2bt4',
        )

    ## tmp!!! only these space groups are currently tabulated
    l_spacegroups = ('F 4 3 2', 'F 41 3 2', 'I 41 3 2',)

    for dn in l_dn:

        if dn == 'mmCIF.py':
            continue
        if dn < dn_first or dn > dn_last:
            continue

        for fn in os.listdir('%s/%s' % (path, dn)):

            pdb = fn[:4]

            d_mmCIF = parse_mmCIF.main(
                pdb,
                d_breaks={'_exptl.method': 'SOLUTION NMR'},
                l_data_categories=l_data_categories,
                )

            ## x-ray structure
            if d_mmCIF['_exptl.method'] != ['X-RAY DIFFRACTION']:
                continue

            ## polymer present
            if '_entity_poly.type' not in d_mmCIF:
                continue
            ## only polymer present is protein
            l_types = d_mmCIF['_entity_poly.type']
            if l_types != len(l_types) * ['polypeptide(L)']:
                continue

            if '_pdbx_struct_assembly.oligomeric_count' not in d_mmCIF:
                continue
            l_counts = d_mmCIF['_pdbx_struct_assembly.oligomeric_count']
            if l_counts == len(l_counts) * ['?']:
                continue
            ## virus
            if int(l_counts[0]) % 60 == 0:
                continue
            ## not monomer
            if l_counts != len(l_counts) * ['1']:
                continue

            ## split structure
            if '_pdbx_database_related' in d_mmCIF:
                if 'split' in d_mmCIF['_pdbx_database_related']:
                    continue
                if 'SPLIT' in d_mmCIF['_pdbx_database_related']:
                    print(pdb)
                    ## explicit failure instead of the undefined name "stop"
                    raise RuntimeError(
                        'unexpected upper-case SPLIT flag for %s' % (pdb,)
                        )

            if '_cell.Z_PDB' not in d_mmCIF:
                continue

            if pdb in l_pdbs_skip:
                continue

            a = float(d_mmCIF['_cell.length_a'][0])
            b = float(d_mmCIF['_cell.length_b'][0])
            c = float(d_mmCIF['_cell.length_c'][0])
            alpha = float(d_mmCIF['_cell.angle_alpha'][0])
            beta = float(d_mmCIF['_cell.angle_beta'][0])
            gamma = float(d_mmCIF['_cell.angle_gamma'][0])
            Z = int(d_mmCIF['_cell.Z_PDB'][0])

            ## sum the formula weights of all entities (not only polymers)
            mw = 0
            for i in range(len(d_mmCIF['_entity.id'])):
                s = d_mmCIF['_entity.formula_weight'][i]
                ## unknown ligand
                if s == '?':
                    continue
                mw += float(s)

            MV = matthews_coefficient.main(a, b, c, alpha, beta, gamma, mw, Z)

            spacegroup = ''.join(d_mmCIF['_symmetry.space_group_name_H-M'])
            if spacegroup not in l_spacegroups:
                continue

            ## the reported value is optional; guard the lookup so that the
            ## diagnostics below cannot fail with a KeyError
            if '_exptl_crystal.density_Matthews' in d_mmCIF:
                l_MV_reported = d_mmCIF['_exptl_crystal.density_Matthews']
            else:
                l_MV_reported = None

            ## implausibly large MV; report the numbers and skip the entry
            ## (prints use one %-formatted argument so that the output is the
            ## same under Python 2 and 3)
            if MV > 10:
                print(pdb)
                print('mw %s' % (mw,))
                print('MV %s %s' % (MV, l_MV_reported,))
                print('Z %s' % (Z,))
                cos_alpha = math.cos(alpha * math.pi / 180.)
                cos_beta = math.cos(beta * math.pi / 180.)
                cos_gamma = math.cos(gamma * math.pi / 180.)
                ## unit cell volume from the cell lengths and angles
                V = a * b * c * math.sqrt(
                    1 - cos_alpha**2 - cos_beta**2 - cos_gamma**2
                    + 2 * (cos_alpha * cos_beta * cos_gamma)
                    )
                print('V %s' % (V,))
                continue

            ## calculated MV disagrees with the MV given in the mmCIF
            if (
                l_MV_reported is not None
                and l_MV_reported != len(l_MV_reported) * ['?']
                ):
                if abs(MV - float(l_MV_reported[0])) > 1:
                    print('MV %s' % (MV,))
                    print('MV %s' % (l_MV_reported,))
                    print('mw %s' % (mw,))
                    print('Z %s' % (Z,))
                    continue

            d_MV.setdefault(spacegroup, []).append(MV)

            print('%s %s %s' % (pdb, round(MV, 2), spacegroup,))

    ## the second column is the standard error; the header used to call it
    ## MV_stddev
    lines_out = ['# MV_average MV_stderr n spacegroup\n']
    for spacegroup in d_MV:
        l_MV = d_MV[spacegroup]
        ## no spread can be calculated from a single value
        if len(l_MV) <= 1:
            continue
        average, stderr = statistics.do_stderr(l_MV)
        lines_out += ['%s %s %s %s\n' % (average, stderr, len(l_MV), spacegroup,)]

    with open('MV_v_spacegroup.txt', 'w') as fd:
        fd.writelines(lines_out)

    return
def parse_GoodVibes_exclude_flexible(pdb,path,):
    """Return the coordinate of the highest-scoring probe atom of a chain.

    Parameters:
      pdb  -- identifier whose first four characters are used as the PDB ID
              and whose last character is used as the chain ID.
      path -- subdirectory of ``output/`` holding ``<pdbid>_<chain>_probe.pdb``.

    Returns:
      numpy.array([x, y, z]) of the ATOM/HETATM record with residue name
      ``EXT`` that has the largest value in the B-factor columns (the first
      one wins on ties), or None if the probe file has no such record.

    The per-residue amplitudes of one normal mode are also calculated and the
    alpha carbons are split into a rigid and a flexible set.
    NOTE(review): those sets are not used for anything at the moment, because
    the filters that consumed them are disabled (see below) -- despite the
    function name, no probe is currently excluded.
    """

    pdb_id = pdb[:4]
    chain_id = pdb[-1]

    ##
    ## calculate amplitudes
    ##
    d_mmCIF = parse_mmCIF.main(pdb_id,)
    d_coords, l_coords_alpha = mmCIF2coords.main(
        pdb_id,d_mmCIF,query_chain=chain_id,
        )
    ## single argument in parentheses prints identically under Python 2 and 3
    print(len(l_coords_alpha))

    ##
    ## eigenvector
    ##
    cutoff = 10
    matrix_hessian = NMA.hessian_calculation(l_coords_alpha,cutoff,)
    eigenvectors, eigenvalues = NMA.diagonalize_hessian(matrix_hessian)
    ## index 6 is presumably the first mode after the six rigid-body modes
    ## -- TODO confirm against the ordering returned by NMA.diagonalize_hessian
    mode = eigenvectors[6]
    ## one amplitude per coordinate triplet (x,y,z components of the mode)
    l_amplitudes = [
        math.sqrt(mode[i]**2+mode[i+1]**2+mode[i+2]**2)
        for i in range(0,len(mode),3)
        ]

    ##
    ## average amplitude
    ## (do_stddev returns the average as well, so it is not calculated twice)
    ##
    average,stddev = statistics.do_stddev(l_amplitudes)

    ##
    ## rigid = below average; flexible = more than half a stddev above average
    ## NOTE(review): only consumed by the disabled filters described below
    ##
    l_coords_rigid = [
        coord_alpha
        for coord_alpha,amplitude in zip(l_coords_alpha,l_amplitudes)
        if amplitude < average
        ]
    l_coords_flexible = [
        coord_alpha
        for coord_alpha,amplitude in zip(l_coords_alpha,l_amplitudes)
        if amplitude > average+0.5*stddev
        ]

    ##
    ## parse output
    ##
    fn_probe = 'output/%s/%s_%s_probe.pdb' %(path,pdb_id,chain_id,)
    with open(fn_probe,'r') as fd:
        lines = fd.readlines()

    max_bfactor = None
    coord = None
    for line in lines:
        record = line[:6].strip()
        if record not in ('ATOM','HETATM',):
            continue
        res_name = line[17:20]
        if res_name != 'EXT':
            continue
        bfactor = float(line[60:66])
        ## explicit None test instead of relying on the Python 2 rule that
        ## None orders before every number (a TypeError under Python 3)
        if max_bfactor is None or bfactor > max_bfactor:
            x = float(line[30:38])
            y = float(line[38:46])
            z = float(line[46:54])
            ## Disabled filters (previously commented-out code); a probe was
            ## skipped here if
            ##   - no rigid alpha carbon was within 6 of it, or
            ##   - a flexible alpha carbon was within 6 of it, or
            ##   - the amplitude of the nearest alpha carbon exceeded
            ##     average+stddev
            coord = numpy.array([x,y,z,])
            max_bfactor = bfactor

    return coord
def main():
    """Tabulate the Matthews coefficient (MV) per space group.

    Loops over the subdirectories of /data/mmCIF whose names lie between
    sys.argv[-2] and sys.argv[-1] (inclusive, plain string comparison) and
    parses every file in them with parse_mmCIF. An entry contributes an MV
    value only if

      - _exptl.method is exactly ['X-RAY DIFFRACTION'],
      - every _entity_poly.type is 'polypeptide(L)',
      - _pdbx_struct_assembly.oligomeric_count is present, not all '?',
        its first value is not a multiple of 60 and all values are '1',
      - it is not flagged as split,
      - _cell.Z_PDB is present,
      - it is not in the hard-coded skip list,
      - its space group is one of the three currently selected (temporary),
      - MV is not above 10 and deviates by at most 1 from
        _exptl_crystal.density_Matthews when that value is given.

    For every space group with more than one value a line
    "average stderr n spacegroup" is written to MV_v_spacegroup.txt in the
    current working directory.

    Raises:
      RuntimeError -- if an entry carries an upper-case 'SPLIT' flag, which
                      the filters do not handle (this used to crash through a
                      NameError on the undefined name ``stop``).
    """

    ## local import kept from the original; hoisted out of the inner loop
    import math

    d_MV = {}

    path = '/data/mmCIF'
    l_dn = os.listdir(path)
    l_dn.sort()
    dn_first = sys.argv[-2]
    dn_last = sys.argv[-1]

    l_data_categories = [
        '_cell','_entity','_exptl','_exptl_crystal',
        '_entity_poly',
        '_symmetry',
        ## virus
        '_pdbx_struct_assembly',
        ## split structure
        '_pdbx_database_related',
        ]

    l_pdbs_skip = (
        ## threshold
        '1e54','1e9i',
        ## difference between calculated MV and MV in mmCIF
        '3eiq',
        ## The crystals diffracted to 1.7Angstrom and appeared to be I centered tetragonal with
        ## unit cell dimension a=198.42Angstrom and c=396.6Angstrom, however the data only merged successfully in P1
        ## unit cell a=196.61 b=196.48 c=240.63 alpha=65.91 beta=65.91 gamma=90.01.
        ## Toscana has published with Hellinga...
        '2cjf','2bt4',
        )

    ## tmp!!! only these space groups are currently tabulated
    l_spacegroups = ('F 4 3 2','F 41 3 2','I 41 3 2',)

    for dn in l_dn:

        if dn == 'mmCIF.py':
            continue
        if dn < dn_first or dn > dn_last:
            continue

        for fn in os.listdir('%s/%s' %(path,dn)):

            pdb = fn[:4]

            d_mmCIF = parse_mmCIF.main(
                pdb,
                d_breaks = {'_exptl.method':'SOLUTION NMR'},
                l_data_categories = l_data_categories,
                )

            ## x-ray structure
            if d_mmCIF['_exptl.method'] != ['X-RAY DIFFRACTION']:
                continue

            ## polymer present
            if '_entity_poly.type' not in d_mmCIF:
                continue
            ## only polymer present is protein
            l_types = d_mmCIF['_entity_poly.type']
            if l_types != len(l_types)*['polypeptide(L)']:
                continue

            if '_pdbx_struct_assembly.oligomeric_count' not in d_mmCIF:
                continue
            l_counts = d_mmCIF['_pdbx_struct_assembly.oligomeric_count']
            if l_counts == len(l_counts)*['?']:
                continue
            ## virus
            if int(l_counts[0]) % 60 == 0:
                continue
            ## not monomer
            if l_counts != len(l_counts)*['1']:
                continue

            ## split structure
            if '_pdbx_database_related' in d_mmCIF:
                if 'split' in d_mmCIF['_pdbx_database_related']:
                    continue
                if 'SPLIT' in d_mmCIF['_pdbx_database_related']:
                    print(pdb)
                    ## explicit failure instead of the undefined name "stop"
                    raise RuntimeError(
                        'unexpected upper-case SPLIT flag for %s' %(pdb,)
                        )

            if '_cell.Z_PDB' not in d_mmCIF:
                continue

            if pdb in l_pdbs_skip:
                continue

            a = float(d_mmCIF['_cell.length_a'][0])
            b = float(d_mmCIF['_cell.length_b'][0])
            c = float(d_mmCIF['_cell.length_c'][0])
            alpha = float(d_mmCIF['_cell.angle_alpha'][0])
            beta = float(d_mmCIF['_cell.angle_beta'][0])
            gamma = float(d_mmCIF['_cell.angle_gamma'][0])
            Z = int(d_mmCIF['_cell.Z_PDB'][0])

            ## sum the formula weights of all entities (not only polymers)
            mw = 0
            for i in range(len(d_mmCIF['_entity.id'])):
                s = d_mmCIF['_entity.formula_weight'][i]
                ## unknown ligand
                if s == '?':
                    continue
                mw += float(s)

            MV = matthews_coefficient.main(a,b,c,alpha,beta,gamma,mw,Z)

            spacegroup = ''.join(d_mmCIF['_symmetry.space_group_name_H-M'])
            if spacegroup not in l_spacegroups:
                continue

            ## the reported value is optional; guard the lookup so that the
            ## diagnostics below cannot fail with a KeyError
            if '_exptl_crystal.density_Matthews' in d_mmCIF:
                l_MV_reported = d_mmCIF['_exptl_crystal.density_Matthews']
            else:
                l_MV_reported = None

            ## implausibly large MV; report the numbers and skip the entry
            ## (prints use one %-formatted argument so that the output is the
            ## same under Python 2 and 3)
            if MV > 10:
                print(pdb)
                print('mw %s' %(mw,))
                print('MV %s %s' %(MV,l_MV_reported,))
                print('Z %s' %(Z,))
                cos_alpha = math.cos(alpha*math.pi/180.)
                cos_beta = math.cos(beta*math.pi/180.)
                cos_gamma = math.cos(gamma*math.pi/180.)
                ## unit cell volume from the cell lengths and angles
                V = a*b*c*math.sqrt(
                    1-cos_alpha**2-cos_beta**2-cos_gamma**2
                    +2*(cos_alpha*cos_beta*cos_gamma)
                    )
                print('V %s' %(V,))
                continue

            ## calculated MV disagrees with the MV given in the mmCIF
            if (
                l_MV_reported is not None
                and l_MV_reported != len(l_MV_reported)*['?']
                ):
                if abs(MV-float(l_MV_reported[0])) > 1:
                    print('MV %s' %(MV,))
                    print('MV %s' %(l_MV_reported,))
                    print('mw %s' %(mw,))
                    print('Z %s' %(Z,))
                    continue

            d_MV.setdefault(spacegroup,[]).append(MV)

            print('%s %s %s' %(pdb,round(MV,2),spacegroup,))

    ## the second column is the standard error; the header used to call it
    ## MV_stddev
    lines_out = ['# MV_average MV_stderr n spacegroup\n']
    for spacegroup in d_MV:
        l_MV = d_MV[spacegroup]
        ## no spread can be calculated from a single value
        if len(l_MV) <= 1:
            continue
        average, stderr = statistics.do_stderr(l_MV)
        lines_out += ['%s %s %s %s\n' %(average,stderr,len(l_MV),spacegroup,)]

    with open('MV_v_spacegroup.txt','w') as fd:
        fd.writelines(lines_out)

    return
if d['spacegroup'] == 'different': d_statistics['diffSG']['alpha'] += [rmsd_alpha] d_statistics['diffSG']['heavy'] += [rmsd_heavy] d_statistics['diffSG']['chi1'] += [chi1_diff_average] else: d_statistics['sameSG']['alpha'] += [rmsd_alpha] d_statistics['sameSG']['heavy'] += [rmsd_heavy] d_statistics['sameSG']['chi1'] += [chi1_diff_average] prefix = 'CA_v_%s_%s_%s' %(y_property,protein,suffix_exclusion,) fd = open('%s.gnuplotdata' %(prefix),'w') fd.writelines(lines) fd.close() average_alpha, stddev_alpha = statistics.do_stddev(l_rmsds_alpha) average_heavy, stddev_heavy = statistics.do_stddev(l_rmsds_heavy) print 'alpha rmsd', len(l_rmsds_alpha), 'average', average_alpha, 'stddev', stddev_alpha print 'heavy rmsd', len(l_rmsds_heavy), 'average', average_heavy, 'stddev', stddev_heavy ################################################################################ #### mutants ##for i in range(n_columns): ## l_columns += [[l_columns[i][0]+n_columns,'',l_columns[i][2],]] #### put mutants in the background behind wts ##l_columns = l_columns[n_columns:]+l_columns[:n_columns] l_colors = [] l_pointsizes = [] l_pointtypes = []