def _all_equal_to(values, token):
    """Return True when *values* equals a list holding only *token*."""
    return values == len(values) * [token]


def _total_formula_weight(d_mmCIF):
    """Sum '_entity.formula_weight' over all entities, skipping '?' values."""
    weights = d_mmCIF['_entity.formula_weight']
    mw = 0
    for i in range(len(d_mmCIF['_entity.id'])):
        ## unknown ligand
        if weights[i] == '?':
            continue
        mw += float(weights[i])
    return mw


def _unit_cell_volume(a, b, c, alpha, beta, gamma):
    """Return the cell volume from three lengths and three angles in degrees."""
    import math

    ## degrees -> radians, same factor as the original in-place conversion
    cos_alpha = math.cos(alpha * (math.pi / 180.))
    cos_beta = math.cos(beta * (math.pi / 180.))
    cos_gamma = math.cos(gamma * (math.pi / 180.))
    return a * b * c * math.sqrt(
        1 - cos_alpha**2 - cos_beta**2 - cos_gamma**2
        + 2 * (cos_alpha * cos_beta * cos_gamma))


def main():
    """Collect MV values per space group and write 'MV_v_spacegroup.txt'.

    Walks the sub-directories of '/data/mmCIF' whose names lie between
    sys.argv[-2] and sys.argv[-1] (inclusive, string comparison), parses
    every entry with parse_mmCIF.main and keeps only entries that pass the
    filters below. For each kept entry MV is the value returned by
    matthews_coefficient.main for the cell parameters, the summed entity
    formula weights and '_cell.Z_PDB'.

    For every space group with more than one MV value, one line holding
    average, standard error, count and space group name is written to
    'MV_v_spacegroup.txt' in the current working directory.

    Raises:
        RuntimeError: if 'SPLIT' is found in '_pdbx_database_related'
            (the original halted here through an undefined name).

    The single-argument print(...) form is used so the printed text is the
    same whether the file is run as a Python 2 script or under Python 3.
    """
    d_MV = {}

    path = '/data/mmCIF'

    ## entries that are skipped by hand
    excluded_pdbs = (
        ## threshold
        '1e54', '1e9i',
        ## difference between calculated MV and MV in mmCIF
        '3eiq',
        ## The crystals diffracted to 1.7Angstrom and appeared to be I centered tetragonal with
        ## unit cell dimension a=198.42Angstrom and c=396.6Angstrom, however the data only merged successfully in P1
        ## unit cell a=196.61 b=196.48 c=240.63 alpha=65.91 beta=65.91 gamma=90.01.
        ## Toscana has published with Hellinga...
        '2cjf', '2bt4',
    )
    ## tmp!!! only these space groups are analysed at the moment
    spacegroups_of_interest = ('F 4 3 2', 'F 41 3 2', 'I 41 3 2')

    l_dn = sorted(os.listdir(path))
    for dn in l_dn:
        if dn == 'mmCIF.py':
            continue
        if dn < sys.argv[-2]:
            continue
        if dn > sys.argv[-1]:
            continue

        for fn in os.listdir('%s/%s' % (path, dn)):

            pdb = fn[:4]

            d_mmCIF = parse_mmCIF.main(
                pdb,
                d_breaks={'_exptl.method': 'SOLUTION NMR'},
                l_data_categories=[
                    '_cell', '_entity', '_exptl', '_exptl_crystal',
                    '_entity_poly',
                    '_symmetry',
                    ## virus
                    '_pdbx_struct_assembly',
                    ## split structure
                    '_pdbx_database_related',
                ],
            )

            ## x-ray structure
            if d_mmCIF['_exptl.method'] != ['X-RAY DIFFRACTION']:
                continue

            ## polymer present
            if '_entity_poly.type' not in d_mmCIF:
                continue

            ## only polymer present is protein
            if not _all_equal_to(d_mmCIF['_entity_poly.type'], 'polypeptide(L)'):
                continue

            if '_pdbx_struct_assembly.oligomeric_count' not in d_mmCIF:
                continue
            oligomeric_count = d_mmCIF['_pdbx_struct_assembly.oligomeric_count']
            if _all_equal_to(oligomeric_count, '?'):
                continue

            ## virus
            if int(oligomeric_count[0]) % 60 == 0:
                continue

            ## not monomer
            if not _all_equal_to(oligomeric_count, '1'):
                continue

            ## split structure
            if '_pdbx_database_related' in d_mmCIF:
                related = d_mmCIF['_pdbx_database_related']
                if 'split' in related:
                    continue
                if 'SPLIT' in related:
                    print(pdb)
                    ## explicit halt instead of the undefined name 'stop'
                    raise RuntimeError(
                        "unexpected 'SPLIT' in _pdbx_database_related of %s"
                        % (pdb,))

            if '_cell.Z_PDB' not in d_mmCIF:
                continue

            if pdb in excluded_pdbs:
                continue

            a = float(d_mmCIF['_cell.length_a'][0])
            b = float(d_mmCIF['_cell.length_b'][0])
            c = float(d_mmCIF['_cell.length_c'][0])
            alpha = float(d_mmCIF['_cell.angle_alpha'][0])
            beta = float(d_mmCIF['_cell.angle_beta'][0])
            gamma = float(d_mmCIF['_cell.angle_gamma'][0])
            Z = int(d_mmCIF['_cell.Z_PDB'][0])

            mw = _total_formula_weight(d_mmCIF)

            MV = matthews_coefficient.main(a, b, c, alpha, beta, gamma, mw, Z)

            spacegroup = ''.join(d_mmCIF['_symmetry.space_group_name_H-M'])
            if spacegroup not in spacegroups_of_interest:
                continue  ## tmp!!!

            ## implausibly large value: report the inputs and skip the entry
            if MV > 10:
                print(pdb)
                print('mw %s' % (mw,))
                ## .get: the key is optional and must not abort the report
                print('MV %s %s' % (
                    MV, d_mmCIF.get('_exptl_crystal.density_Matthews')))
                print('Z %s' % (Z,))
                print('V %s' % (
                    _unit_cell_volume(a, b, c, alpha, beta, gamma),))
                continue

            ## calculated value deviates from the value stored in the entry
            if '_exptl_crystal.density_Matthews' in d_mmCIF:
                reported = d_mmCIF['_exptl_crystal.density_Matthews']
                if not _all_equal_to(reported, '?'):
                    if abs(MV - float(reported[0])) > 1:
                        print('MV %s' % (MV,))
                        print('MV %s' % (reported,))
                        print('mw %s' % (mw,))
                        print('Z %s' % (Z,))
                        continue

            d_MV.setdefault(spacegroup, []).append(MV)

            print('%s %s %s' % (pdb, round(MV, 2), spacegroup))

    ## the second column is the standard error, so the header says so
    lines = ['# MV_average MV_stderr n spacegroup\n']
    for spacegroup in d_MV:
        l_MV = d_MV[spacegroup]
        ## a single value has no spread
        if len(l_MV) <= 1:
            continue
        average, stderr = statistics.do_stderr(l_MV)
        lines.append(
            '%s %s %s %s\n' % (average, stderr, len(l_MV), spacegroup))

    ## context manager closes the file even if writing fails
    with open('MV_v_spacegroup.txt', 'w') as fd:
        fd.writelines(lines)

    return
def _all_equal_to(values, token):
    """Return True when *values* equals a list holding only *token*."""
    return values == len(values) * [token]


def _total_formula_weight(d_mmCIF):
    """Sum '_entity.formula_weight' over all entities, skipping '?' values."""
    weights = d_mmCIF['_entity.formula_weight']
    mw = 0
    for i in range(len(d_mmCIF['_entity.id'])):
        ## unknown ligand
        if weights[i] == '?':
            continue
        mw += float(weights[i])
    return mw


def _unit_cell_volume(a, b, c, alpha, beta, gamma):
    """Return the cell volume from three lengths and three angles in degrees."""
    import math

    ## degrees -> radians, same factor as the original in-place conversion
    cos_alpha = math.cos(alpha * (math.pi / 180.))
    cos_beta = math.cos(beta * (math.pi / 180.))
    cos_gamma = math.cos(gamma * (math.pi / 180.))
    return a * b * c * math.sqrt(
        1 - cos_alpha**2 - cos_beta**2 - cos_gamma**2
        + 2 * (cos_alpha * cos_beta * cos_gamma))


def main():
    """Collect MV values per space group and write 'MV_v_spacegroup.txt'.

    Walks the sub-directories of '/data/mmCIF' whose names lie between
    sys.argv[-2] and sys.argv[-1] (inclusive, string comparison), parses
    every entry with parse_mmCIF.main and keeps only entries that pass the
    filters below. For each kept entry MV is the value returned by
    matthews_coefficient.main for the cell parameters, the summed entity
    formula weights and '_cell.Z_PDB'.

    For every space group with more than one MV value, one line holding
    average, standard error, count and space group name is written to
    'MV_v_spacegroup.txt' in the current working directory.

    Raises:
        RuntimeError: if 'SPLIT' is found in '_pdbx_database_related'
            (the original halted here through an undefined name).

    The single-argument print(...) form is used so the printed text is the
    same whether the file is run as a Python 2 script or under Python 3.
    """
    d_MV = {}

    path = '/data/mmCIF'

    ## entries that are skipped by hand
    excluded_pdbs = (
        ## threshold
        '1e54', '1e9i',
        ## difference between calculated MV and MV in mmCIF
        '3eiq',
        ## The crystals diffracted to 1.7Angstrom and appeared to be I centered tetragonal with
        ## unit cell dimension a=198.42Angstrom and c=396.6Angstrom, however the data only merged successfully in P1
        ## unit cell a=196.61 b=196.48 c=240.63 alpha=65.91 beta=65.91 gamma=90.01.
        ## Toscana has published with Hellinga...
        '2cjf', '2bt4',
    )
    ## tmp!!! only these space groups are analysed at the moment
    spacegroups_of_interest = ('F 4 3 2', 'F 41 3 2', 'I 41 3 2')

    l_dn = sorted(os.listdir(path))
    for dn in l_dn:
        if dn == 'mmCIF.py':
            continue
        if dn < sys.argv[-2]:
            continue
        if dn > sys.argv[-1]:
            continue

        for fn in os.listdir('%s/%s' % (path, dn)):

            pdb = fn[:4]

            d_mmCIF = parse_mmCIF.main(
                pdb,
                d_breaks={'_exptl.method': 'SOLUTION NMR'},
                l_data_categories=[
                    '_cell', '_entity', '_exptl', '_exptl_crystal',
                    '_entity_poly',
                    '_symmetry',
                    ## virus
                    '_pdbx_struct_assembly',
                    ## split structure
                    '_pdbx_database_related',
                ],
            )

            ## x-ray structure
            if d_mmCIF['_exptl.method'] != ['X-RAY DIFFRACTION']:
                continue

            ## polymer present
            if '_entity_poly.type' not in d_mmCIF:
                continue

            ## only polymer present is protein
            if not _all_equal_to(d_mmCIF['_entity_poly.type'], 'polypeptide(L)'):
                continue

            if '_pdbx_struct_assembly.oligomeric_count' not in d_mmCIF:
                continue
            oligomeric_count = d_mmCIF['_pdbx_struct_assembly.oligomeric_count']
            if _all_equal_to(oligomeric_count, '?'):
                continue

            ## virus
            if int(oligomeric_count[0]) % 60 == 0:
                continue

            ## not monomer
            if not _all_equal_to(oligomeric_count, '1'):
                continue

            ## split structure
            if '_pdbx_database_related' in d_mmCIF:
                related = d_mmCIF['_pdbx_database_related']
                if 'split' in related:
                    continue
                if 'SPLIT' in related:
                    print(pdb)
                    ## explicit halt instead of the undefined name 'stop'
                    raise RuntimeError(
                        "unexpected 'SPLIT' in _pdbx_database_related of %s"
                        % (pdb,))

            if '_cell.Z_PDB' not in d_mmCIF:
                continue

            if pdb in excluded_pdbs:
                continue

            a = float(d_mmCIF['_cell.length_a'][0])
            b = float(d_mmCIF['_cell.length_b'][0])
            c = float(d_mmCIF['_cell.length_c'][0])
            alpha = float(d_mmCIF['_cell.angle_alpha'][0])
            beta = float(d_mmCIF['_cell.angle_beta'][0])
            gamma = float(d_mmCIF['_cell.angle_gamma'][0])
            Z = int(d_mmCIF['_cell.Z_PDB'][0])

            mw = _total_formula_weight(d_mmCIF)

            MV = matthews_coefficient.main(a, b, c, alpha, beta, gamma, mw, Z)

            spacegroup = ''.join(d_mmCIF['_symmetry.space_group_name_H-M'])
            if spacegroup not in spacegroups_of_interest:
                continue  ## tmp!!!

            ## implausibly large value: report the inputs and skip the entry
            if MV > 10:
                print(pdb)
                print('mw %s' % (mw,))
                ## .get: the key is optional and must not abort the report
                print('MV %s %s' % (
                    MV, d_mmCIF.get('_exptl_crystal.density_Matthews')))
                print('Z %s' % (Z,))
                print('V %s' % (
                    _unit_cell_volume(a, b, c, alpha, beta, gamma),))
                continue

            ## calculated value deviates from the value stored in the entry
            if '_exptl_crystal.density_Matthews' in d_mmCIF:
                reported = d_mmCIF['_exptl_crystal.density_Matthews']
                if not _all_equal_to(reported, '?'):
                    if abs(MV - float(reported[0])) > 1:
                        print('MV %s' % (MV,))
                        print('MV %s' % (reported,))
                        print('mw %s' % (mw,))
                        print('Z %s' % (Z,))
                        continue

            d_MV.setdefault(spacegroup, []).append(MV)

            print('%s %s %s' % (pdb, round(MV, 2), spacegroup))

    ## the second column is the standard error, so the header says so
    lines = ['# MV_average MV_stderr n spacegroup\n']
    for spacegroup in d_MV:
        l_MV = d_MV[spacegroup]
        ## a single value has no spread
        if len(l_MV) <= 1:
            continue
        average, stderr = statistics.do_stderr(l_MV)
        lines.append(
            '%s %s %s %s\n' % (average, stderr, len(l_MV), spacegroup))

    ## context manager closes the file even if writing fails
    with open('MV_v_spacegroup.txt', 'w') as fd:
        fd.writelines(lines)

    return
'3hle', '3hlf', '3hlg', ]: d_mmCIF = parse_mmCIF.main(pdb) a = float(d_mmCIF['_cell.length_a'][0]) b = float(d_mmCIF['_cell.length_b'][0]) c = float(d_mmCIF['_cell.length_c'][0]) alpha = float(d_mmCIF['_cell.angle_alpha'][0]) beta = float(d_mmCIF['_cell.angle_beta'][0]) gamma = float(d_mmCIF['_cell.angle_gamma'][0]) Z = int(d_mmCIF['_cell.Z_PDB'][0]) ## number of polymers in unit cell mw = 0 for i in range(len(d_mmCIF['_entity.id'])): if d_mmCIF['_entity.type'][i] == 'polymer': mw += float(d_mmCIF['_entity.formula_weight'][i]) MV = matthews_coefficient.main( a, b, c, alpha, beta, gamma, mw, Z, ) print pdb, MV
## Print, for a fixed list of PDB IDs, the value returned by
## matthews_coefficient.main for the cell parameters of each entry.
import sys

## the sandbox directory must be on the path before its modules are imported
sys.path.append('/home/people/tc/svn/tc_sandbox/pdb')

import matthews_coefficient
import parse_mmCIF


def _first_float(d_mmCIF, key):
    """Return the first value stored under *key*, converted to float."""
    return float(d_mmCIF[key][0])


for pdb in (
    '2hhb', '1hho', '1hv4',
    '3hl9', '3hlb', '3hlc', '3hld', '3hle', '3hlf', '3hlg',
):

    d_mmCIF = parse_mmCIF.main(pdb)

    ## cell lengths and angles, passed on unchanged
    a = _first_float(d_mmCIF, '_cell.length_a')
    b = _first_float(d_mmCIF, '_cell.length_b')
    c = _first_float(d_mmCIF, '_cell.length_c')
    alpha = _first_float(d_mmCIF, '_cell.angle_alpha')
    beta = _first_float(d_mmCIF, '_cell.angle_beta')
    gamma = _first_float(d_mmCIF, '_cell.angle_gamma')

    ## number of polymers in unit cell
    Z = int(d_mmCIF['_cell.Z_PDB'][0])

    ## only entities typed 'polymer' contribute to the summed weight
    mw = sum(
        float(d_mmCIF['_entity.formula_weight'][i])
        for i in range(len(d_mmCIF['_entity.id']))
        if d_mmCIF['_entity.type'][i] == 'polymer'
    )

    MV = matthews_coefficient.main(a, b, c, alpha, beta, gamma, mw, Z)

    ## single-argument form prints the same text as "print pdb, MV"
    print('%s %s' % (pdb, MV))