def test_contact_search(self):
    """Exercise the per-element radius getter/setter of ContactSearch."""
    mercury = gemmi.Element('Hg')
    search = gemmi.ContactSearch(4.0)
    # Before any radii are configured the radius for Hg is expected to be 0.
    self.assertEqual(search.get_radius(mercury), 0)
    search.setup_atomic_radii(1, 0)
    # An explicitly assigned radius must be returned unchanged.
    search.set_radius(mercury, 1.5)
    self.assertEqual(search.get_radius(mercury), 1.5)
def test_fen4(self):
    """Read 2242624.cif as a small structure and check its atom types."""
    structure = gemmi.read_small_structure(full_path('2242624.cif'))
    atom_types = structure.atom_types
    self.assertEqual(len(atom_types), 2)
    # The first type is compared by symbol, the second by Element object.
    iron_type = atom_types[0]
    self.assertEqual(iron_type.symbol, 'Fe')
    nitrogen_type = atom_types[1]
    self.assertEqual(nitrogen_type.element, gemmi.Element('N'))
def test_pdb_element_names(self):
    """Check element detection for a Mg HETATM record and element reassignment.

    The same record is parsed three ways: with the element columns present,
    with the trailing element/padding stripped off, and with the element
    columns blanked. In every case the atom must come out as Mg with the
    B-factor intact; afterwards the element is overwritten with Cu.
    """
    # PDB records are fixed-column, so the spacing inside this literal is
    # significant: atom name in columns 13-16, residue name 18-20, chain 22,
    # sequence number 23-26, coordinates 31-54, occupancy 55-60,
    # B-factor 61-66 and element symbol 77-78.
    pdb_line = ("HETATM 4154 MG    MG A 341       1.384  19.340  11.968"
                "  1.00 67.64          MG")
    variants = [
        pdb_line,
        # strip() removes the trailing 'MG' and padding, so the line ends
        # right after the B-factor field.
        pdb_line.strip(' MG'),
        # Blank out both element columns while keeping the record width.
        pdb_line[:-2] + '  ',
    ]
    for line in variants:
        st = gemmi.read_pdb_string(line)
        residue = st[0].sole_residue('A', gemmi.SeqId(341, ' '))
        mg_atom = residue.sole_atom('MG')
        self.assertEqual(mg_atom.element.name, 'Mg')
        self.assertAlmostEqual(mg_atom.b_iso, 67.64, delta=1e-6)
        # The element attribute must be assignable.
        mg_atom.element = gemmi.Element('Cu')
        self.assertEqual(mg_atom.element.name, 'Cu')
def calculate_formula_weight(formula):
    """Return the total weight of a whitespace-separated chemical formula.

    Each token is an element symbol of one or two letters, optionally
    followed by an integer count; a purely alphabetic token counts once.
    The weight of each element is taken from ``gemmi.Element(...).weight``.
    """
    weight_sum = 0.
    for token in formula.split():
        if token.isalpha():
            symbol, multiplier = token, 1
        else:
            # A letter in the second position means a two-letter symbol.
            split_at = 2 if token[1].isalpha() else 1
            symbol = token[:split_at]
            multiplier = int(token[split_at:])
        weight_sum += multiplier * gemmi.Element(symbol).weight
    return weight_sum
def test_pdb_element_names_from_amber(self):
    """Check elements assigned to atoms of AMBER_FRAGMENT and FRAGMENT_WITH_HG."""
    # NOTE(review): a second definition with this same name follows in the
    # visible source; if both live in one class the later one replaces this
    # one - confirm which is intended.
    structure = gemmi.read_pdb_string(AMBER_FRAGMENT)
    first_residue = structure[0][''][0]
    # In the first fragment, names starting with H are expected to be hydrogen.
    expected_elements = [
        ('CB', 'C'),
        ('HB', 'H'),
        ('CG1', 'C'),
        ('HG11', 'H'),
    ]
    for atom_name, symbol in expected_elements:
        atom = first_residue.sole_atom(atom_name)
        self.assertEqual(atom.element, gemmi.Element(symbol))

    # In the second fragment, HG and HG1 are expected to be mercury.
    hg_chain = gemmi.read_pdb_string(FRAGMENT_WITH_HG)[0]['P']
    mercury = 'Hg'
    self.assertEqual(hg_chain[0].sole_atom('HG').element,
                     gemmi.Element(mercury))
    self.assertEqual(hg_chain[1].sole_atom('HG1').element,
                     gemmi.Element(mercury))
def test_pdb_element_names_from_amber(self):
    """Check elements and padded atom names for the AMBER and Hg fragments."""
    structure = gemmi.read_pdb_string(AMBER_FRAGMENT)
    first_residue = structure[0][''][0]
    # In the first fragment, names starting with H are expected to be hydrogen.
    expected_elements = [
        ('CB', 'C'),
        ('HB', 'H'),
        ('CG1', 'C'),
        ('HG11', 'H'),
    ]
    for atom_name, symbol in expected_elements:
        atom = first_residue.sole_atom(atom_name)
        self.assertEqual(atom.element, gemmi.Element(symbol))

    # padded_name() must match columns 13-16 of the corresponding input
    # line (slice [12:16]) once trailing blanks are removed.
    source_lines = AMBER_FRAGMENT.splitlines()
    for index, atom in enumerate(first_residue):
        name_field = source_lines[index][12:16]
        self.assertEqual(atom.padded_name(), name_field.rstrip())

    # In the second fragment, HG and HG1 are expected to be mercury.
    hg_chain = gemmi.read_pdb_string(FRAGMENT_WITH_HG)[0]['P']
    mercury = 'Hg'
    self.assertEqual(hg_chain[0].sole_atom('HG').element,
                     gemmi.Element(mercury))
    self.assertEqual(hg_chain[1].sole_atom('HG1').element,
                     gemmi.Element(mercury))
def _format_ccd_category(raw_type):
    """Turn a raw ``_chem_comp.type`` value into a short display label.

    Surrounding quotes are stripped, the text is lower-cased, spelled-out
    Greek letter names are replaced by the actual characters and the verbose
    "... terminus" phrases are abbreviated.
    """
    cat = raw_type.strip('"\'').lower()
    cat = cat.replace('beta', '\u03B2')
    cat = cat.replace('gamma', '\u03B3')
    cat = cat.replace('delta', '\u03B4')
    if 'terminus' in cat:
        # cat is already lower-case at this point, so every search pattern
        # has to be lower-case as well to have any chance of matching.
        cat = cat.replace('nh3 amino terminus', 'N-terminus')
        cat = cat.replace('cooh carboxy terminus', 'C-terminus')
        cat = cat.replace('oh 3 prime terminus', "3'-terminus")
        cat = cat.replace('oh 5 prime terminus', "5'-terminus")
    return cat


def main():
    """Collect per-component statistics from the given files and print them.

    Every command-line argument is expanded with ``sorted_search`` and each
    resulting path is passed to ``get_file_stats``. Files that raise
    RuntimeError are reported on stderr and skipped. The per-component
    counts are then combined with categories read from ``CCD_PATH`` and
    ``MON_LIB_LIST`` and written to stdout, either as plain text (when
    ``PLAIN_TEXT`` is true) or as a JSON document.
    """
    # stage 1: reading PDB data
    pdb_data = []
    for arg in sys.argv[1:]:
        for path in sorted_search(arg):
            try:
                item = get_file_stats(path)
            except RuntimeError as e:
                sys.stderr.write('Failed to read %s: %s\n' % (path, e))
                continue
            pdb_data.append(item)
            if PLAIN_TEXT:
                print('%s %5.0f %3.1g %s' % item)

    # stage 2: gathering per-component statistics
    stats = defaultdict(lambda: {
        'cat': None, 'files': 0, 'poly': 0, 'nonpoly': 0, 'pdb': (None, 0)
    })
    for pdb_id, volume, resolution, rest in pdb_data:
        # Tiny or NaN volumes (NaN is the only value unequal to itself) are
        # replaced by a huge one so that such files get a negligible score.
        if volume < 10 or volume != volume:
            volume = 1e12
        score_mult = 1.0 / volume / resolution
        # rest holds whitespace-separated "component:poly:nonpoly" entries.
        for entry in rest.split():
            comp, poly, nonpoly = entry.split(':')
            n_poly = int(poly)
            n_nonpoly = int(nonpoly)
            d = stats[comp]
            d['files'] += 1
            d['poly'] += n_poly
            d['nonpoly'] += n_nonpoly
            # Remember the best-scoring file as the example for this component.
            score = (n_poly + n_nonpoly) * score_mult
            if score > d['pdb'][1]:
                d['pdb'] = (pdb_id, score)

    # stage 2a: add category from components.cif; also, count metal atoms
    ccd_category = {}
    metal_count = {}
    ccd = gemmi.cif.read(CCD_PATH)
    for block in ccd:
        comp_id = block.find_value('_chem_comp.id')
        ccd_category[comp_id] = block.find_value('_chem_comp.type')
        symbols = block.find_values('_chem_comp_atom.type_symbol')
        metal_count[comp_id] = sum(gemmi.Element(s).is_metal for s in symbols)

    # stage 2b: add category from the Refmac monomer library
    monlist = gemmi.cif.read(MON_LIB_LIST)['comp_list']
    refmac_category = {
        cc[0]: cc[1] for cc in monlist.find('_chem_comp.', ['id', 'group'])
    }

    # stage 3: output
    total_files = len(pdb_data)
    if not PLAIN_TEXT:
        print('{\n"file_count": %d,\n"data": [' % total_files, end='')
    sep = ''
    # Components found in the largest number of files come first.
    for key in sorted(stats, key=lambda k: -stats[k]['files']):
        cat = _format_ccd_category(ccd_category.get(key, '?'))
        rcat = refmac_category.get(key, 'n/a').lower().strip('"')
        # A component missing from the CCD already gets the '?' category
        # above; report 0 metal atoms for it instead of raising KeyError.
        metals = metal_count.get(key, 0)
        d = stats[key]
        total = d['poly'] + d['nonpoly']
        poly_percent = 100.0 * d['poly'] / total
        example = d['pdb'][0]
        if PLAIN_TEXT:
            print('%3s %2d %7d %5d %7.3f %s' %
                  (key, metals, d['files'], total, poly_percent, example))
        else:
            print('%s\n["%s",%d,"%s","%s",%d,%d,%.3f,"%s"]' %
                  (sep, key, metals, cat, rcat, d['files'], total,
                   poly_percent, example), end='')
            sep = ','
    if not PLAIN_TEXT:
        print('\n]\n}')