def test_mmcif_file(self):
    """Check mmCIF category dicts of 5i55.cif, then row deletion via tables."""
    here = os.path.dirname(__file__)
    block = cif.read(os.path.join(here, '5i55.cif')).sole_block()
    category_names = block.get_mmcif_category_names()
    self.assertEqual(len(category_names), 54)

    # The category name is passed once without and once with a trailing dot.
    self.assertEqual(block.get_mmcif_category('_entry'), {'id': ['5I55']})
    expected_wavelengths = {
        'id': ['1', '2', '3'],
        'wavelength': ['0.9792', '0.9794', '0.9796'],
        'wt': ['1.0', '1.0', '1.0'],
    }
    wavelengths = block.get_mmcif_category('_diffrn_radiation_wavelength.')
    self.assertEqual(wavelengths, expected_wavelengths)

    chem_comp = block.get_mmcif_category('_chem_comp.')
    self.assertEqual(chem_comp['mon_nstd_flag'][:2], [False, 'y'])
    self.assertEqual(chem_comp['pdbx_synonyms'][:2], [None, None])

    # test __delitem__
    del block.find(['_entry.id'])[0]
    self.assertEqual(block.get_mmcif_category('_entry'), {'id': []})

    def remaining_nums():
        # Re-read the column on every call so deletions are visible.
        return list(block.find_values('_entity_poly_seq.num'))

    mon_table = block.find(['_entity_poly_seq.mon_id'])
    self.assertEqual(remaining_nums(), list(map(str, range(1, 23))))
    # Deleting rows through the mon_id table must shrink the num column too.
    del mon_table[1::2]
    self.assertEqual(remaining_nums(), list(map(str, range(1, 23, 2))))
    del mon_table[3:]
    self.assertEqual(remaining_nums(), ['1', '3', '5'])
    del mon_table[:-1]
    self.assertEqual(remaining_nums(), ['5'])
def test_reading_gzipped_file(self):
    """Read 1pfe.cif.gz and inspect several mmCIF category tables."""
    gz_path = os.path.join(os.path.dirname(__file__), '1pfe.cif.gz')
    document = cif.read(gz_path)
    block = document.sole_block()

    names = block.get_mmcif_category_names()
    self.assertEqual(names[0], '_entry.')
    self.assertEqual(len(names), 72)

    exptl_table = block.find_mmcif_category('_exptl')
    self.assertEqual(exptl_table.get_prefix(), '_exptl.')
    expected_tags = [
        '_exptl.entry_id',
        '_exptl.method',
        '_exptl.crystals_number',
    ]
    self.assertEqual(list(exptl_table.tags), expected_tags)
    self.assertEqual(len(exptl_table), 1)
    self.assertEqual(exptl_table.width(), 3)

    # The same category is fetched a second time and checked again.
    exptl_table = block.find_mmcif_category('_exptl')
    self.assertEqual(len(exptl_table), 1)
    self.assertEqual(exptl_table.width(), 3)
    self.assertEqual(exptl_table[0].str(1), 'X-RAY DIFFRACTION')

    asym_table = block.find_mmcif_category('_struct_asym')
    self.assertEqual(len(asym_table), 7)
    self.assertEqual(asym_table.width(), 5)
    self.assertListEqual(list(asym_table[3]), ['D', 'N', 'N', '4', '?'])

    # An unknown category gives an empty table that has no prefix.
    missing = block.find_mmcif_category('_nonexistent')
    with self.assertRaises(RuntimeError):
        missing.get_prefix()
    self.assertEqual(len(missing), 0)
    self.assertEqual(missing.width(), 0)
def find_elements(root, name):
    """Print the distinct `_atom_site.type_symbol` values of one CIF file.

    The output line is the file name followed by the space-separated
    symbols in sorted order.

    Args:
        root: Directory that contains the file.
        name: File name inside ``root``; also used as the line prefix.

    Any exception raised while reading is reported on stdout as
    "Oops. <message>" rather than propagated.
    """
    try:
        doc = cif.read(os.path.join(root, name))
        block = doc.sole_block()
        # Sorted so the line is reproducible: iteration order of a set of
        # strings is not stable from one interpreter run to the next.
        elems = sorted(set(block.find_loop("_atom_site.type_symbol")))
        print(name + ' ' + ' '.join(elems))
    except Exception as e:
        print("Oops. %s" % e)
def read_rst(path):
    """Read the 'restraints' block of a CIF file into grouped restraints.

    A row whose first column is 'MONO' or 'LINK' opens a new group
    ``(kind, name, [restraints])`` built from lower-cased columns 0 and 2.
    Every other row becomes a ``Restraint`` (values lower-cased, '.' turned
    into None) appended to the most recently opened group.

    Returns:
        The groups that ended up with at least one restraint.
    """
    table = cif.read(path)['restraints'].find('_restr.', COLUMNS)
    groups = []
    for row in table:
        kind = row[0]
        if kind in ('MONO', 'LINK'):
            groups.append((kind.lower(), row[2].lower(), []))
            continue
        fields = [None if value == '.' else value.lower() for value in row]
        groups[-1][2].append(Restraint(*fields))
    return list(filter(lambda group: group[2], groups))
def operate_cif_file():
    """Return a table of selected `_atom_site` columns from a CIF file.

    The file path is taken from ``sys.argv[1]`` and the table is looked up
    in the first block of the document.
    """
    atom_site_tags = [
        "_atom_site.group_PDB",
        "_atom_site.label_entity_id",
        "_atom_site.label_seq_id",
        "_atom_site.label_atom_id",
        "_atom_site.Cartn_x",
        "_atom_site.Cartn_y",
        "_atom_site.Cartn_z",
        "_atom_site.label_alt_id",
        "_atom_site.auth_asym_id",
        "_atom_site.auth_comp_id",
        "_atom_site.pdbx_PDB_model_num",
        "_atom_site.auth_seq_id",
        "_atom_site.pdbx_PDB_ins_code",
    ]
    document = cif.read(sys.argv[1])
    first_block = document[0]
    return first_block.find(atom_site_tags)
def read_crd(path):
    """Load `_atom_site` rows from a CIF file and number the kept atoms.

    Rows whose last selected column (``calc_flag``) equals 'M' are dropped.

    Returns:
        ``Crd(atoms, real_serial)`` where ``real_serial`` maps each kept
        atom's ``id`` to a serial number, and maps None and '.' to
        themselves.
    """
    columns = ['id', 'label_atom_id', 'label_alt_id', 'label_comp_id',
               'occupancy', 'calc_flag']
    block = cif.read(path).sole_block()
    kept = []
    for site in block.find('_atom_site.', columns):
        if site[-1] != 'M':
            kept.append(site)
    serials = {None: None, '.': '.'}
    for site in kept:
        # Two placeholder keys are already present, so the first new id
        # receives 1, the next new id 2, and so on.
        serials[site[0]] = len(serials) - 1
    return Crd(kept, serials)
def aif2json(infile):
    """Convert an AIF file to a JSON string.

    Args:
        infile: Path of the AIF file; its sole block is read.

    Returns:
        A JSON document (``indent=4``) holding the metadata pairs, stored
        under the keys returned by ``crossreference_keys``, plus an
        ``isotherm_data`` list with one entry per adsorption point.

    Raises:
        KeyError: If there is at least one adsorption point but no
            metadata value was stored under ``'adsorbate'``.
    """
    data = cif.read(infile).sole_block()

    # wrapper for metadata
    data_dict = {}
    for item in data:
        if item.pair is None:
            continue
        inkey = item.pair[0]
        outkey, dtype = crossreference_keys(equivalency_table, inkey,
                                            informat='AIF')
        # Pairs whose dtype is none of float/str/int are skipped.
        if dtype == float:
            data_dict[outkey] = float(item.pair[-1])
        elif dtype == str:
            data_dict[outkey] = str(item.pair[-1])
        elif dtype == int:
            data_dict[outkey] = int(item.pair[-1])

    # wrapper for isotherm loop
    ads_press = np.array(data.find_loop('_adsorp_pressure'), dtype=float)
    ads_amount = np.array(data.find_loop('_adsorp_amount'), dtype=float)
    try:
        ads_p0 = np.array(data.find_loop('_adsorp_p0'), dtype=float)
    except ValueError:
        ads_p0 = None
    # Emit p0 only when there is exactly one value per pressure point; a
    # missing or shorter column would otherwise be indexed out of range.
    output_p0 = ads_p0 is not None and ads_p0.size == ads_press.size

    # single component only
    isotherm_data = []
    for row, (p, a) in enumerate(zip(ads_press, ads_amount)):
        point = {
            'pressure': p,
            'branch': 'adsorp',
            'species_data': [{
                'name': data_dict['adsorbate'],
                'composition': 1.0,
                'adsorption': a
            }]
        }
        if output_p0:
            # Take p0 from the same row. Searching for the pressure value
            # returned the first match, which is the wrong row whenever a
            # pressure occurs more than once.
            point['p0'] = ads_p0[row]
        isotherm_data.append(point)

    data_dict['isotherm_data'] = isotherm_data
    return json.dumps(data_dict, indent=4)
def gather_data():
    """Write one tab-separated summary row per mmCIF file to stdout.

    A header row is written first. The input paths come from
    ``util.get_file_paths_from_args()``.
    """
    header = ['code', 'na_chains', 'vs', 'vm', 'd_min', 'date', 'group']
    out = csv.writer(sys.stdout, dialect='excel-tab')
    out.writerow(header)
    for cif_path in util.get_file_paths_from_args():
        block = cif.read(cif_path).sole_block()
        entry_code = cif.as_string(block.find_value('_entry.id'))
        # Count the rows whose first value contains 'nucleotide'.
        na_count = 0
        for row in block.find('_entity_poly.type'):
            if 'nucleotide' in row[0]:
                na_count += 1
        solvent = block.find_value('_exptl_crystal.density_percent_sol')
        matthews = block.find_value('_exptl_crystal.density_Matthews')
        resolution = block.find_value('_refine.ls_d_res_high')
        date_column = block.find_values(
            '_pdbx_database_status.recvd_initial_deposition_date')
        deposited = parse_date(date_column.str(0))
        group_id = block.find_value('_pdbx_deposit_group.group_id')
        out.writerow([entry_code, na_count, solvent, matthews, resolution,
                      deposited, group_id])
def test_mmcif_file(self):
    """Check the dict view of several mmCIF categories read from 5i55.cif."""
    cif_path = os.path.join(os.path.dirname(__file__), '5i55.cif')
    block = cif.read(cif_path).sole_block()
    category_names = block.get_mmcif_category_names()
    self.assertEqual(len(category_names), 54)

    # The category name is passed once without and once with a trailing dot.
    self.assertEqual(block.get_mmcif_category('_entry'), {'id': ['5I55']})
    expected_wavelengths = {
        'id': ['1', '2', '3'],
        'wavelength': ['0.9792', '0.9794', '0.9796'],
        'wt': ['1.0', '1.0', '1.0'],
    }
    wavelengths = block.get_mmcif_category('_diffrn_radiation_wavelength.')
    self.assertEqual(wavelengths, expected_wavelengths)

    chem_comp = block.get_mmcif_category('_chem_comp.')
    self.assertEqual(chem_comp['mon_nstd_flag'][:2], [False, 'y'])
    self.assertEqual(chem_comp['pdbx_synonyms'][:2], [None, None])
def compare_monlib_with_ccd(mon_path, ccd):
    """Compare monomer-library entries with the same-named CCD blocks.

    Blocks named '' or 'comp_list' are skipped. Every other block name must
    start with 'comp_'; the rest of the name is looked up in ``ccd``. The
    number of compared monomers is printed at the end.
    """
    report_missing = False  # flip to list monomers that are absent from CCD
    compared = 0
    for cif_path in get_monomer_cifs(mon_path):
        for mon_block in cif.read(cif_path):
            block_name = mon_block.name
            if block_name == '' or block_name == 'comp_list':
                continue
            assert block_name.startswith('comp_')
            monomer = block_name[len('comp_'):]
            ccd_block = ccd.find_block(monomer)
            if not ccd_block:
                if report_missing:
                    print('Not in CCD:', monomer)
                continue
            compare_chem_comp(mon_block, ccd_block)
            compared += 1
    print('Compared', compared, 'monomers.')
def main():
    """Parse the command line, read the CCD file and run the chosen checks.

    ``-f`` checks the CCD formulas. A monomer library path (``-m`` or
    ``$CLIBD_MON``) triggers the comparison with the CCD. At least one of
    the two is required, otherwise the program exits with a message.
    """
    global verbose
    parser = argparse.ArgumentParser()
    parser.add_argument('ccd_path', metavar='/path/to/components.cif[.gz]')
    parser.add_argument(
        '-m', metavar='DIR',
        help='monomer library path (default: $CLIBD_MON)')
    parser.add_argument('-f', action='store_true', help='check CCD formulas')
    parser.add_argument('-v', action='store_true', help='verbose')
    options = parser.parse_args()
    verbose = options.v

    # An explicit -m wins over the environment variable.
    mon_path = options.m if options.m else os.getenv('CLIBD_MON')
    if not (mon_path or options.f):
        sys.exit('Unknown monomer library path: use -m or set $CLIBD_MON.')

    ccd = cif.read(options.ccd_path)
    if options.f:
        check_formulas(ccd)
    if mon_path:
        compare_monlib_with_ccd(mon_path, ccd)
def _get_mmcif_residue_list(self):
    """Collect residue names from the mmCIF file, keyed by auth numbering.

    Reads ``self.cif_file`` and stores the result in ``self.mmcif_data``;
    nothing is returned. The mapping is used for residue checks against
    JSON data and has the form
    ``{chain: {auth_residue_number + insertion_code: residue_name}}``.
    """
    from gemmi import cif

    block = cif.read(self.cif_file).sole_block()
    atom_site = block.get_mmcif_category('_atom_site.')
    # key=chain, val={k: auth_residue_number (with ins. code), v: resname}
    self.mmcif_data = {}
    rows = zip(atom_site["auth_seq_id"],
               atom_site["auth_comp_id"],
               atom_site["pdbx_PDB_ins_code"],
               atom_site["auth_asym_id"])
    for auth_resnum, auth_resname, inscode, chain in rows:
        # False and None are treated as "no insertion code". Identity
        # checks are used because `== False` would also match 0.
        if inscode is False or inscode is None:
            inscode = ""
        auth_residue_number = "%s%s" % (auth_resnum, inscode)
        self.mmcif_data.setdefault(chain, {})[auth_residue_number] = \
            auth_resname
from __future__ import print_function import sys from gemmi import cif, CifWalk #ESD = 'Cartn_x_esd Cartn_y_esd Cartn_z_esd occupancy_esd B_iso_or_equiv_esd ' ESD = '' USUAL_ORDER = ('group_PDB id type_symbol label_atom_id label_alt_id ' 'label_comp_id label_asym_id label_entity_id label_seq_id ' 'pdbx_PDB_ins_code Cartn_x Cartn_y Cartn_z occupancy ' 'B_iso_or_equiv ' + ESD + 'pdbx_formal_charge ' 'auth_seq_id auth_comp_id auth_asym_id auth_atom_id ' 'pdbx_PDB_model_num') counts = {} for arg in sys.argv[1:]: for path in CifWalk(arg): block = cif.read(path).sole_block() loop_tags = block.find_loop("_atom_site.id").get_loop().tags assert all(t.startswith("_atom_site.") for t in loop_tags) tags = ' '.join(t[11:] for t in loop_tags) if tags != USUAL_ORDER: print(tags) print(USUAL_ORDER) print(block.name, tags) counts[tags] = counts.get(tags, 0) + 1 for key, value in counts.items(): print(value, key) # Results: in v4 a few EM structures (5A9Z 5AA0 5FKI 4UDF) # had different order, with ATOM/HETATM in the middle.
def test_frame_reading(self):
    """Look up save frames in the mmcif_pdbx_v50_frag.dic dictionary."""
    dictionary_path = full_path('mmcif_pdbx_v50_frag.dic')
    block = cif.read(dictionary_path).sole_block()

    # An unknown frame name yields None.
    missing_frame = block.find_frame('heyho')
    self.assertIsNone(missing_frame)

    frame = block.find_frame('_atom_site.auth_atom_id')
    self.assertEqual(frame.find_value('_item_type.code'), 'atcode')
# - CENTROID CO-ORDINATES FOR EACH MOLECULE IN A UNIT CELL # - SOME LIST OF LINKING NUMBERS #--------------------------------------------------------------------- files = list(gemmi.CifWalk(inputdir)) #Creates list of file names file_list = [] link_list = [] print(st + e) for file_path in files[st:e]: file_path_split = file_path.split("/") file_name = file_path_split[len(file_path_split) - 1] file_list.append(file_name) doc = cif.read(file_path) block = doc[0] for b in doc: if (b.find_loop('_atom_site_') != None): block = b print("Processed file --> " + file_name) crystal_reader = CrystalReader(file_path) crystal = crystal_reader[0] crystal.assign_bonds() packed_molecules = crystal.packing(box_dimensions=((0, 0, 0), (1, 1, 1)), inclusion='CentroidIncluded') packed_molecules.normalise_labels() adta_molecules = []
# -*- coding: utf-8 -*-
"""Plot the adsorption and desorption branches of an AIF file.

The input path is the first command-line argument; the figure is saved
next to it with the extension replaced by '.pdf'.
"""
import sys
import os
from gemmi import cif  # pylint: disable-msg=no-name-in-module
import matplotlib.pyplot as plt
import numpy as np

filename = sys.argv[1]
aif = cif.read(filename)
block = aif.sole_block()

# Read the four isotherm columns as float arrays, in this order.
ads_press, ads_amount, des_press, des_amount = (
    np.array(block.find_loop(tag), dtype=float)
    for tag in ('_adsorp_pressure', '_adsorp_amount',
                '_desorp_pressure', '_desorp_amount'))
material_id = block.find_pair('_sample_material_id')[-1]

# Both branches share one colour; desorption markers are drawn hollow.
plt.plot(ads_press, ads_amount, 'o-', color='C0')
plt.plot(des_press, des_amount, 'o-', color='C0', markerfacecolor='white')

loading_units = block.find_pair('_units_loading')[-1]
plt.ylabel('quantity adsorbed / ' + loading_units)
pressure_units = block.find_pair('_units_pressure')[-1]
plt.xlabel('pressure / ' + pressure_units)

adsorptive = block.find_pair('_exptl_adsorptive')[-1]
temperature = block.find_pair('_exptl_temperature')[-1]
plt.title(''.join([adsorptive, ' on ', material_id, ' at ', temperature, 'K']))

stem = os.path.splitext(filename)[0]
plt.savefig(stem + '.pdf')
def main():
    """Run both formula-weight checks on the sole block of every input file."""
    for cif_path in get_file_paths_from_args():
        document = cif.read(cif_path)
        sole = document.sole_block()
        check_chem_comp_formula_weight(sole)
        check_entity_formula_weight(sole)
#!/usr/bin/env python3 # Read CCD and fill-in one-letter-codes in gemmi/resinfo.hpp # Usage: ./tools/resinfo.py > resinfo.hpp-new import re from sys import stderr import gemmi from gemmi import cif ccd = cif.read('components.cif.gz') STANDARD = [ 'ALA', 'ARG', 'ASN', 'ASP', 'ASX', 'CYS', 'GLN', 'GLU', 'GLX', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'UNK', 'VAL', 'SEC', 'PYL', 'A', 'C', 'G', 'I', 'U', 'DA', 'DC', 'DG', 'DI', 'DT', 'DU' ] def calculate_formula_weight(formula): total = 0. for elem_count in formula.split(): if elem_count.isalpha(): elem = elem_count count = 1 else: n = 2 if elem_count[1].isalpha() else 1 elem = elem_count[:n] count = int(elem_count[n:]) total += count * gemmi.Element(elem).weight
def test_file_not_found(self):
    """Reading a path that does not exist must raise IOError."""
    missing_path = 'file-that-does-not-exist.cif'
    self.assertRaises(IOError, cif.read, missing_path)