def _count_mmcif_atoms(lines):
    """Count ATOM/HETATM rows found after the first ``_atom_site.`` line.

    A row is counted when it starts with ``ATOM`` or ``HETATM`` followed by
    whitespace, a run of digits (taken as the row ID) and more whitespace.
    A row whose ID was already seen is reported on stdout, but it is still
    included in the returned count.
    """
    re_atom = re.compile(r"^(?:ATOM|HETATM)\s+(\d+)\s+.*")
    first_line_by_id = {}
    count = 0
    in_atom_site = False

    for ln in lines:
        if ln.startswith("_atom_site."):
            in_atom_site = True
        if not in_atom_site:
            continue

        ## count atoms
        m = re_atom.match(ln)
        if m is None:
            continue

        count += 1
        aid = m.group(1)
        if aid in first_line_by_id:
            # Single-string print() emits the same bytes as the former
            # multi-argument print statement and also parses on Python 3.
            print("cif_stats() ERROR: CIF DUPLICATE ID")
            print("cif_stats() [1] %s" % (first_line_by_id[aid],))
            print("cif_stats() [2] %s" % (ln,))
        else:
            first_line_by_id[aid] = ln

    return count


def _count_plain_cif_atoms(lines):
    """Count data rows following the first ``_atom_site_label`` line.

    Lines beginning with ``_`` are skipped, every other non-blank line is
    counted with its first whitespace-separated token as the row ID, and the
    scan stops at the first blank (or whitespace-only) line.  A repeated ID
    is reported on stdout and terminates the process via ``sys.exit(1)``.
    """
    first_line_by_id = {}
    count = 0
    in_atom_site = False

    for ln in lines:
        if ln.startswith("_atom_site_label"):
            in_atom_site = True
        if not in_atom_site or ln.startswith("_"):
            continue

        fields = ln.split()
        if not fields:
            # A blank line is treated as the end of the atom table.
            break

        ## count atoms
        count += 1
        aid = fields[0]
        if aid in first_line_by_id:
            print("CIF DUPLICATE ID")
            print("[1] %s" % (first_line_by_id[aid],))
            print("[2] %s" % (ln,))
            # NOTE(review): unlike the mmCIF pass, a duplicate here aborts
            # the whole process; kept as-is because callers may rely on it.
            sys.exit(1)
        first_line_by_id[aid] = ln

    return count


def cif_stats(path):
    """Return atom-count statistics for the CIF/mmCIF file at ``path``.

    The file is first scanned as mmCIF (``ATOM``/``HETATM`` rows after an
    ``_atom_site.`` tag).  If that finds no atoms, the same lines are
    re-scanned on the assumption that this is a plain CIF file with an
    ``_atom_site_label`` loop.

    Args:
        path: Passed unchanged to ``FileIO.OpenFile(path, "r")``.

    Returns:
        A dict of the form ``{"atoms": <int>}``.

    Raises:
        SystemExit: If the plain-CIF scan meets a duplicate atom ID.
    """
    # Read the file once and close it deterministically; the original opened
    # it twice and never closed either handle.
    # NOTE(review): assumes FileIO.OpenFile returns a handle owned by this
    # function; if it can hand back a caller-supplied file object, closing it
    # here is a behavior change - confirm.
    fileobj = FileIO.OpenFile(path, "r")
    try:
        lines = fileobj.readlines()
    finally:
        fileobj.close()

    atoms = _count_mmcif_atoms(lines)
    if atoms == 0:
        # Assume that we are looking at plain CIF file
        atoms = _count_plain_cif_atoms(lines)

    return {"atoms": atoms}
def pdb_stats(path):
    """Return atom-count statistics for the PDB file at ``path``.

    Every line starting with ``ATOM`` or ``HETATM`` followed (after optional
    whitespace) by a run of digits is counted.  The digits are taken as the
    atom serial and combined with the current model, which is updated by each
    ``MODEL <n>`` line, to detect duplicates.  Duplicates are reported on
    stdout but still counted.

    Args:
        path: Passed unchanged to ``FileIO.OpenFile(path, "r")``.

    Returns:
        A dict of the form ``{"atoms": <int>}``.
    """
    re_model = re.compile(r"^MODEL\s+(\d+).*")
    # \s* rather than \s+: the record name may run straight into the serial.
    re_atom = re.compile(r"^(?:ATOM|HETATM)\s*(\d+).*")

    # Close the handle deterministically instead of leaving it to the
    # garbage collector.
    # NOTE(review): assumes FileIO.OpenFile returns a handle owned by this
    # function; if it can hand back a caller-supplied file object, closing it
    # here is a behavior change - confirm.
    fileobj = FileIO.OpenFile(path, "r")
    try:
        lines = fileobj.readlines()
    finally:
        fileobj.close()

    # Atoms seen before any MODEL line are filed under model 1.
    model = 1
    first_line_by_key = {}
    atom_count = 0

    for ln in lines:
        ## change model
        m = re_model.match(ln)
        if m is not None:
            model = m.group(1)
            continue

        ## count atoms
        m = re_atom.match(ln)
        if m is None:
            continue

        atom_count += 1
        key = "%s-%s" % (m.group(1), model)
        if key in first_line_by_key:
            # Single-string print() emits the same bytes as the former
            # multi-argument print statement and also parses on Python 3.
            print("pdb_stats() ERROR: PDB DUPLICATE ID")
            print("pdb_stats() [1] %s" % (first_line_by_key[key],))
            print("pdb_stats() [2] %s" % (ln,))
        else:
            first_line_by_key[key] = ln

    return {"atoms": atom_count}
def cif2html(cif_path, html_path):
    """Render the first data block of a CIF file to HTML.

    The file at ``cif_path`` is loaded into an ``mmCIF.mmCIFFile``; its first
    element is wrapped in ``mmCIFDataKid`` and handed to the ``cif2html.kid``
    template as ``cif``, and the template output is written to ``html_path``.
    Progress messages are printed to stdout.

    Args:
        cif_path: Passed unchanged to ``FileIO.OpenFile(cif_path, "r")``.
        html_path: Destination passed to the template's ``write``.
    """
    fileobj = FileIO.OpenFile(cif_path, "r")
    try:
        cif_file = mmCIF.mmCIFFile()
        print("loading...")
        cif_file.load_file(fileobj)

        print("converting...")
        cif_data = cif_file[0]
        # The template is looked up relative to the current working
        # directory, as in the original code.
        c2h_template = kid.Template(file="cif2html.kid")
        c2h_template.cif = mmCIFDataKid(cif_data)
        c2h_template.write(html_path)
    finally:
        # The original never closed the input handle.  It is closed only
        # after the HTML is written, so this stays correct even if the
        # loader reads from the file lazily.
        fileobj.close()