def merge_model(event, fs, build: Builds):
    """Add the best autobuilt chain to the event's input model.

    Args:
        event: Event record; ``pandda_event_dir`` and ``dtag`` are read to
            locate ``<dtag>-pandda-input.pdb``.
        fs: Not used by this function; kept so existing callers still work.
        build: Object whose ``build_results`` maps each candidate build path
            to a value; the path with the largest value is selected.

    Returns:
        The gemmi structure read from the input model, with the first chain
        of the first model of the selected build added to its first model.

    Raises:
        ValueError: If ``build.build_results`` is empty.
    """
    build_results = build.build_results
    if not build_results:
        # max() would raise ValueError anyway; say what is actually missing.
        raise ValueError(
            "no build results available for event {}".format(event.dtag))
    event_build_path = max(build_results, key=build_results.get)

    initial_model_path = event.pandda_event_dir / "{}-pandda-input.pdb".format(
        event.dtag)

    initial_model = gemmi.read_structure(str(initial_model_path))
    best_autobuild_model = gemmi.read_structure(str(event_build_path))

    print("\tBefore adding lig there are {} chains".format(
        len(initial_model[0])))
    initial_model[0].add_chain(best_autobuild_model[0][0])
    print("\tAfter adding lig there are {} chains".format(
        len(initial_model[0])))

    return initial_model
def test_read_1pfe_cif(self):
    # Check 1pfe.cif.gz as read, then again after an mmCIF write/read
    # round trip through a temporary file.
    st = gemmi.read_structure(full_path('1pfe.cif.gz'))
    self.check_1pfe(st)
    # write structure to cif and read it back
    out_name = get_path_for_tempfile(suffix='.cif')
    try:
        st.make_mmcif_document().write_file(out_name)
        st2 = gemmi.read_structure(out_name)
    finally:
        # Remove the temporary file even when writing or re-reading fails.
        if os.path.exists(out_name):
            os.remove(out_name)
    self.check_1pfe(st2)
def mergeLigands(mmFile, ligFiles, chainId, outFile):
    """Append residues from ligand files to one chain and write a PDB file.

    Args:
        mmFile: Path of the structure that receives the ligands.
        ligFiles: Iterable of ligand structure paths; every chain of each
            file's first model is copied.
        chainId: Name of the chain in the first model of ``mmFile`` that the
            residues are appended to (found or added on demand).
        outFile: Path the merged structure is written to in PDB format.

    Returns:
        The total number of residues appended.
    """
    structure = gemmi.read_structure(mmFile)
    merged = 0
    for lig_path in ligFiles:
        ligand = gemmi.read_structure(lig_path)
        for source_chain in ligand[0]:
            residues = list(source_chain)
            target_chain = structure[0].find_or_add_chain(chainId)
            target_chain.append_residues(residues)
            merged += len(residues)
    structure.write_pdb(outFile)
    return merged
def test_first_conformer(self):
    # Compare name listings of 1pfe chain B (and residue A/1) with and
    # without restricting them to the first conformer.
    def names(items):
        return [item.name for item in items]

    model = gemmi.read_structure(full_path('1pfe.cif.gz'))[0]
    b = model['B']

    self.assertEqual(
        names(res for res in b if not res.is_water()),
        ['DSN', 'ALA', 'N2C', 'NCY', 'MVA',
         'DSN', 'ALA', 'NCY', 'N2C', 'MVA',
         'QUI', 'QUI'])
    self.assertEqual(
        names(res for res in b.first_conformer() if not res.is_water()),
        ['DSN', 'ALA', 'N2C', 'MVA',
         'DSN', 'ALA', 'NCY', 'MVA',
         'QUI', 'QUI'])

    polymer = b.get_polymer()
    self.assertEqual(
        names(polymer),
        ['DSN', 'ALA', 'N2C', 'NCY', 'MVA',
         'DSN', 'ALA', 'NCY', 'N2C', 'MVA'])
    self.assertEqual(
        names(polymer.first_conformer()),
        ['DSN', 'ALA', 'N2C', 'MVA', 'DSN', 'ALA', 'NCY', 'MVA'])
    self.assertEqual(len(polymer), 10)
    self.assertEqual(polymer.length(), 8)
    self.assertEqual(polymer.make_one_letter_sequence(), 'sAXvsAXv')

    self.assertEqual(names(b.get_ligands()), ['QUI', 'QUI'])

    res1 = model.sole_residue('A', gemmi.SeqId('1'))
    self.assertEqual(
        names(res1.first_conformer()),
        names(atom for atom in res1 if atom.altloc != 'B'))
def test_first_conformer(self):
    # Check residue and atom name listings of 1pfe chain B, its polymer
    # and its ligands, both in full and limited to the first conformer.
    model = gemmi.read_structure(full_path('1pfe.cif.gz'))[0]
    b = model['B']

    non_water_names = [res.name for res in b if not res.is_water()]
    self.assertEqual(non_water_names, [
        'DSN', 'ALA', 'N2C', 'NCY', 'MVA', 'DSN', 'ALA', 'NCY', 'N2C',
        'MVA', 'QUI', 'QUI'
    ])

    first_conf_names = [
        res.name for res in b.first_conformer() if not res.is_water()
    ]
    self.assertEqual(first_conf_names, [
        'DSN', 'ALA', 'N2C', 'MVA', 'DSN', 'ALA', 'NCY', 'MVA', 'QUI',
        'QUI'
    ])

    polymer = b.get_polymer()
    polymer_names = [res.name for res in polymer]
    self.assertEqual(polymer_names, [
        'DSN', 'ALA', 'N2C', 'NCY', 'MVA', 'DSN', 'ALA', 'NCY', 'N2C',
        'MVA'
    ])

    polymer_first_conf_names = [
        res.name for res in polymer.first_conformer()
    ]
    self.assertEqual(
        polymer_first_conf_names,
        ['DSN', 'ALA', 'N2C', 'MVA', 'DSN', 'ALA', 'NCY', 'MVA'])

    self.assertEqual(len(polymer), 10)
    self.assertEqual(polymer.length(), 8)
    # The bond between 4 MVA(v) and 5 DSN(s) is between C and OG
    # (so it's not a peptide bond as expected). Depending on the heuristic
    # used to determine gaps, this sequence could have a gap in the middle.
    self.assertEqual(polymer.make_one_letter_sequence(), 'sAXvsAXv')

    ligand_names = [res.name for res in b.get_ligands()]
    self.assertEqual(ligand_names, ['QUI', 'QUI'])

    res1 = model.sole_residue('A', gemmi.SeqId('1'))
    first_conf_atoms = [atom.name for atom in res1.first_conformer()]
    atoms_not_b = [atom.name for atom in res1 if atom.altloc != 'B']
    self.assertEqual(first_conf_atoms, atoms_not_b)
def test_5i55_predefined_removals(self, clear_entities=False):
    # Exercise remove_waters, remove_empty_chains,
    # remove_ligands_and_waters and trim_to_alanine on 5i55, optionally
    # after replacing the entity list with an empty one.
    st = gemmi.read_structure(full_path('5i55.cif'))
    if clear_entities:
        self.assertEqual(len(st.entities), 4)
        st.entities = gemmi.VectorEntity()
        self.assertEqual(len(st.entities), 0)

    model = st[0]
    lys12 = model['A']['12']['LYS']
    count_b = len([atom for atom in lys12 if atom.altloc == 'B'])

    # one author-chain and 4 label-chains: AA, 2 x ligand, waters
    self.assertEqual(len(model), 1)
    self.assertEqual(len(model.subchains()), 4)

    # Totals recorded before anything is removed.
    n_waters = len(model.get_subchain('D'))
    n_sites = model.count_atom_sites()
    occ_sum = model.count_occupancies()
    self.assertEqual(n_sites, occ_sum + count_b)

    st.remove_waters()
    self.assertEqual(len(model.subchains()), 3)
    sites_without_waters = n_sites - n_waters
    occ_without_waters = occ_sum - n_waters
    self.assertEqual(model.count_atom_sites(), sites_without_waters)
    self.assertEqual(model.count_occupancies(), occ_without_waters)

    st.remove_empty_chains()
    self.assertEqual(len(model.subchains()), 3)

    n_res = len(model['A'])
    st.remove_ligands_and_waters()
    self.assertEqual(len(model['A']), n_res - 2)

    model['A'].trim_to_alanine()
    # ALA has 5 atoms, except the last one which has OXT (hence +1)
    expected_count = 1 + sum(
        4 if res.name == 'GLY' else 5 for res in model['A'])
    self.assertEqual(model.count_occupancies(), expected_count)
def test_assembly(self):
    # Build the single assembly of 1pfe (read without merging chain
    # parts) and check chain names and masses for two naming schemes.
    st = gemmi.read_structure(full_path('1pfe.cif.gz'),
                              merge_chain_parts=False)
    model = st[0]
    chain_names = [ch.name for ch in model]
    self.assertEqual(chain_names, ['A', 'B', 'A', 'B', 'A', 'B'])

    def mass_of(chain_name):
        return sum(ch.calculate_mass() for ch in model
                   if ch.name == chain_name)

    a_mass = mass_of('A')
    b_mass = mass_of('B')
    model_mass = model.calculate_mass()
    self.assertAlmostEqual(model_mass, a_mass + b_mass)

    self.assertEqual(len(st.assemblies), 1)
    asem = st.assemblies[0]

    bio = gemmi.make_assembly(asem, model,
                              gemmi.HowToNameCopiedChain.Short)
    self.assertEqual([ch.name for ch in bio], ['A', 'B', 'C', 'D'])
    self.assertAlmostEqual(bio.calculate_mass(), 2 * model_mass)
    for index, expected_mass in enumerate([a_mass, b_mass, a_mass, b_mass]):
        self.assertAlmostEqual(bio[index].calculate_mass(), expected_mass)

    bio = gemmi.make_assembly(asem, model,
                              gemmi.HowToNameCopiedChain.AddNumber)
    self.assertEqual([ch.name for ch in bio], ['A1', 'B1', 'A2', 'B2'])
def checkSpaceGroup(self, hkl_spg, fpath_xyz):
    """Compare a model's space group with the reflection data's.

    The space group is read from the coordinate file ``fpath_xyz``. When it
    differs from ``hkl_spg`` (spaces ignored), a reindexed MTZ file is
    looked up in ``self.mtz_alt`` or produced by running "reindex".

    Returns:
        dict with "spg" (space group of the coordinate file) and "hkl"
        (path of the matching reindexed MTZ file; "" when the space group
        is unchanged or reindexing did not produce a file).
    """
    spg = gemmi.read_structure(str(fpath_xyz)).sg_hm
    spg_key = spg.replace(" ", "")
    spg_info = {"spg": spg, "hkl": ""}

    if spg_key == hkl_spg.replace(" ", ""):
        return spg_info

    self.file_stdout.write(" *** space group changed to " + spg + "\n")

    if spg_key in self.mtz_alt:
        # Already reindexed earlier: reuse the cached file.
        spg_info["hkl"] = self.mtz_alt[spg_key]
        return spg_info

    # reindex first time
    self.open_script("reindex_" + spg_key)
    self.write_script("SYMM \"" + spg + "\"\n")
    self.close_script()

    # new hkl file path
    hklout = os.path.join(self.datared_dir(), spg_key + ".mtz")

    # run reindex
    self.runApp("reindex", ["hklin", self.mtzpath, "hklout", hklout])

    if os.path.isfile(hklout):
        spg_info["hkl"] = hklout
        self.mtz_alt[spg_key] = hklout
    else:
        self.file_stdout.write(" +++ cannot reindex\n")
        self.file_stderr.write(" +++ cannot reindex\n")

    return spg_info
def _load_pivot(self):
    """Create internal representation for pivot molecule.

    Downloads the pivot structure, reads it and stores the polymer of the
    requested chain (taken from the first model) in
    ``self.pivot_structure``. ``self.chain_id`` is replaced by the value
    returned from ``data_provider.get_auth_chain_id``.

    Raises:
        RuntimeError: If Pivot structure cannot be loaded. The original
            exception is attached as ``__cause__``.
    """
    try:
        self.log.info("Downloading pivot")
        # The file name is built from the chain id as given, before it is
        # replaced by the id returned from get_auth_chain_id below.
        pivot_path = os.path.join(
            self.structures, f"{self.pdb_id}_{self.chain_id}.cif"
        )
        self.chain_id = data_provider.get_auth_chain_id(self.pdb_id, self.chain_id)
        data_provider.download_structure(self.pdb_id, self.chain_id, pivot_path)

        self.log.info("Loading pivot")
        # take chain from first model
        structure = gemmi.read_structure(pivot_path)
        protein = structure[0][self.chain_id]
        self.pivot_structure = protein.get_polymer()
    except Exception as e:
        self.log.error(f"Something went wrong with pivot loading. Reason: {str(e)}")
        # Keep the exception type callers expect, but carry the reason and
        # the original traceback instead of an empty RuntimeError.
        raise RuntimeError(f"Pivot structure could not be loaded: {e}") from e
def run(path):
    """Count polymer extents larger than 1.2 in fractional coordinates.

    For every chain of the first model that has a polymer, the span of the
    polymer's atoms along each of the three fractional axes is computed. A
    line is printed for each span above 1.2.

    Returns:
        The number of (chain, axis) pairs reported; 0 when the structure's
        cell is not a crystal cell.
    """
    st = gemmi.read_structure(path)
    if not st.cell.is_crystal():
        return 0

    st.add_entity_types()
    n_reported = 0
    for chain in st[0]:
        polymer = chain.get_polymer()
        if not polymer:
            continue

        lows = [float('+inf')] * 3
        highs = [float('-inf')] * 3
        for residue in polymer:
            for atom in residue:
                frac = st.cell.fractionalize(atom.pos)
                for axis in range(3):
                    # Current bound first: it is kept unless the new
                    # value is strictly smaller/larger.
                    lows[axis] = min(lows[axis], frac[axis])
                    highs[axis] = max(highs[axis], frac[axis])

        for axis in range(3):
            delta = highs[axis] - lows[axis]
            if delta > 1.2:  # 120% of the unit cell size
                n_reported += 1
                code = st.info['_entry.id']
                print('%s chain:%s delta%c = %.3f' %
                      (code, chain.name, ord('X') + axis, delta))
    return n_reported
def read_1pfe(self, filename):
    """Read ``filename`` (resolved with ``full_path``) and assert 1PFE data.

    Checks cell, name, space group, info entries, model/chain counts,
    subchain-to-chain naming, selected residues and the CL atom.
    """
    st = gemmi.read_structure(full_path(filename))

    # Header-level data.
    self.assertAlmostEqual(st.cell.a, 39.374)
    self.assertEqual(st.cell.gamma, 120)
    self.assertEqual(st.name, '1PFE')
    self.assertEqual(st.spacegroup_hm, 'P 63 2 2')
    self.assertEqual(st.info['_entry.id'], '1PFE')
    self.assertEqual(st.info['_exptl.method'], 'X-RAY DIFFRACTION')

    # Model and chain layout.
    self.assertEqual(len(st), 1)
    self.assertEqual(len(st[0]), 2)
    label_to_auth_name = {}
    for ch in st[0]:
        for sub in ch.subchains():
            label_to_auth_name[sub.name()] = ch.name
    self.assertEqual(
        label_to_auth_name,
        {'A': 'A', 'B': 'B', 'C': 'A', 'D': 'B',
         'E': 'B', 'F': 'A', 'G': 'B'})

    # Chain A, residue 1.
    self.assertEqual(len(st[0]['A']['1']), 1)
    chain_a = st[0]['A']
    self.assertEqual(chain_a[0].label_seq, 1)
    self.assertEqual(chain_a['1'][0].seqid.num, 1)

    # Chain B, seqid 3 holds two residues.
    b3 = st[0]['B']['3']
    self.assertEqual(len(b3), 2)
    self.assertEqual(repr(b3[0]), repr(st[0]['B'][2]))
    self.assertEqual(b3[0].name, 'N2C')
    self.assertEqual(b3[-1].name, 'NCY')

    # Subchain C holds a single CL residue with one atom.
    chain_c = st[0].get_subchain('C')
    self.assertEqual(len(chain_c), 1)
    res_cl = list(chain_c)[0]
    self.assertEqual(res_cl.name, 'CL')
    self.assertEqual(len(res_cl), 1)
    atom_cl = res_cl['CL']
    self.assertAlmostEqual(atom_cl.occ, 0.17)
    self.assertEqual(atom_cl.element.name, 'Cl')
def superpose_structure(self, file_path, chain_id):
    """Superpose PDB structure to pivot.

    The polymer of ``chain_id`` in the first model is superposed onto
    ``self.pivot_structure``. When the RMSD is at most ``self.threshold``
    the superposed structure is written out via
    ``self.write_out_superposed``; otherwise it is only logged as rejected.
    The RMSD is appended to ``self.rmsds`` in both cases.

    Args:
        file_path (str): Path to the PDB entry.
        chain_id (str): Chain identifier to pick from PDB entry
    """
    structure = gemmi.read_structure(file_path)
    model = structure[0]
    target = model[chain_id].get_polymer()
    p_type = gemmi.PolymerType.PeptideL

    superposition = gemmi.calculate_superposition(
        self.pivot_structure, target, p_type, gemmi.SupSelect.CaP
    )

    if superposition.rmsd <= self.threshold:
        self.write_out_superposed(structure, chain_id, superposition.transform)
        self.log.info(
            f"Similarity with {structure.name}({chain_id}) is {superposition.rmsd:.3f}"
        )
    else:
        # This branch is reached only for rmsd strictly above the
        # threshold, so the message says ">" rather than ">=".
        self.log.info(
            f"Rejected similarity with {structure.name}({chain_id}) {superposition.rmsd:.3f} > threshold {self.threshold}."
        )

    self.rmsds.append(superposition.rmsd)
def test_read_write_5cvz_final(self, via_cif=False):
    # Read 5cvz_final.pdb, optionally round-trip it through mmCIF, write
    # it back as PDB and compare with the relevant input lines.
    path = full_path('5cvz_final.pdb')
    with open(path) as f:
        expected = [line.rstrip() for line in f
                    if is_written_to_pdb(line, via_cif)
                    # SCALE is not written b/c CRYST1 has more precision.
                    and not line.startswith('SCALE')]
    st = gemmi.read_structure(path)
    if via_cif:
        # input file w/o TER record -> subchains not setup automatically
        st.setup_entities()
        doc = st.make_mmcif_document()
        st = gemmi.make_structure_from_block(doc[0])
    # First MTRIX which is identity is not stored. Let's add it here.
    identity_ncs = gemmi.NcsOp()
    identity_ncs.id = '1'
    identity_ncs.given = True
    st.ncs.insert(0, identity_ncs)
    out_lines = self.write_and_read(st, via_cif=False)
    if via_cif:
        # The previous filter compared line[:10] with 'REMARK 2' and
        # line[:5] with 'TER ': slices longer than the literals, so no
        # line was ever dropped. The prefixes below follow the PDB column
        # layout (remark number right-justified in columns 8-10).
        generated_prefixes = ('REMARK   2', 'TER  ')
        out_lines = [line for line in out_lines
                     # input file has no REMARK 2, but it gets generated
                     # from REMARK 3 when going pdb->cif->pdb
                     if not line.startswith(generated_prefixes)]
    self.assertEqual(expected, [line.rstrip() for line in out_lines])
def path_to_structure(path: Path) -> gemmi.Structure:
    """Read a structure from ``path``, set up its entities, delete the file.

    The file is removed only after it has been read and
    ``setup_entities()`` has returned.
    """
    filename = str(path)
    loaded = gemmi.read_structure(filename)
    loaded.setup_entities()
    os.remove(filename)
    return loaded
def __init__(
        self,
        initial_receptor_path,
        coords,
        receptor_path,
):
    """Write a copy of the receptor with ligands near ``coords`` removed.

    Ligands (as returned by ``chain.get_ligands()``) of the first model
    whose ``self.get_ligand_distance(ligand, coords)`` is below 10 are
    deleted, and the structure is written in PDB format.

    Args:
        initial_receptor_path: Path of the structure to read.
        coords: Passed through to ``self.get_ligand_distance``.
        receptor_path: Path the stripped structure is written to.
    """
    # Load protein
    structure = gemmi.read_structure(str(initial_receptor_path))
    receptor = structure[0]

    # Collect the seqids to strip separately for each chain (keyed by the
    # chain's position in the model), so that a residue in another chain
    # that merely shares a seqid with a nearby ligand is left alone.
    strip_ids = {}
    for chain_idx, chain in enumerate(receptor):
        for ligand in chain.get_ligands():
            distance = self.get_ligand_distance(ligand, coords)
            if distance < 10:
                print(
                    "\t\tWill strip res {}. Mean distance {} from event!".
                    format(ligand, distance))
                strip_ids.setdefault(chain_idx, set()).add(str(ligand.seqid))

    for chain_idx, chain in enumerate(receptor):
        chain_ids = strip_ids.get(chain_idx)
        if not chain_ids:
            continue
        # Find the indices first, then delete: removing residues while
        # iterating over the chain skips neighbours and shifts indices.
        doomed = [
            index for index, residue in enumerate(chain)
            if str(residue.seqid) in chain_ids
        ]
        for index in doomed:
            print("\t\tStripping {}".format(chain[index]))
        # Delete from the end so earlier indices stay valid.
        for index in reversed(doomed):
            del chain[index]

    # Save
    structure.write_pdb(str(receptor_path))
def test_read_write_4oz7(self, via_cif=False):
    # Write 4oz7.pdb (read without merging chain parts) back out and
    # compare with the input lines selected by is_written_to_pdb.
    source = full_path('4oz7.pdb')
    expected = []
    with open(source) as handle:
        for line in handle:
            if is_written_to_pdb(line, via_cif):
                expected.append(line)
    structure = gemmi.read_structure(source, merge_chain_parts=False)
    out_lines = self.write_and_read(structure, via_cif)
    self.assertEqual(expected, out_lines)
def test_read_1orc(self):
    # Check header values, waters, seqid/insertion-code lookups and the
    # sequence alignment for 1orc.pdb.
    st = gemmi.read_structure(full_path('1orc.pdb'))
    self.assertEqual(st.resolution, 1.54)
    self.assertAlmostEqual(st.cell.a, 34.77)
    self.assertEqual(st.cell.alpha, 90)
    self.assertEqual(len(st.ncs), 0)

    model = st[0]
    self.assertEqual(len(model), 1)
    self.assertEqual(len(model.subchains()), 2)

    chain_a = model['A']
    waters = chain_a.get_waters()
    self.assertEqual(len(waters), 57)  # FORMUL 2 HOH *57(H2 O)
    self.assertTrue(all(res.name == 'HOH' for res in waters))

    self.assertTrue(chain_a['3'])
    self.assertFalse(chain_a['0'])

    # Residues that carry an insertion code all have sequence number 56.
    with_icode = [
        res.seqid.num for res in chain_a if res.seqid.icode != ' '
    ]
    self.assertEqual(with_icode, [56] * 5)

    self.assertEqual(len(chain_a['55']), 1)
    self.assertEqual(len(chain_a['55B']), 0)
    self.assertEqual(len(chain_a['56B']), 1)
    self.assertEqual(chain_a['56'][0].seqid.icode, ' ')
    self.assertEqual(chain_a['56c'][0].seqid.icode, 'C')

    full_sequence = st.entities[0].full_sequence
    result = gemmi.align_sequence_to_polymer(
        full_sequence, chain_a.get_polymer(), gemmi.PolymerType.Unknown)
    self.assertEqual(result.cigar_str(), '2I64M5I')
def test_5moo_header(self):
    # Convert the 5moo header to mmCIF and check selected _refine and
    # _diffrn values.
    st = gemmi.read_structure(full_path('5moo_header.pdb'))
    block = st.make_mmcif_document().sole_block()

    refine = block.get_mmcif_category('_refine')
    self.assertEqual(refine['ls_d_res_high'], ['1.44', '1.43'])
    self.assertEqual(refine['pdbx_starting_model'], ['4I8H', '4I8H'])

    ambient_temps = list(block.find_values('_diffrn.ambient_temp'))
    self.assertEqual(ambient_temps, ['295', '295'])
def test_read_5i55(self):
    # Check the unit cell parameters read from 5i55.cif.
    cell = gemmi.read_structure(full_path('5i55.cif')).cell
    # (assertion, attribute, expected value), in the order checked.
    checks = [
        (self.assertAlmostEqual, 'a', 29.46),
        (self.assertAlmostEqual, 'b', 10.51),
        (self.assertAlmostEqual, 'c', 29.71),
        (self.assertEqual, 'alpha', 90),
        (self.assertAlmostEqual, 'beta', 111.98),
        (self.assertEqual, 'gamma', 90),
    ]
    for check, attribute, expected in checks:
        check(getattr(cell, attribute), expected)
def cif_to_embed(cif_file, ix=None, parse_skip=False):
    """Parse a CIF file into per-chain arrays of residue embeddings.

    Every residue whose name is in ``residue_names`` is encoded with
    ``parse_residue_embed`` and the encodings of a chain are concatenated
    into one array. Residues with other names are ignored, except that any
    DA/DC/DG/DT residue aborts parsing of the whole file.

    Per the original author's notes the node embedding holds a one-hot
    amino acid type, x/y/z positions and a one-hot atom type (C, CA, N, O);
    that layout is decided by ``parse_residue_embed`` -- TODO confirm.

    Args:
        cif_file: Path of the structure file to read.
        ix: If not None, only the chain at this index within each model is
            processed.
        parse_skip: If True, also return the indices of residues for which
            ``parse_residue_embed`` returned nothing.

    Returns:
        ``(structure, results)`` or, with ``parse_skip``,
        ``(structure, results, skips)``. ``results`` holds one 1-tuple
        ``(node_embeddings,)`` per processed chain. ``skips`` holds residue
        indices within their chain, accumulated over all processed chains.
        If a nucleotide residue is found, a tuple of the same length filled
        with ``None`` is returned.
    """
    nucleotide_names = ("DA", "DC", "DG", "DT")

    st = gemmi.read_structure(cif_file)

    results = []
    skips = []
    for model in st:
        for i, chain in enumerate(model):
            if ix is not None and ix != i:
                continue

            node_embeddings = []
            for j, residue in enumerate(chain):
                if residue.name not in residue_names:
                    # Skip over any structure that contains nucleotides
                    if residue.name in nucleotide_names:
                        # Match the arity of the normal return so callers
                        # that unpack three values keep working.
                        if parse_skip:
                            return None, None, None
                        return None, None
                    continue

                # residue object contains information about the residue,
                # including identity and spatial coordinates for atoms in
                # the residue. We parse this into a dense encoding, for
                # feeding into a neural network.
                node_embed = parse_residue_embed(residue)

                if len(node_embed) == 0:
                    skips.append(j)

                node_embeddings.extend(node_embed)

            results.append((np.array(node_embeddings),))

    if parse_skip:
        return st, results, skips
    return st, results
def test_ncs(self):
    # Compare the direct distance between two atoms of 5cvz chain A with
    # the distance to the nearest image found through the unit cell.
    st = gemmi.read_structure(full_path('5cvz_final.pdb'))
    model = st[0]
    atom_n = model.sole_residue('A', 17, ' ')['N']
    atom_ne2 = model.sole_residue('A', 63, ' ')['NE2']

    self.assertAlmostEqual(atom_n.pos.dist(atom_ne2.pos), 34.89, delta=1e-2)

    image = st.cell.find_nearest_image(atom_n.pos, atom_ne2.pos)
    self.assertAlmostEqual(image.dist(), 8.02, delta=1e-2)
def test_ncs_in_1lzh(self):
    # The single NCS operator of 1lzh should map each CA of the second
    # chain to within 0.01 of the matching CA of the first chain.
    st = gemmi.read_structure(full_path('1lzh.pdb.gz'))
    self.assertEqual(len(st.ncs), 1)
    chain_a, chain_b = st[0]
    for res_a, res_b in zip(chain_a, chain_b):
        ca_a = res_a['CA'].pos
        ca_b = res_b['CA'].pos
        mapped_b = st.ncs[0].apply(ca_b)
        self.assertTrue(ca_a.dist(mapped_b) < 0.01)
def write_back_and_compare(self, path, via_cif):
    """Read ``path``, write it to a temporary PDB file and return its lines.

    With ``via_cif`` the structure is first converted to an mmCIF document
    and rebuilt from that document's first block. The result is whatever
    ``read_lines_and_remove`` returns for the temporary file.
    """
    structure = gemmi.read_structure(path)
    if via_cif:
        doc = structure.make_mmcif_document()
        structure = gemmi.make_structure_from_block(doc[0])
    # mkstemp returns an open descriptor; only the file name is needed.
    descriptor, tmp_name = tempfile.mkstemp()
    os.close(descriptor)
    structure.write_pdb(tmp_name)
    return read_lines_and_remove(tmp_name)
def test_read_write_1lzh(self, via_cif=False):
    # Write 1lzh.pdb.gz back out and compare with the input lines kept by
    # is_written_to_pdb, leaving out lines 1 and 2.
    path = full_path('1lzh.pdb.gz')
    if sys.version_info >= (3, ):
        mode = 'rt'
    else:
        mode = 'r'
    expected = []
    with gzip.open(path, mode=mode) as f:
        for line in f:
            if is_written_to_pdb(line, via_cif):
                expected.append(line)
    structure = gemmi.read_structure(path)
    out_lines = self.write_and_read(structure, via_cif)
    self.assertEqual(expected[0], out_lines[0])
    # TITLE lines differ because the text is broken at different word
    self.assertEqual(expected[3:], out_lines[3:])
def read_pdb():
    """
    Read the given PDB file and show the atom positions

    The file name is taken from the ``-f`` / ``--filename`` command line
    option. Without it the help text is printed and the process exits with
    status 0. Every model, chain, residue and atom is logged at INFO level.

    """

    # Create the argument parser
    parser = argparse.ArgumentParser(description="Read a PDB file")

    # Add an argument for the filename
    parser.add_argument(
        "-f",
        "--filename",
        type=str,
        default=None,
        dest="filename",
        help="The path to the PDB file",
    )

    # Parse the arguments
    args = parser.parse_args()

    # Check a filename has been given
    if args.filename is None:
        parser.print_help()
        # The exit() builtin comes from the site module and is not always
        # available; SystemExit gives the same exit status everywhere.
        raise SystemExit(0)

    # Configure some basic logging
    configure_logging()

    # Read the structure
    structure = gemmi.read_structure(args.filename)

    # Iterate through atoms; values are passed as logging arguments so the
    # message is only formatted when the record is actually emitted.
    prefix = " " * 4
    logger.info("Structure: %s", structure.name)
    for model in structure:
        logger.info("%sModel: %s", prefix, model.name)
        for chain in model:
            logger.info("%sChain: %s", prefix * 2, chain.name)
            for residue in chain:
                logger.info("%sResidue: %s", prefix * 3, residue.name)
                for atom in residue:
                    logger.info(
                        "%sAtom: %s, %f, %f, %f, %f, %f, %f",
                        prefix * 4,
                        atom.element.name,
                        atom.pos.x,
                        atom.pos.y,
                        atom.pos.z,
                        atom.occ,
                        atom.charge,
                        parakeet.sample.get_atom_sigma(atom),
                    )
def test_extract_sequence_info(self):
    # After add_entity_types(), the first chain of 5cvz should yield an
    # L-peptide polymer with the expected one-letter sequence.
    st = gemmi.read_structure(full_path('5cvz_final.pdb'))
    st.add_entity_types()
    first_chain = st[0][0]
    polymer = first_chain.get_polymer()
    self.assertEqual(polymer.check_polymer_type(),
                     gemmi.PolymerType.PeptideL)
    expected = ''.join([
        'AAATSLVYDTCYVTLTERATTSFQRQSFPTLKGMGDRAFQVVAFTIQGVS',
        'AAPLMYNARLYNPGDTDSVHATGVQLMGTVPRTVRLTPRVGQNNWFFGNT',
        'EEAETILAIDGLVSTKGANAPSNTVIVTGCFRLAPSELQSS',
    ])
    self.assertEqual(polymer.make_one_letter_sequence(), expected)
def test_ncs(self):
    # Check the resolution of 5cvz, then compare the direct distance
    # between two chain A atoms with the distance to the nearest image.
    st = gemmi.read_structure(full_path('5cvz_final.pdb'))
    self.assertEqual(st.resolution, 3.29)

    model = st[0]
    res17 = model.sole_residue('A', gemmi.SeqId(17, ' '))
    res63 = model.sole_residue('A', gemmi.SeqId('63'))
    atom_first = res17[0]
    atom_ne2 = res63.sole_atom('NE2')

    self.assertAlmostEqual(
        atom_first.pos.dist(atom_ne2.pos), 34.89, delta=1e-2)

    image = st.cell.find_nearest_image(atom_first.pos, atom_ne2.pos)
    self.assertAlmostEqual(image.dist(), 8.02, delta=1e-2)
def test_assembly_naming(self):
    # Build the second assembly of 4oz7 with each chain-naming scheme and
    # check the resulting chain names.
    st = gemmi.read_structure(full_path('4oz7.pdb'))
    model = st[0]
    a1 = st.assemblies[1]
    naming = gemmi.HowToNameCopiedChains
    cases = (
        (naming.AddNumber, ['B1']),
        (naming.Dup, ['B']),
        (naming.Short, ['B']),
    )
    for how, expected_names in cases:
        bio = a1.make_assembly(model, how)
        self.assertEqual([ch.name for ch in bio], expected_names)
def test_remove2(self):
    # Delete a chain, then a residue picked by seqid and name, then a
    # residue picked by index, checking the first residue of B each time.
    model = gemmi.read_structure(full_path('1pfe.cif.gz'))[0]
    self.assertEqual(len(model), 2)
    del model['A']
    self.assertEqual(len(model), 1)

    chain_b = model['B']

    def first_name():
        return chain_b[0].name

    self.assertEqual(first_name(), 'DSN')
    del chain_b['1']['DSN']
    self.assertEqual(first_name(), 'ALA')
    del chain_b[0]
    self.assertEqual(first_name(), 'N2C')
def test_3dg1(self):
    # In 3dg1 the last two residues are waters, and each atom's occupancy
    # times (1 + value from is_special_position) should equal 1.
    st = gemmi.read_structure(full_path('3dg1_final.cif'))
    self.assertEqual(st.info['_entry.id'], '3DG1')
    self.assertEqual(len(st[0]), 1)
    chain = st[0]['A']
    for trailing in chain[-2:]:
        self.assertEqual(trailing.name, 'HOH')
    cell = st.cell
    for res in chain:
        for atom in res:
            multiplicity = cell.is_special_position(atom.pos) + 1
            self.assertEqual(atom.occ * multiplicity, 1.0)