def test_pdb_path(self):
    """Check the relative paths produced by ``pdb.pdb_path``.

    Each case pairs the positional arguments given to ``pdb.pdb_path``
    with the path the test expects back. The expectations cover
    mixed-case IDs (expected lower-cased, under a directory named after
    the middle two characters) and the optional chain argument (expected
    to add a per-entry directory and a ``_<chain>`` suffix).
    """
    cases = (
        (("1ABC", ".pdb.gz"), "ab/1abc.pdb.gz"),
        (("1AbC", ".cif"), "ab/1abc.cif"),
        (("1abC", ".cif.gz", "A"), "ab/1abc/1abc_A.cif.gz"),
    )
    for call_args, expected in cases:
        self.assertEqual(pdb.pdb_path(*call_args), Path(expected))
def run(self, data, config=None, pipeline=None):
    """Download the structure file for one PDB entry unless already cached.

    The PDB ID is read from ``data`` with ``self.get_vals``. The download
    URL is built from ``self.URL`` and the destination from
    ``pdb.pdb_path`` under ``self.base_dir``, with the suffix
    ``.<struc_type>.gz``.

    Args:
        data: Pipeline state. If it has a ``session`` key, that object is
            used to issue the request (presumably a ``requests.Session``;
            confirm against callers). Otherwise a temporary session is
            created and closed before returning.
        config: Unused here.
        pipeline: Unused here.

    Returns:
        ``data``, unmodified.

    Raises:
        requests.HTTPError: If the server answers with an error status.
            The destination file is not created or modified in that case.
    """
    pdb_id = self.get_vals(data)
    url = self.URL.format(PDB=pdb_id.lower(), type=self.struc_type.value)
    path = pdb.pdb_path(
        pdb_id,
        ".{}.gz".format(self.struc_type.value),
        base_dir=self.base_dir)

    # Keep an existing download unless the caller asked for a refresh.
    if (not self.overwrite) and path.exists():
        return data

    path.parent.mkdir(parents=True, exist_ok=True)

    # Only close sessions created here; a caller-supplied one may be
    # shared across components.
    owns_session = "session" not in data
    session = requests.Session() if owns_session else data["session"]
    try:
        with session.get(url, stream=True) as r:
            # Fail before touching the file: otherwise an error page is
            # saved under the .gz name and later runs treat it as a
            # valid cached download.
            r.raise_for_status()
            with path.open("wb") as out_fh:
                # Copy the raw stream so the file is stored as served.
                shutil.copyfileobj(r.raw, out_fh)
    finally:
        if owns_session:
            session.close()
    return data
def test_open_str(self):
    """``pdb.open_pdb`` should accept a plain ``str`` path.

    The path to the ``4n6v`` compressed mmCIF file under
    ``self.mmcif_dir`` is converted to ``str`` before being opened, and
    the file must yield at least one line.
    """
    location = pdb.pdb_path("4n6v", ".cif.gz", base_dir=self.mmcif_dir)
    path_as_str = str(location)
    with pdb.open_pdb(path_as_str) as handle:
        line_count = len(handle.readlines())
        self.assertGreater(line_count, 0)
def test_open_pdb_compressed(self):
    """``pdb.open_pdb`` should read a ``.cif.gz`` file.

    Opens the ``4n6v`` entry under ``self.mmcif_dir`` using the path
    object returned by ``pdb.pdb_path`` and checks that at least one
    line can be read from it.
    """
    compressed_file = pdb.pdb_path(
        "4n6v", ".cif.gz", base_dir=self.mmcif_dir)
    with pdb.open_pdb(compressed_file) as handle:
        contents = handle.readlines()
        self.assertGreater(len(contents), 0)
def run(self, data, config=None, pipeline=None):
    """Split one mmCIF entry into per-chain PDB files and describe each.

    The PDB ID is read from ``data`` with ``self.get_vals`` and the
    mmCIF file is located with ``pdb.find_pdb`` under ``self.mmcif_dir``.
    For every chain of the first model, a PDB file path is built under
    ``self.chain_dir``. The chain file is written when it does not exist
    or ``self.overwrite`` is set; otherwise the existing file is read
    back.

    Returns:
        A list with one entry per chain. Each entry is a copy of
        ``data`` extended with the keys ``chain``, ``structure``,
        ``name``, ``sequence``, ``original_residues`` and
        ``canonical_indices``.

    Raises:
        self.MissingSourceError: If ``pdb.find_pdb`` returns ``None``.
    """
    pdb_id = self.get_vals(data)
    mmcif_parser = Bio.PDB.FastMMCIFParser()
    pdb_parser = Bio.PDB.PDBParser()

    mmcif_file = pdb.find_pdb(pdb_id, base_dir=self.mmcif_dir)
    if mmcif_file is None:
        self.logger.error(
            "Could not find MMCIF file '%s' in '%s'",
            pdb_id, self.mmcif_dir)
        raise self.MissingSourceError(pdb_id)

    self.logger.debug(
        "Extracting chains from %s (%s)", pdb_id, mmcif_file)
    with pdb.open_pdb(mmcif_file) as mmcif_in:
        structure = mmcif_parser.get_structure(pdb_id, mmcif_in)
    self.logger.debug(
        "Found %d chains in %s",
        sum(1 for _ in structure.get_chains()), pdb_id)

    chain_results = []
    for chain in structure[0]:
        chain_file = pdb.pdb_path(pdb_id, ".pdb", chain.id, self.chain_dir)
        chain_file.parent.mkdir(parents=True, exist_ok=True)

        entry = data.copy()
        entry["chain"] = chain.id
        entry["structure"] = str(chain_file)
        entry["name"] = "{}_{}".format(pdb_id.lower(), chain.id)

        if chain_file.exists() and not self.overwrite:
            # Reuse the chain file written by an earlier run.
            # NOTE(review): the file is always read back as chain "A",
            # which presumably relies on how Template.write labels the
            # chain. Confirm against Template.
            loaded = pdb_parser.get_structure(
                "", entry["structure"])[0]["A"]
            template = Template.build(pdb_id, entry["chain"], loaded)
            self.logger.debug(
                "Loaded existing chain %s of PDB %s from %s",
                loaded.id, pdb_id, chain_file)
        else:
            self.logger.debug(
                "Extracting chain %s from PDB %s", chain.id, pdb_id)

            # Everything logged while selecting conformations and
            # building the template is stored in REMARK 999.
            package_logger = logging.getLogger("phyre_engine")
            with phyre_engine.logutils.capture_log(
                    package_logger) as captured:
                # Apply each conformation selector in turn.
                selected = chain
                for selector in self.conf_sel:
                    selected = selector.select(selected)
                template = Template.build(
                    pdb_id, entry["chain"], selected)

                captured.seek(0)
                template.remarks[999].extend(captured.readlines())

            with chain_file.open("w") as pdb_out:
                template.write(pdb_out)

        entry["sequence"] = template.canonical_seq
        entry["original_residues"] = template.mapping
        entry["canonical_indices"] = template.canonical_indices
        chain_results.append(entry)

    return chain_results