def test_genbank_bad_loc_wrap_parsing(self):
    """Parse bad_loc_wrap.gb with the parser warning silenced and check its location."""
    expected_location = "join(3462..3615,3698..3978,4077..4307,4408..4797,4876..5028,5141..5332)"
    with warnings.catch_warnings():
        # Silence the parser warning so that only the parsed result is examined.
        warnings.simplefilter("ignore", BiopythonParserWarning)
        with open(path.join("GenBank", "bad_loc_wrap.gb")) as gb_file:
            parsed = GenBank.read(gb_file)
            self.assertEqual(1, len(parsed.features))
            location = parsed.features[0].location
            self.assertEqual(location, expected_location)
def test_000_genbank_bad_loc_wrap_warning(self):
    """Check that reading bad_loc_wrap.gb triggers the expected BiopythonParserWarning.

    The warning filter is set to "error" so the warning surfaces as an
    exception whose message can be compared exactly.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("error", BiopythonParserWarning)
        with open(path.join("GenBank", "bad_loc_wrap.gb")) as handle:
            try:
                GenBank.read(handle)
            except BiopythonParserWarning as e:
                self.assertEqual(str(e), "Non-standard feature line wrapping (didn't break on comma)?")
            else:
                # fail() reports the message directly instead of the
                # misleading "False is not true" from assertTrue(False, ...).
                self.fail("Expected specified BiopythonParserWarning here.")
def parse(path='./flat_files/'):
    """Read every entry of a directory as a single GenBank record.

    Args:
        path: Directory whose entries are parsed; wrapped in ``Path``.
            NOTE(review): ``Path`` must provide ``listdir()`` and
            ``absolute()`` (e.g. a path.py-style class) - confirm.

    Returns:
        List of the records that parsed successfully. Entries that fail to
        open or parse are reported on stdout and skipped.
    """
    path = Path(path)
    print("parsing records at {}".format(path.absolute()))
    records = []
    for p in path.listdir():
        try:
            # Context manager closes the handle even when parsing fails.
            with open(p) as handle:
                records.append(GenBank.read(handle))
        except Exception:
            # Best-effort: skip unreadable files, but let KeyboardInterrupt
            # and SystemExit propagate (a bare ``except`` would swallow them).
            print('error with file {}'.format(p))
    print("parsed %s records.." % len(records))
    return records
def fetch_gb(id, taxon=None):
    """Fetch a nuccore record in GenBank format and save it in ``config.genbank_dir``.

    The file is named ``<locus>[.<taxon>].gb``. An existing file is left
    untouched and a warning is printed instead. Nothing is returned.
    """
    response = eutils.fetch(db='nuccore', id=id, rettype='gb')
    if not response.ok:
        print(f"[WARN] Could not download '{id}'")
        return

    parsed = GenBank.read(StringIO(response.text))
    locus = parsed.locus
    suffix = '.' + taxon if taxon else ''
    filename = f"{locus}{suffix}.gb"
    file_out = os.path.join(config.genbank_dir, filename)

    if os.path.exists(file_out):
        print(f"[WARN] {file_out} already exists")
        return

    with open(file_out, 'w') as out_handle:
        out_handle.write(response.text)
    print(f"[INFO] Fetched file: {file_out}")
def from_genbank(cls, filepath: str):
    """Insert a row built from the GenBank file at *filepath*.

    Returns whatever ``cls.insert`` returns. Any exception raised while
    reading the file or inserting is logged as a warning, and ``None`` is
    returned instead.
    """
    try:
        parsed = GenBank.read(file=filepath)
        source_data = GenBank.get_source_data(parsed)
        insert = cls.insert
        columns = {
            'accession': parsed.accession[0],
            'organism': parsed.organism,
            'date_released': GenBank.format_date(parsed.date),
            'host': source_data.get('host'),
            'date_collected': source_data.get('collection_date'),
            'country': source_data.get('country'),
        }
        return insert(**columns)
    except Exception as e:
        message = f"Error inserting {filepath} to {cls.__name__}: {e}"
        logging.warning(message)
def add_file(cls, filepath: str):
    """Insert a record describing the GenBank file at *filepath*.

    Returns the inserted record. Any exception is logged at debug level and
    ``None`` is returned instead.
    """
    # TODO(seanbeagle): Create scraping tool for genbank data similar to get_source_data()
    try:
        parsed = GenBank.read(file=filepath)
        accession = parsed.accession[0]
        logging.debug(f"Adding {accession} to {cls}...")
        insert = cls.insert
        columns = {
            'accession': accession,
            'version': parsed.version,
            'filepath': filepath,  # TODO: Ensure this is absolute filepath
            'date_downloaded': now(),
            'downloaded_by': getpass.getuser(),
            'num_features': len(parsed.features),
            'length': len(parsed),
        }
        return insert(**columns)
    except Exception as e:
        message = f"Could not insert GenBank record: {e}"
        logging.debug(message)
def fetch(id: str):
    """Download a nuccore record in GenBank format and register it locally.

    The response text is parsed, and a file name of the form
    ``<accession>[.<config.taxon>].gb`` is built under
    ``FileSystem.dir['genbank']``. If a record with the same version is
    already known and its file exists on disk, nothing is written; otherwise
    the text is saved and passed to ``GenBank.add_file``.

    Returns:
        The computed output path when the download response is OK;
        ``None`` (after printing a warning) when it is not.

    NOTE(review): the nesting of ``return file_out`` relative to the inner
    if/else is assumed - confirm. As laid out here, the already-exists
    branch returns the newly computed ``file_out`` rather than
    ``record.filepath``; verify that the two always agree.
    """
    logging.info(f"Fetching GenBank id={id}")
    r = eutils.fetch(db='nuccore', id=id, rettype='gb')
    if r.ok:
        gb = GenBank.read(string=r.text)
        accession = gb.accession[0]
        # The ".<taxon>" part is only added when config.taxon is truthy.
        filename = f"{accession}{'.' + config.taxon if config.taxon else ''}.gb"
        file_out = os.path.join(FileSystem.dir['genbank'], filename)
        # Look for an existing record with the same version string.
        record = GenBank.query.filter_by(version=gb.version).first()
        if record and os.path.exists(record.filepath):
            # Known record whose file is still on disk: skip the write.
            logging.info(
                f"{accession} already exists as file='{record.filepath}' and GenBank.id={record.id}"
            )
        else:
            with open(file_out, 'w') as fh:
                fh.write(r.text)
            # Register the file only after it has been written and closed.
            GenBank.add_file(file_out)
            print(f"[INFO] Fetched file: {file_out}")
        return file_out
    else:
        print(f"[WARN] Could not download '{id}'")
def test_genbank_read(self):
    """Read NC_000932.gb with GenBank.read and check the accession list."""
    gb_path = path.join("GenBank", "NC_000932.gb")
    with open(gb_path) as gb_file:
        parsed = GenBank.read(gb_file)
    self.assertEqual(['NC_000932'], parsed.accession)
def test_genbank_read(self):
    """GenBank.read on NC_000932.gb yields the accession ['NC_000932']."""
    source_file = path.join("GenBank", "NC_000932.gb")
    with open(source_file) as stream:
        result = GenBank.read(stream)
    self.assertEqual(['NC_000932'], result.accession)