def test_write_db(self):
    """writing with overwrite in MPI should reset db"""
    dstore = io_app.get_data_store("data", suffix="fasta")
    # exclude brca1.fasta so the run stays small but non-trivial
    members = dstore.filtered(
        callback=lambda x: "brca1.fasta" not in x.split("/")
    )
    reader = io_app.load_unaligned()
    aligner = align_app.align_to_ref()
    writer = write_db("delme.tinydb", create=True, if_exists="overwrite")
    process = reader + aligner + writer
    # run under MPI; the return value is not needed, only the contents
    # of the data store the process produces
    process.apply_to(
        members,
        logger=False,
        show_progress=False,
        parallel=True,
        par_kw=dict(use_mpi=True),
    )
    expect = [str(m) for m in process.data_store]
    process.data_store.close()
    # now get read only and check what's in there
    result = io_app.get_data_store("delme.tinydb")
    got = [str(m) for m in result]
    # unittest assertion for consistency with the rest of the suite and
    # a more informative failure message than a bare assert
    self.assertEqual(got, expect)
def test_load_json(self):
    """correctly loads an object from json"""
    from cogent3.app.data_store import make_record_for_json

    record = make_record_for_json("delme", DNA, True)
    serialised = json.dumps(record)
    # record written to a plain directory on disk
    with TemporaryDirectory(dir=".") as dirname:
        json_path = join(dirname, "delme.json")
        with open(json_path, "w") as out:
            out.write(serialised)
        loader = io_app.load_json()
        result = loader(json_path)
        self.assertIsInstance(result, DNA.__class__)
        self.assertEqual(result, DNA)
    # the same record stored inside a zip archive
    with TemporaryDirectory(dir=".") as dirname:
        zip_path = join(dirname, "delme.zip")
        inner_path = "delme/delme.json"
        with zipfile.ZipFile(zip_path, "a") as archive:
            archive.writestr(inner_path, serialised)
        dstore = io_app.get_data_store(zip_path, suffix="json")
        member = dstore.get_member("delme.json")
        loader = io_app.load_json()
        result = loader(member)
        self.assertIsInstance(result, DNA.__class__)
        self.assertEqual(result, DNA)
def test_apply_to(self):
    """correctly applies iteratively"""
    from cogent3.core.alignment import SequenceCollection

    dstore = io_app.get_data_store("data", suffix="fasta", limit=3)
    num = len(dstore)
    reader = io_app.load_unaligned(format="fasta", moltype="dna")
    result = reader.apply_to(dstore, show_progress=False)
    self.assertEqual(len(result), num)
    # results can be fed to another composable function
    min_length = sample_app.min_length(10)
    result = min_length.apply_to(result, show_progress=False, logger=True)
    self.assertEqual(len(result), num)
    # a chained function works too
    proc = reader + min_length
    result = proc.apply_to(dstore, show_progress=False)
    self.assertEqual(len(result), num)
    # as does a list of plain string paths
    result = proc.apply_to([str(m) for m in dstore], show_progress=False)
    self.assertEqual(len(result), num)
    # or a single string path
    result = proc.apply_to(str(dstore[0]), show_progress=False)
    self.assertEqual(len(result), 1)
    self.assertIsInstance(result[0], SequenceCollection)
    # an empty list is rejected
    with self.assertRaises(ValueError):
        proc.apply_to([])
    # as is a list containing empty strings
    with self.assertRaises(ValueError):
        proc.apply_to(["", ""])
def test_load_aligned_nexus(self):
    """should handle nexus too"""
    nexus_store = io_app.get_data_store(self.basedir, suffix="nex")
    loader = io_app.load_aligned(format="nexus")
    # every nexus member should load as an ArrayAlignment
    for member in nexus_store:
        self.assertIsInstance(loader(member), ArrayAlignment)
def test_apply_to_not_partially_done(self):
    """correctly applies process when result already partially done"""
    dstore = io_app.get_data_store("data", suffix="fasta")
    num_records = len(dstore)
    with TemporaryDirectory(dir=".") as dirname:
        outpath = pathlib.Path(dirname) / "delme.tinydb"
        reader = io_app.load_aligned(format="fasta", moltype="dna")
        # write a single record first, leaving the db partially populated
        writer = io_app.write_db(outpath)
        writer(reader(dstore[0]))
        writer.data_store.close()
        # re-open with if_exists="ignore" and run over the full store
        writer = io_app.write_db(outpath, if_exists="ignore")
        process = reader + writer
        process.apply_to(dstore, show_progress=False)
        writer.data_store.close()
        # every record, including the pre-existing one, must be present
        result = io_app.get_data_store(outpath)
        self.assertEqual(len(result), num_records)
def test_apply_to_not_completed(self):
    """correctly creates notcompleted"""
    dstore = io_app.get_data_store("data", suffix="fasta", limit=3)
    with TemporaryDirectory(dir=".") as dirname:
        reader = io_app.load_aligned(format="fasta", moltype="dna")
        # a min_length far larger than any test alignment, so every
        # member triggers creation of a NotCompleted result
        min_length = sample_app.min_length(3000)
        outpath = os.path.join(os.getcwd(), dirname, "delme.tinydb")
        writer = io_app.write_db(outpath)
        process = reader + min_length + writer
        # return value is unused; we only inspect the data store
        process.apply_to(dstore, show_progress=False)
        self.assertEqual(len(process.data_store.incomplete), 3)
        process.data_store.close()
def test_write_db_load_db2(self):
    """correctly write/load built-in python from tinydb"""
    with TemporaryDirectory(dir=".") as dirname:
        outpath = join(dirname, "delme")
        writer = write_db(outpath, create=True, if_exists="ignore")
        data = dict(a=[1, 2], b="string")
        # write under an explicit identifier; the returned member is
        # not needed, only the side effect of writing
        writer(data, identifier=join("blah", "delme.json"))
        writer.data_store.db.close()
        # read it back and confirm the round trip preserved the value
        dstore = io_app.get_data_store(f"{outpath}.tinydb", suffix="json")
        reader = io_app.load_db()
        got = reader(dstore[0])
        dstore.close()
        self.assertEqual(got, data)
def test_load_aligned(self):
    """correctly loads aligned seqs"""
    paths = io_app.get_data_store(self.basedir, suffix=".fasta", limit=2)
    loader = io_app.load_aligned(format="fasta")
    for path, aln in zip(paths, map(loader, paths)):
        # alignments are non-trivial and retain their source path
        self.assertTrue(len(aln) > 10)
        self.assertIsInstance(aln, ArrayAlignment)
        self.assertEqual(aln.info.source, path)
def test_load_unaligned(self):
    """load_unaligned returns degapped sequence collections"""
    paths = io_app.get_data_store(self.basedir, suffix=".fasta", limit=2)
    loader = io_app.load_unaligned(format="fasta")
    seqs = None
    for index, seqs in enumerate(map(loader, paths)):
        self.assertIsInstance(seqs, SequenceCollection)
        # all gap characters must have been stripped
        self.assertTrue("-" not in "".join(seqs.to_dict().values()))
        self.assertEqual(seqs.info.source, paths[index])
    # returns NotCompleted when it's given an alignment/sequence
    # collection
    got = loader(seqs)
    self.assertIsInstance(got, NotCompleted)
def test_apply_to_strings(self):
    """apply_to handles strings as paths"""
    dstore = io_app.get_data_store("data", suffix="fasta", limit=3)
    # members converted to plain string paths
    paths = [str(m) for m in dstore]
    with TemporaryDirectory(dir=".") as dirname:
        reader = io_app.load_aligned(format="fasta", moltype="dna")
        min_length = sample_app.min_length(10)
        outpath = os.path.join(os.getcwd(), dirname, "delme.tinydb")
        writer = io_app.write_db(outpath)
        process = reader + min_length + writer
        # return value is unused; the run should produce one log record
        process.apply_to(paths, show_progress=False)
        self.assertEqual(len(process.data_store.logs), 1)
        process.data_store.close()
def test_write_db_load_db(self):
    """correctly write/load from tinydb"""
    # NOTE(review): a later method in this module reuses this exact
    # name, so unittest discovery only runs the later definition --
    # one of the two should be renamed
    # straight directory
    with TemporaryDirectory(dir=".") as dirname:
        outpath = join(dirname, "delme")
        writer = write_db(outpath, create=True, if_exists="ignore")
        gr = _get_generic_result(join("blah", "delme.json"))
        # write for the side effect; the returned value is not needed
        writer(gr)
        writer.data_store.db.close()
        dstore = io_app.get_data_store(f"{outpath}.tinydb", suffix="json")
        reader = io_app.load_db()
        got = reader(dstore[0])
        dstore.close()
        # deserialise members before inspecting them
        got.deserialised_values()
        self.assertIsInstance(got["dna"], DNA.__class__)
        self.assertEqual(got["dna"], DNA)
def test_write_db_load_db(self): """correctly write/load from tinydb""" # straight directory with TemporaryDirectory(dir=".") as dirname: outpath = join(dirname, "delme") writer = write_db(outpath, create=True, if_exists="ignore") mock = patch("data.source", autospec=True) mock.to_json = DNA.to_json mock.source = join("blah", "delme.json") got = writer(mock) writer.data_store.db.close() dstore = io_app.get_data_store(f"{outpath}.tinydb", suffix="json") reader = io_app.load_db() got = reader(dstore[0]) dstore.close() self.assertIsInstance(got, DNA.__class__) self.assertEqual(got, DNA)
def test_load_db_failure_json_file(self):
    """informative load_db error message when given a json file path"""
    # todo this test has a trapped exception about being unable to delete
    # a file
    with TemporaryDirectory(dir=".") as dirname:
        outpath = join(dirname, "delme")
        writer = write_db(outpath, create=True, if_exists="ignore")
        gr = _get_generic_result(join("blah", "delme.json"))
        # write for the side effect only; the return value is not used
        writer(gr)
        writer.data_store.db.close()
        dstore = io_app.get_data_store(f"{outpath}.tinydb", suffix="json")
        reader = io_app.load_db()
        # a plain json file is not a tinydb; loading it must fail with
        # an informative NotCompleted rather than raise
        outpath = join(dirname, "dummy.json")
        with open(outpath, mode="w") as outfile:
            outfile.write("\n\n")
        got = reader(outpath)
        self.assertIsInstance(got, NotCompleted)
        self.assertTrue("json" in got.message)
def test_define_data_store(self):
    """returns an iterable data store"""
    found = io_app.get_data_store(self.basedir, suffix=".fasta")
    self.assertTrue(len(found) > 1)
    # limit caps how many members are returned
    found = io_app.get_data_store(self.basedir, suffix=".fasta", limit=2)
    self.assertTrue(len(found) == 2)
    # a wild-card suffix matches additional files
    found = list(io_app.get_data_store(self.basedir, suffix=".fasta*"))
    self.assertTrue(len(found) > 2)
    # "*" matches every file in the directory
    found = list(io_app.get_data_store(self.basedir, suffix="*"))
    self.assertEqual(len(os.listdir(self.basedir)), len(found))
    # raises ValueError if suffix not provided or invalid
    with self.assertRaises(ValueError):
        io_app.get_data_store(self.basedir)
    with self.assertRaises(ValueError):
        io_app.get_data_store(self.basedir, 1)