def test_write_db(self):
    """writing with overwrite in MPI should reset db

    Runs a load -> align -> write pipeline over the fasta members of the
    "data" directory with ``parallel=True`` and ``use_mpi=True``, then
    re-opens the written tinydb and checks that its members match those the
    writer's data store reported.
    """
    dstore = io_app.get_data_store("data", suffix="fasta")
    # exclude any member whose path has a "brca1.fasta" component
    members = dstore.filtered(
        callback=lambda x: "brca1.fasta" not in x.split("/")
    )
    reader = io_app.load_unaligned()
    aligner = align_app.align_to_ref()
    writer = write_db("delme.tinydb", create=True, if_exists="overwrite")
    process = reader + aligner + writer

    # the return value is not needed; what was written is inspected through
    # the data store attached to the composed process
    process.apply_to(
        members,
        logger=False,
        show_progress=False,
        parallel=True,
        par_kw=dict(use_mpi=True),
    )

    expect = [str(m) for m in process.data_store]
    # close the writable store before re-opening the same file below
    process.data_store.close()

    # now get read only and check what's in there
    # NOTE(review): "delme.tinydb" is not removed by this test -- confirm
    # cleanup happens elsewhere
    result = io_app.get_data_store("delme.tinydb")
    got = [str(m) for m in result]

    assert got == expect
def test_apply_to(self):
    """apply_to works on data stores, prior results, string lists and a single string"""
    from cogent3.core.alignment import SequenceCollection

    data_store = io_app.get_data_store("data", suffix="fasta", limit=3)
    loader = io_app.load_unaligned(format="fasta", moltype="dna")

    # a single app applied across the data store
    loaded = loader.apply_to(data_store, show_progress=False)
    self.assertEqual(len(loaded), len(data_store))

    # the output of one app is accepted as input by another composable app
    length_filter = sample_app.min_length(10)
    filtered = length_filter.apply_to(loaded, show_progress=False, logger=True)
    self.assertEqual(len(filtered), len(data_store))

    # a composed (chained) app behaves the same way
    pipeline = loader + length_filter
    chained = pipeline.apply_to(data_store, show_progress=False)
    self.assertEqual(len(chained), len(data_store))

    # plain string identifiers are accepted in place of data store members
    as_strings = list(map(str, data_store))
    from_strings = pipeline.apply_to(as_strings, show_progress=False)
    self.assertEqual(len(from_strings), len(data_store))

    # a lone string is treated as a single input
    first_path = str(data_store[0])
    single = pipeline.apply_to(first_path, show_progress=False)
    self.assertEqual(len(single), 1)
    self.assertIsInstance(single[0], SequenceCollection)

    # an empty list is rejected with ValueError
    self.assertRaises(ValueError, pipeline.apply_to, [])

    # so is a list containing only empty strings
    self.assertRaises(ValueError, pipeline.apply_to, ["", ""])
def test_load_unaligned(self):
    """load_unaligned returns degapped sequence collections

    Loads up to two ".fasta" members from ``self.basedir`` and checks each
    result is a SequenceCollection without gap characters whose
    ``info.source`` matches the member it was loaded from. Also checks that
    passing an already loaded collection back to the loader gives
    NotCompleted.
    """
    fasta_paths = io_app.get_data_store(self.basedir, suffix=".fasta", limit=2)
    fasta_loader = io_app.load_unaligned(format="fasta")

    # pre-bind so an empty data store is reported as a test failure below
    # instead of surfacing as a NameError
    seqs = None
    for i, seqs in enumerate(map(fasta_loader, fasta_paths)):
        self.assertIsInstance(seqs, SequenceCollection)
        # no gap character in any of the loaded sequences
        self.assertNotIn("-", "".join(seqs.to_dict().values()))
        self.assertEqual(seqs.info.source, fasta_paths[i])

    self.assertIsNotNone(seqs, "no fasta files found in data store")

    # returns NotCompleted when it's given an alignment/sequence
    # collection
    got = fasta_loader(seqs)
    self.assertIsInstance(got, NotCompleted)