Ejemplo n.º 1
0
    def test_write_db(self):
        """writing with overwrite in MPI should reset db

        A reader + aligner + writer process is applied, with
        ``parallel=True`` and ``use_mpi=True``, to every fasta member of
        "data" whose path does not contain "brca1.fasta". The identifiers
        seen via the process's own data store must equal those found when
        "delme.tinydb" is opened again afterwards.
        """
        dstore = io_app.get_data_store("data", suffix="fasta")
        members = dstore.filtered(
            callback=lambda x: "brca1.fasta" not in x.split("/"))
        reader = io_app.load_unaligned()
        aligner = align_app.align_to_ref()
        writer = write_db("delme.tinydb", create=True, if_exists="overwrite")
        process = reader + aligner + writer

        # the return value is not inspected; the check is on what was stored
        process.apply_to(
            members,
            logger=False,
            show_progress=False,
            parallel=True,
            par_kw=dict(use_mpi=True),
        )

        expect = [str(m) for m in process.data_store]
        process.data_store.close()

        # now get read only and record what's in there; close the store
        # before asserting so a failing comparison does not leave it open
        result = io_app.get_data_store("delme.tinydb")
        got = [str(m) for m in result]
        result.close()

        assert got == expect
Ejemplo n.º 2
0
    def test_load_json(self):
        """load_json recovers the object from a plain file and from a zip member"""
        from cogent3.app.data_store import make_record_for_json

        serialised = json.dumps(make_record_for_json("delme", DNA, True))

        # case 1: the json file sits directly inside a directory
        with TemporaryDirectory(dir=".") as tmp_dir:
            json_path = join(tmp_dir, "delme.json")
            with open(json_path, "w") as handle:
                handle.write(serialised)
            loader = io_app.load_json()
            loaded = loader(json_path)
            self.assertIsInstance(loaded, DNA.__class__)
            self.assertEqual(loaded, DNA)

        # case 2: the json is a member of a zip archive
        with TemporaryDirectory(dir=".") as tmp_dir:
            archive_path = join(tmp_dir, "delme.zip")
            member_name = "delme/delme.json"
            with zipfile.ZipFile(archive_path, "a") as archive:
                archive.writestr(member_name, serialised)

            zipped_store = io_app.get_data_store(archive_path, suffix="json")
            zipped_member = zipped_store.get_member("delme.json")
            loader = io_app.load_json()
            loaded = loader(zipped_member)
            self.assertIsInstance(loaded, DNA.__class__)
            self.assertEqual(loaded, DNA)
Ejemplo n.º 3
0
    def test_apply_to(self):
        """apply_to accepts data stores, earlier results, path lists and one path"""
        from cogent3.core.alignment import SequenceCollection

        store = io_app.get_data_store("data", suffix="fasta", limit=3)
        loader = io_app.load_unaligned(format="fasta", moltype="dna")

        # a single app applied to the data store
        loaded = loader.apply_to(store, show_progress=False)
        self.assertEqual(len(loaded), len(store))

        # those results can be fed to another composable app
        length_filter = sample_app.min_length(10)
        filtered = length_filter.apply_to(loaded, show_progress=False, logger=True)
        self.assertEqual(len(filtered), len(store))

        # a chained process applied to the data store
        pipeline = loader + length_filter
        piped = pipeline.apply_to(store, show_progress=False)
        self.assertEqual(len(piped), len(store))

        # the same process given a list of plain strings
        as_strings = [str(member) for member in store]
        piped = pipeline.apply_to(as_strings, show_progress=False)
        self.assertEqual(len(piped), len(store))

        # the same process given just one string
        piped = pipeline.apply_to(str(store[0]), show_progress=False)
        self.assertEqual(len(piped), 1)
        self.assertIsInstance(piped[0], SequenceCollection)

        # an empty list, or a list of empty strings, raises ValueError
        for unusable in ([], ["", ""]):
            with self.assertRaises(ValueError):
                pipeline.apply_to(unusable)
Ejemplo n.º 4
0
 def test_load_aligned_nexus(self):
     """load_aligned with format="nexus" produces ArrayAlignment objects"""
     nexus_store = io_app.get_data_store(self.basedir, suffix="nex")
     nexus_loader = io_app.load_aligned(format="nexus")
     # load every member first, then type-check each loaded object
     alignments = list(map(nexus_loader, nexus_store))
     for aln in alignments:
         self.assertIsInstance(aln, ArrayAlignment)
Ejemplo n.º 5
0
    def test_apply_to_not_partially_done(self):
        """correctly applies process when result already partially done

        The output tinydb is first seeded with the record for the first
        member only. The full process is then applied to every member using
        a writer created with ``if_exists="ignore"``, and the re-opened
        output must contain as many records as the input data store.
        """
        dstore = io_app.get_data_store("data", suffix="fasta")
        num_records = len(dstore)
        with TemporaryDirectory(dir=".") as dirname:
            dirname = pathlib.Path(dirname)
            reader = io_app.load_aligned(format="fasta", moltype="dna")
            outpath = dirname / "delme.tinydb"

            # first pass: write just the first member, then close the store
            writer = io_app.write_db(outpath)
            _ = writer(reader(dstore[0]))
            writer.data_store.close()

            # second pass: every member, writing to the already existing file
            writer = io_app.write_db(outpath, if_exists="ignore")
            process = reader + writer
            _ = process.apply_to(dstore, show_progress=False)
            writer.data_store.close()

            # read the record count and close the re-opened store while the
            # temporary directory still exists, so no handle on the file
            # is left open when the directory is removed
            written = io_app.get_data_store(outpath)
            num_written = len(written)
            written.close()
            self.assertEqual(num_written, num_records)
Ejemplo n.º 6
0
 def test_apply_to_not_completed(self):
     """the data store records an incomplete entry for each of the 3 members"""
     members = io_app.get_data_store("data", suffix="fasta", limit=3)
     with TemporaryDirectory(dir=".") as tmp_dir:
         loader = io_app.load_aligned(format="fasta", moltype="dna")
         # min_length(3000) is used to trigger creation of notcompleted
         length_filter = sample_app.min_length(3000)
         db_path = os.path.join(os.getcwd(), tmp_dir, "delme.tinydb")
         db_writer = io_app.write_db(db_path)
         pipeline = loader + length_filter + db_writer
         _ = pipeline.apply_to(members, show_progress=False)
         num_incomplete = len(pipeline.data_store.incomplete)
         self.assertEqual(num_incomplete, 3)
         pipeline.data_store.close()
Ejemplo n.º 7
0
 def test_write_db_load_db2(self):
     """a built-in python dict survives a write_db / load_db round trip"""
     with TemporaryDirectory(dir=".") as tmp_dir:
         base_path = join(tmp_dir, "delme")
         db_writer = write_db(base_path, create=True, if_exists="ignore")
         original = {"a": [1, 2], "b": "string"}
         record_id = join("blah", "delme.json")
         _ = db_writer(original, identifier=record_id)
         db_writer.data_store.db.close()
         # re-open the written file and load back its first member
         stored = io_app.get_data_store(f"{base_path}.tinydb", suffix="json")
         db_reader = io_app.load_db()
         recovered = db_reader(stored[0])
         stored.close()
         self.assertEqual(recovered, original)
Ejemplo n.º 8
0
    def test_load_aligned(self):
        """correctly loads aligned seqs

        Each loaded object must be an ArrayAlignment longer than 10 whose
        ``info.source`` equals the data store member it was loaded from.
        """

        def validate(paths, loader):
            """Load every member of ``paths`` with ``loader`` and check each."""
            loaded = list(map(loader, paths))
            for i, aln in enumerate(loaded):
                # assertGreater reports the actual length when it fails
                self.assertGreater(len(aln), 10)
                self.assertIsInstance(aln, ArrayAlignment)
                self.assertEqual(aln.info.source, paths[i])

        fasta_paths = io_app.get_data_store(self.basedir, suffix=".fasta", limit=2)
        fasta_loader = io_app.load_aligned(format="fasta")
        validate(fasta_paths, fasta_loader)
Ejemplo n.º 9
0
    def test_load_unaligned(self):
        """load_unaligned returns degapped sequence collections

        Also checks that calling the loader on an already loaded sequence
        collection gives a NotCompleted result.
        """
        fasta_paths = io_app.get_data_store(self.basedir, suffix=".fasta", limit=2)
        fasta_loader = io_app.load_unaligned(format="fasta")
        # pre-bind so an empty data store gives a clear test failure below
        # rather than a NameError on the loop variable
        seqs = None
        for i, seqs in enumerate(map(fasta_loader, fasta_paths)):
            self.assertIsInstance(seqs, SequenceCollection)
            # no gap characters may remain in any sequence
            self.assertNotIn("-", "".join(seqs.to_dict().values()))
            self.assertEqual(seqs.info.source, fasta_paths[i])

        self.assertIsNotNone(seqs, "no fasta files were loaded")

        # returns NotCompleted when it's given an alignment/sequence
        # collection (here, the last one loaded above)
        got = fasta_loader(seqs)
        self.assertIsInstance(got, NotCompleted)
Ejemplo n.º 10
0
 def test_apply_to_strings(self):
     """apply_to accepts member paths given as plain strings"""
     fasta_store = io_app.get_data_store("data", suffix="fasta", limit=3)
     # convert the members to strings before handing them to the process
     path_strings = list(map(str, fasta_store))
     with TemporaryDirectory(dir=".") as tmp_dir:
         loader = io_app.load_aligned(format="fasta", moltype="dna")
         length_filter = sample_app.min_length(10)
         db_path = os.path.join(os.getcwd(), tmp_dir, "delme.tinydb")
         db_writer = io_app.write_db(db_path)
         pipeline = loader + length_filter + db_writer
         _ = pipeline.apply_to(path_strings, show_progress=False)
         num_logs = len(pipeline.data_store.logs)
         self.assertEqual(num_logs, 1)
         pipeline.data_store.close()
Ejemplo n.º 11
0
 def test_write_db_load_db(self):
     """a generic result written by write_db is recovered by load_db"""
     # the tinydb file lives in a temporary directory
     with TemporaryDirectory(dir=".") as tmp_dir:
         base_path = join(tmp_dir, "delme")
         db_writer = write_db(base_path, create=True, if_exists="ignore")
         generic = _get_generic_result(join("blah", "delme.json"))
         _ = db_writer(generic)
         db_writer.data_store.db.close()
         # re-open the written file and load back its first member
         stored = io_app.get_data_store(f"{base_path}.tinydb", suffix="json")
         db_reader = io_app.load_db()
         recovered = db_reader(stored[0])
         stored.close()
         recovered.deserialised_values()
         dna_value = recovered["dna"]
         self.assertIsInstance(dna_value, DNA.__class__)
         self.assertEqual(dna_value, DNA)
Ejemplo n.º 12
0
 def test_write_db_load_db(self):
     """an object exposing to_json and source round trips through tinydb"""
     # the tinydb file lives in a temporary directory
     with TemporaryDirectory(dir=".") as tmp_dir:
         base_path = join(tmp_dir, "delme")
         db_writer = write_db(base_path, create=True, if_exists="ignore")
         # NOTE(review): the object returned by patch(...) is never started
         # or entered as a context manager in this test; it is only used as
         # a holder for the to_json / source attributes -- confirm intent
         stand_in = patch("data.source", autospec=True)
         stand_in.to_json = DNA.to_json
         stand_in.source = join("blah", "delme.json")
         _ = db_writer(stand_in)
         db_writer.data_store.db.close()
         # re-open the written file and load back its first member
         stored = io_app.get_data_store(f"{base_path}.tinydb", suffix="json")
         db_reader = io_app.load_db()
         recovered = db_reader(stored[0])
         stored.close()
         self.assertIsInstance(recovered, DNA.__class__)
         self.assertEqual(recovered, DNA)
Ejemplo n.º 13
0
    def test_load_db_failure_json_file(self):
        """informative load_db error message when given a json file path

        A tinydb is written first, then load_db is called directly on the
        path of a separate json file. The result must be a NotCompleted
        whose message mentions "json".
        """
        # todo this test has a trapped exception about being unable to delete
        # a file
        # NOTE(review): this test used to open a data store on the tinydb
        # file that was never used or closed; it is no longer opened, which
        # may be what prevented deletion -- confirm whether the trapped
        # exception still occurs
        with TemporaryDirectory(dir=".") as dirname:
            db_path = join(dirname, "delme")
            writer = write_db(db_path, create=True, if_exists="ignore")
            gr = _get_generic_result(join("blah", "delme.json"))
            # the write result is not inspected by this test
            writer(gr)
            writer.data_store.db.close()
            reader = io_app.load_db()

            # a separate file with a json suffix, containing only newlines
            json_path = join(dirname, "dummy.json")
            with open(json_path, mode="w") as outfile:
                outfile.write("\n\n")

            got = reader(json_path)
            self.assertIsInstance(got, NotCompleted)
            # assertIn shows the actual message when the check fails
            self.assertIn("json", got.message)
Ejemplo n.º 14
0
    def test_define_data_store(self):
        """returns an iterable data store

        Covers an explicit suffix with and without ``limit``, suffixes
        containing a wild-card, and the ValueError raised when the suffix
        is missing or not valid.
        """
        # the size assertions below report the actual counts on failure
        found = io_app.get_data_store(self.basedir, suffix=".fasta")
        self.assertGreater(len(found), 1)
        found = io_app.get_data_store(self.basedir, suffix=".fasta", limit=2)
        self.assertEqual(len(found), 2)

        # a suffix ending in a wild-card
        found = list(io_app.get_data_store(self.basedir, suffix=".fasta*"))
        self.assertGreater(len(found), 2)

        # a suffix that is only a wild-card finds as many members as
        # os.listdir reports entries in basedir
        found = list(io_app.get_data_store(self.basedir, suffix="*"))
        self.assertEqual(len(os.listdir(self.basedir)), len(found))

        # raises ValueError if suffix not provided or invalid
        with self.assertRaises(ValueError):
            _ = io_app.get_data_store(self.basedir)

        with self.assertRaises(ValueError):
            _ = io_app.get_data_store(self.basedir, 1)