class CoreDbTests(unittest.TestCase):
    def setUp(self):
        self.db = CoreDb(":memory:")
        self.db.create_tables()
        self.run = (
            u"2015-10-11", u"HiSeq", u"Nextera XT", 1,
            u"run_file.fastq", u"Bob's run",
            )
        self.run_acc = self.db.register_run(*self.run)
        self.sample_bcs = [
            ("Sample1", "ABC"),
            ("Sample2", "DEF"),
            ("My.Sample3", "GHI"),
            ]
        self.single_sample = self.sample_bcs[0]
        self.annotations = {
            "SampleType": "Oral swab",
            "SubjectID": "Subj23",
            "study_group": "Healthy",
            "study_day": "1",
            }

    def test_register_run(self):        
        self.assertEqual(self.run_acc, 1)
        self.assertTrue(self.db.query_run_exists(self.run_acc))
        obs_run = self.db._query_run(self.run_acc)
        self.assertEqual(self.run, obs_run)
        # Registering the run twice should raise an error
        self.assertRaises(ValueError, self.db.register_run, *self.run)

    def test_query_run_exists(self):
        self.assertTrue(self.db.query_run_exists(1))

    def test_register_samples(self):
        # Here, accessions given by database cursor.  In other tests,
        # we double-check that we can actually find the samples in a
        # query
        registered_accessions = self.db.register_samples(
            1, self.sample_bcs)
        self.assertEqual(registered_accessions, [1, 2, 3])
        # Registering the samples again should raise an error
        self.assertRaises(
            ValueError, self.db.register_samples, 1, self.sample_bcs)

    def test_query_barcoded_sample_accessions(self):
        self.db.register_samples(1, self.sample_bcs)
        self.assertEqual(
            self.db.query_barcoded_sample_accessions(1, self.sample_bcs),
            [1, 2, 3])

    def test_query_sample_accessions(self):
        self.db.register_samples(1, self.sample_bcs)
        self.assertEqual(
            self.db.query_sample_accessions(1), [1, 2, 3])

    def test_remove_samples(self):
        self.db.register_samples(1, self.sample_bcs)
        self.db.remove_samples([1, 2, 3])
        self.assertEqual(self.db.query_sample_accessions(1), [])

    def test_register_and_remove_annotations(self):
        sample_accessions = self.db.register_samples(1, self.sample_bcs)
        for acc in sample_accessions:
            ann = [(acc, k, v) for k, v in self.annotations.items()]
            self.db.register_annotations(ann)

        self.db.remove_annotations(sample_accessions)
        for acc in sample_accessions:
            self.assertEqual(
                self.db.query_sample_annotations(acc), {})

    def test_register_and_query_annotations(self):
        self.db.register_samples(1, [("Sample1", "GGCCTT")])
        ann = [(1, k, v) for k, v in self.annotations.items()]
        self.db.register_annotations(ann)
        self.assertEqual(
            self.db.query_sample_annotations(1), self.annotations)

    def test_collect_standard_annotations(self):
        a = [
            (1, "SampleType", "a"),
            (1, "HostSpecies", "b"),
            (2, "SubjectID", "c"),
            ]
        obs = CoreDb._collect_standard_annotations(a)
        self.assertEqual(obs, {1: ["a", None, "b"], 2: [None, "c", None]})
# Example no. 2
# 0
class CoreDbTests(unittest.TestCase):
    def setUp(self):
        self.db = CoreDb(":memory:")
        self.db.create_tables()
        self.run = (
            u"2015-10-11",
            u"HiSeq",
            u"Nextera XT",
            1,
            u"run_file.fastq",
            u"Bob's run",
        )
        self.run_acc = self.db.register_run(*self.run)
        self.sample_bcs = [
            ("Sample1", "ABC"),
            ("Sample2", "DEF"),
            ("My.Sample3", "GHI"),
        ]
        self.single_sample = self.sample_bcs[0]
        self.annotations = {
            "SampleType": "Oral swab",
            "SubjectID": "Subj23",
            "study_group": "Healthy",
            "study_day": "1",
        }

    def test_register_run(self):
        self.assertEqual(self.run_acc, 1)
        self.assertTrue(self.db.query_run_exists(self.run_acc))
        obs_run = self.db._query_run(self.run_acc)
        self.assertEqual(self.run, obs_run)
        # Registering the run twice should raise an error
        self.assertRaises(ValueError, self.db.register_run, *self.run)

    def test_query_run_exists(self):
        self.assertTrue(self.db.query_run_exists(1))

    def test_register_samples(self):
        # Here, accessions given by database cursor.  In other tests,
        # we double-check that we can actually find the samples in a
        # query
        registered_accessions = self.db.register_samples(1, self.sample_bcs)
        self.assertEqual(registered_accessions, [1, 2, 3])
        # Registering the samples again should raise an error
        self.assertRaises(ValueError, self.db.register_samples, 1,
                          self.sample_bcs)

    def test_query_barcoded_sample_accessions(self):
        self.db.register_samples(1, self.sample_bcs)
        self.assertEqual(
            self.db.query_barcoded_sample_accessions(1, self.sample_bcs),
            [1, 2, 3])

    def test_query_sample_accessions(self):
        self.db.register_samples(1, self.sample_bcs)
        self.assertEqual(self.db.query_sample_accessions(1), [1, 2, 3])

    def test_remove_samples(self):
        self.db.register_samples(1, self.sample_bcs)
        self.db.remove_samples([1, 2, 3])
        self.assertEqual(self.db.query_sample_accessions(1), [])

    def test_register_and_remove_annotations(self):
        sample_accessions = self.db.register_samples(1, self.sample_bcs)
        for acc in sample_accessions:
            ann = [(acc, k, v) for k, v in self.annotations.items()]
            self.db.register_annotations(ann)

        self.db.remove_annotations(sample_accessions)
        for acc in sample_accessions:
            self.assertEqual(self.db.query_sample_annotations(acc), {})

    def test_register_and_query_annotations(self):
        self.db.register_samples(1, [("Sample1", "GGCCTT")])
        ann = [(1, k, v) for k, v in self.annotations.items()]
        self.db.register_annotations(ann)
        self.assertEqual(self.db.query_sample_annotations(1), self.annotations)

    def test_collect_standard_annotations(self):
        a = [
            (1, "SampleType", "a"),
            (1, "HostSpecies", "b"),
            (2, "SubjectID", "c"),
        ]
        obs = CoreDb._collect_standard_annotations(a)
        self.assertEqual(obs, {1: ["a", None, "b"], 2: [None, "c", None]})
class RegisterScriptTests(unittest.TestCase):
    def setUp(self):
        self.db = CoreDb(":memory:")
        self.db.create_tables()
        self.run_args = [
            "abc",
            "--lane", "1",
            "--date", "2008-09-21",
            "--type", "Illumina-MiSeq",
            "--comment", "mdsnfa adsf",
        ]
        self.samples = [{
            "SampleID": "abc123",
            "BarcodeSequence": "GGGCCT",
            "SampleType": "Oral swab",
            "bb": "cd e29",
        }]

    def test_rgister_run(self):
        out = io.StringIO()
        register_run(self.run_args, self.db, out)

        # Check that accession number is printed
        self.assertEqual(
            out.getvalue(),
            "Registered run 1 in the database\n"
        )

        # Check that attributes are saved in the database
        self.assertEqual(self.db._query_run(1), (
            u'2008-09-21', u'Illumina-MiSeq', u'Nextera XT', 1,
            u'abc', u'mdsnfa adsf'))

    def test_register_illumina_file(self):
        tmp_dir = tempfile.mkdtemp()
        fastq_dir = (
            "Miseq/160511_M03543_0047_000000000-APE6Y/Data/Intensities/"
            "BaseCalls")
        fastq_name = "Undetermined_S0_L001_R1_001.fastq.gz"

        os.makedirs(os.path.join(tmp_dir, fastq_dir))
        relative_fp = os.path.join(fastq_dir, fastq_name)
        absolute_fp = os.path.join(tmp_dir, relative_fp)
        f = gzip.GzipFile(absolute_fp, "w")
        f.write("@M03543:21:C8LJ2ANXX:1:2209:1084:2044 1:N:0:NNNNNNNN+NNNNNNNN")
        f.close()

        out = io.StringIO()
        original_cwd = os.getcwd()
        os.chdir(tmp_dir)
        try:
            register_illumina_file(
                [relative_fp, "abcd efg"], self.db, out)
        finally:
            os.chdir(original_cwd)
            shutil.rmtree(tmp_dir)

        self.assertEqual(self.db._query_run(1), (
            u'2016-05-11', u'Illumina-MiSeq', u'Nextera XT', 1,
            unicode(relative_fp), u'abcd efg'))

    def test_register_samples(self):
        register_run(self.run_args, self.db)
        out = io.StringIO()
        sample_file = temp_sample_file(self.samples)
        args = ["1", sample_file.name]
        register_sample_annotations(args, True, self.db, out)

        # Check that accession number is assigned
        obs_accessions = self.db.query_barcoded_sample_accessions(
            1, [("abc123", "GGGCCT")])
        self.assertEqual(obs_accessions, [1])

        # Check that annotations are saved to the database
        self.assertEqual(
            self.db.query_sample_annotations(1),
            {"SampleType": "Oral swab", "bb": "cd e29"})

    def test_register_annotations(self):
        register_run(self.run_args, self.db)
        sample_file = temp_sample_file(self.samples)
        args = [ "1", sample_file.name]
        register_sample_annotations(args, True, self.db)

        # Update SampleType, add fg
        new_annotations = {"SampleType": "Feces", "fg": "hi5 34"}
        modified_samples = [x.copy() for x in self.samples]
        modified_samples[0].update(new_annotations)
        # Remove bb
        del modified_samples[0]["bb"]
        sample_file = temp_sample_file(modified_samples)
        args = ["1", sample_file.name]
        register_sample_annotations(args, False, self.db)

        self.assertEqual(
            self.db.query_sample_annotations(1), new_annotations)

    def test_unregister_samples(self):
        register_run(self.run_args, self.db)
        out = io.StringIO()
        sample_file = temp_sample_file(self.samples)
        args = ["1", sample_file.name]
        register_sample_annotations(args, True, self.db, out)

        unregister_samples(["1"], self.db)
        self.assertEqual(self.db._query_nonstandard_annotations(1), {})
        self.assertEqual(self.db.query_sample_accessions(1), [])

    def test_register_sample_types(self):
        f = tempfile.NamedTemporaryFile("wt")
        f.write(SAMPLE_TYPES_TSV)
        f.seek(0)

        register_sample_types([f.name], self.db)
        self.assertEqual(
            self.db.query_standard_sample_types(),
            SAMPLE_TYPES_VALS)

        # Add a new sample type and re-register
        new_line = "Extra type	1	Just to test"
        f2 = tempfile.NamedTemporaryFile("wt")
        f2.write(SAMPLE_TYPES_TSV + new_line)
        f2.seek(0)

        register_sample_types([f2.name], self.db)
        self.assertEqual(
            self.db.query_standard_sample_types(),
            SAMPLE_TYPES_VALS + [("Extra type", 1, "Just to test")])

    def test_register_host_species(self):
        f = tempfile.NamedTemporaryFile("wt")
        f.write(HOST_SPECIES_TSV)
        f.seek(0)

        register_host_species([f.name], self.db)
        self.assertEqual(
            self.db.query_standard_host_species(),
            HOST_SPECIES_VALS)

        # Add a new species to the file and re-register
        new_line = "Hippo	Test	1243"
        f2 = tempfile.NamedTemporaryFile("wt")
        f2.write(HOST_SPECIES_TSV + new_line)
        f2.seek(0)

        register_host_species([f2.name], self.db)
        self.assertEqual(
            self.db.query_standard_host_species(),
            HOST_SPECIES_VALS + [("Hippo", "Test", 1243)])
# Example no. 4
# 0
class RegisterScriptTests(unittest.TestCase):
    def setUp(self):
        self.db = CoreDb(":memory:")
        self.db.create_tables()
        self.run_args = [
            "abc",
            "--lane",
            "1",
            "--date",
            "2008-09-21",
            "--type",
            "Illumina-MiSeq",
            "--comment",
            "mdsnfa adsf",
        ]
        self.samples = [{
            "SampleID": "abc123",
            "BarcodeSequence": "GGGCCT",
            "SampleType": "Oral swab",
            "bb": "cd e29",
        }]

    def test_rgister_run(self):
        out = io.StringIO()
        register_run(self.run_args, self.db, out)

        # Check that accession number is printed
        self.assertEqual(out.getvalue(), "Registered run 1 in the database\n")

        # Check that attributes are saved in the database
        self.assertEqual(self.db._query_run(1),
                         (u'2008-09-21', u'Illumina-MiSeq', u'Nextera XT', 1,
                          u'abc', u'mdsnfa adsf'))

    def test_register_illumina_file(self):
        tmp_dir = tempfile.mkdtemp()
        fastq_dir = (
            "Miseq/160511_M03543_0047_000000000-APE6Y/Data/Intensities/"
            "BaseCalls")
        fastq_name = "Undetermined_S0_L001_R1_001.fastq.gz"

        os.makedirs(os.path.join(tmp_dir, fastq_dir))
        relative_fp = os.path.join(fastq_dir, fastq_name)
        absolute_fp = os.path.join(tmp_dir, relative_fp)
        f = gzip.GzipFile(absolute_fp, "w")
        f.write(
            "@M03543:21:C8LJ2ANXX:1:2209:1084:2044 1:N:0:NNNNNNNN+NNNNNNNN")
        f.close()

        out = io.StringIO()
        original_cwd = os.getcwd()
        os.chdir(tmp_dir)
        try:
            register_illumina_file([relative_fp, "abcd efg"], self.db, out)
        finally:
            os.chdir(original_cwd)
            shutil.rmtree(tmp_dir)

        self.assertEqual(self.db._query_run(1),
                         (u'2016-05-11', u'Illumina-MiSeq', u'Nextera XT', 1,
                          unicode(relative_fp), u'abcd efg'))

    def test_register_samples(self):
        register_run(self.run_args, self.db)
        out = io.StringIO()
        sample_file = temp_sample_file(self.samples)
        args = ["1", sample_file.name]
        register_sample_annotations(args, True, self.db, out)

        # Check that accession number is assigned
        obs_accessions = self.db.query_barcoded_sample_accessions(
            1, [("abc123", "GGGCCT")])
        self.assertEqual(obs_accessions, [1])

        # Check that annotations are saved to the database
        self.assertEqual(self.db.query_sample_annotations(1), {
            "SampleType": "Oral swab",
            "bb": "cd e29"
        })

    def test_register_annotations(self):
        register_run(self.run_args, self.db)
        sample_file = temp_sample_file(self.samples)
        args = ["1", sample_file.name]
        register_sample_annotations(args, True, self.db)

        # Update SampleType, add fg
        new_annotations = {"SampleType": "Feces", "fg": "hi5 34"}
        modified_samples = [x.copy() for x in self.samples]
        modified_samples[0].update(new_annotations)
        # Remove bb
        del modified_samples[0]["bb"]
        sample_file = temp_sample_file(modified_samples)
        args = ["1", sample_file.name]
        register_sample_annotations(args, False, self.db)

        self.assertEqual(self.db.query_sample_annotations(1), new_annotations)

    def test_unregister_samples(self):
        register_run(self.run_args, self.db)
        out = io.StringIO()
        sample_file = temp_sample_file(self.samples)
        args = ["1", sample_file.name]
        register_sample_annotations(args, True, self.db, out)

        unregister_samples(["1"], self.db)
        self.assertEqual(self.db._query_nonstandard_annotations(1), {})
        self.assertEqual(self.db.query_sample_accessions(1), [])

    def test_register_sample_types(self):
        f = tempfile.NamedTemporaryFile("wt")
        f.write(SAMPLE_TYPES_TSV)
        f.seek(0)

        register_sample_types([f.name], self.db)
        self.assertEqual(self.db.query_standard_sample_types(),
                         SAMPLE_TYPES_VALS)

        # Add a new sample type and re-register
        new_line = "Extra type	1	Just to test"
        f2 = tempfile.NamedTemporaryFile("wt")
        f2.write(SAMPLE_TYPES_TSV + new_line)
        f2.seek(0)

        register_sample_types([f2.name], self.db)
        self.assertEqual(
            self.db.query_standard_sample_types(),
            SAMPLE_TYPES_VALS + [("Extra type", 1, "Just to test")])

    def test_register_host_species(self):
        f = tempfile.NamedTemporaryFile("wt")
        f.write(HOST_SPECIES_TSV)
        f.seek(0)

        register_host_species([f.name], self.db)
        self.assertEqual(self.db.query_standard_host_species(),
                         HOST_SPECIES_VALS)

        # Add a new species to the file and re-register
        new_line = "Hippo	Test	1243"
        f2 = tempfile.NamedTemporaryFile("wt")
        f2.write(HOST_SPECIES_TSV + new_line)
        f2.seek(0)

        register_host_species([f2.name], self.db)
        self.assertEqual(self.db.query_standard_host_species(),
                         HOST_SPECIES_VALS + [("Hippo", "Test", 1243)])