def test_collect_standard_annotations(self):
     # Input triples: two for id 1, one for id 2.
     triples = [
         (1, "SampleType", "a"),
         (1, "HostSpecies", "b"),
         (2, "SubjectID", "c"),
     ]
     # Expected: one three-slot list per id, with None wherever no value
     # was supplied for that slot.
     expected = {
         1: ["a", None, "b"],
         2: [None, "c", None],
     }
     collected = CoreDb._collect_standard_annotations(triples)
     self.assertEqual(collected, expected)
Example #2
0
 def test_collect_standard_annotations(self):
     # Assemble the annotation triples one at a time: ids 1 and 2 each
     # get a different subset of the three keys.
     rows = []
     rows.append((1, "SampleType", "a"))
     rows.append((1, "HostSpecies", "b"))
     rows.append((2, "SubjectID", "c"))

     result = CoreDb._collect_standard_annotations(rows)

     # Slots that received no value are expected to come back as None.
     self.assertEqual(
         result,
         {
             1: ["a", None, "b"],
             2: [None, "c", None],
         },
     )
Example #3
0
 def setUp(self):
     # Every test starts from an empty in-memory database.
     self.db = CoreDb(":memory:")
     self.db.create_tables()

     # Argument list for the run-registration command: one positional
     # value followed by flag/value pairs.
     self.run_args = [
         "abc",
         "--lane", "1",
         "--date", "2008-09-21",
         "--type", "Illumina-MiSeq",
         "--comment", "mdsnfa adsf",
     ]

     # A single sample row.
     sample_row = {
         "SampleID": "abc123",
         "BarcodeSequence": "GGGCCT",
         "SampleType": "Oral swab",
         "bb": "cd e29",
     }
     self.samples = [sample_row]
Example #4
0
 def setUp(self):
     # Start from an empty in-memory database with a single run registered.
     self.db = CoreDb(":memory:")
     self.db.create_tables()
     self.run = (
         u"2015-10-11", u"HiSeq", u"Nextera XT", 1,
         u"run_file.fastq", u"Bob's run",
     )
     self.run_acc = self.db.register_run(*self.run)

     # Pair each sample name with its code; zip() yields the same list of
     # 2-tuples as writing the pairs out by hand.
     sample_names = ("Sample1", "Sample2", "My.Sample3")
     barcodes = ("ABC", "DEF", "GHI")
     self.sample_bcs = list(zip(sample_names, barcodes))
     self.single_sample = self.sample_bcs[0]

     self.annotations = dict([
         ("SampleType", "Oral swab"),
         ("SubjectID", "Subj23"),
         ("study_group", "Healthy"),
         ("study_day", "1"),
     ])
 def setUp(self):
     # Every test starts from an empty in-memory database.
     self.db = CoreDb(":memory:")
     self.db.create_tables()

     # Build the argument list: the positional value first, then each
     # flag immediately followed by its value.
     options = (
         ("--lane", "1"),
         ("--date", "2008-09-21"),
         ("--type", "Illumina-MiSeq"),
         ("--comment", "mdsnfa adsf"),
     )
     self.run_args = ["abc"]
     for flag, value in options:
         self.run_args.extend((flag, value))

     # A single sample row.
     self.samples = [
         {
             "SampleID": "abc123",
             "BarcodeSequence": "GGGCCT",
             "SampleType": "Oral swab",
             "bb": "cd e29",
         },
     ]
 def setUp(self):
     # Empty in-memory database for every test.
     self.db = CoreDb(":memory:")
     self.db.create_tables()

     # The run is kept as a tuple so it can be compared directly with
     # whatever the database hands back.
     run_fields = [
         u"2015-10-11",
         u"HiSeq",
         u"Nextera XT",
         1,
         u"run_file.fastq",
         u"Bob's run",
     ]
     self.run = tuple(run_fields)
     self.run_acc = self.db.register_run(*self.run)

     first_pair = ("Sample1", "ABC")
     self.sample_bcs = [first_pair, ("Sample2", "DEF"), ("My.Sample3", "GHI")]
     self.single_sample = self.sample_bcs[0]

     self.annotations = {
         "SampleType": "Oral swab", "SubjectID": "Subj23",
         "study_group": "Healthy", "study_day": "1",
     }
Example #7
0
"""Add samples and runs to the registry"""

import argparse
import itertools
import os
import re
import sys
import gzip

from sample_registry.db import CoreDb
from sample_registry.mapping import SampleTable
from sample_registry.illumina import IlluminaFastq

# Database handle built from a hard-coded filesystem path.  NOTE: the CoreDb
# object is constructed as soon as this module is imported.
REGISTRY_DATABASE = CoreDb("/var/local/sample_registry/core.db")

# Description text for the "add samples" operation.
# NOTE(review): presumably passed to an argparse parser; the use site is not
# visible in this part of the file -- confirm.
SAMPLES_DESC = """\
Add new samples to the registry, with annotations.
"""

# Description text for the "replace annotations" operation.
ANNOTATIONS_DESC = """\
Replace annotations for samples in the registry.  Samples are matched
using the sample ID and barcode sequence.
"""

# Warning text accompanying ANNOTATIONS_DESC: replacing annotations
# overwrites everything already stored for the matched samples.
ANNOTATIONS_EPILOG = """\
**BEWARE USER** This script will replace all existing annotations with
those found in the provided file!  Make sure this is what you want, or
you will be restoring database tables from backup files, as you deserve.
You have been warned!!!
"""
class CoreDbTests(unittest.TestCase):
    """Exercise CoreDb against a throwaway in-memory database."""

    def setUp(self):
        # Fresh database with one run already registered.
        self.db = CoreDb(":memory:")
        self.db.create_tables()
        self.run = (
            u"2015-10-11",
            u"HiSeq",
            u"Nextera XT",
            1,
            u"run_file.fastq",
            u"Bob's run",
        )
        self.run_acc = self.db.register_run(*self.run)

        # Three (sample name, barcode) pairs, built by zipping two columns.
        sample_names = ["Sample1", "Sample2", "My.Sample3"]
        barcodes = ["ABC", "DEF", "GHI"]
        self.sample_bcs = list(zip(sample_names, barcodes))
        self.single_sample = self.sample_bcs[0]

        self.annotations = dict([
            ("SampleType", "Oral swab"),
            ("SubjectID", "Subj23"),
            ("study_group", "Healthy"),
            ("study_day", "1"),
        ])

    def test_register_run(self):
        self.assertEqual(self.run_acc, 1)
        run_found = self.db.query_run_exists(self.run_acc)
        self.assertTrue(run_found)
        stored_run = self.db._query_run(self.run_acc)
        self.assertEqual(self.run, stored_run)
        # A second registration of the same run must be rejected
        with self.assertRaises(ValueError):
            self.db.register_run(*self.run)

    def test_query_run_exists(self):
        run_found = self.db.query_run_exists(1)
        self.assertTrue(run_found)

    def test_register_samples(self):
        # The accessions checked here are the ones handed back by the
        # registration call itself; other tests confirm the samples can
        # also be found again through a query.
        new_accessions = self.db.register_samples(1, self.sample_bcs)
        self.assertEqual(new_accessions, [1, 2, 3])
        # A second registration of the same samples must be rejected
        with self.assertRaises(ValueError):
            self.db.register_samples(1, self.sample_bcs)

    def test_query_barcoded_sample_accessions(self):
        self.db.register_samples(1, self.sample_bcs)
        found = self.db.query_barcoded_sample_accessions(1, self.sample_bcs)
        self.assertEqual(found, [1, 2, 3])

    def test_query_sample_accessions(self):
        self.db.register_samples(1, self.sample_bcs)
        found = self.db.query_sample_accessions(1)
        self.assertEqual(found, [1, 2, 3])

    def test_remove_samples(self):
        self.db.register_samples(1, self.sample_bcs)
        self.db.remove_samples([1, 2, 3])
        remaining = self.db.query_sample_accessions(1)
        self.assertEqual(remaining, [])

    def test_register_and_remove_annotations(self):
        accessions = self.db.register_samples(1, self.sample_bcs)
        # Attach the full annotation set to every sample ...
        for accession in accessions:
            rows = []
            for key, value in self.annotations.items():
                rows.append((accession, key, value))
            self.db.register_annotations(rows)

        # ... then remove them all and confirm nothing is left behind.
        self.db.remove_annotations(accessions)
        for accession in accessions:
            leftover = self.db.query_sample_annotations(accession)
            self.assertEqual(leftover, {})

    def test_register_and_query_annotations(self):
        self.db.register_samples(1, [("Sample1", "GGCCTT")])
        rows = []
        for key, value in self.annotations.items():
            rows.append((1, key, value))
        self.db.register_annotations(rows)
        stored = self.db.query_sample_annotations(1)
        self.assertEqual(stored, self.annotations)

    def test_collect_standard_annotations(self):
        # Two triples for id 1, one for id 2.
        triples = [
            (1, "SampleType", "a"),
            (1, "HostSpecies", "b"),
            (2, "SubjectID", "c"),
        ]
        # One three-slot list per id; None where no value was supplied.
        expected = {
            1: ["a", None, "b"],
            2: [None, "c", None],
        }
        collected = CoreDb._collect_standard_annotations(triples)
        self.assertEqual(collected, expected)
class RegisterScriptTests(unittest.TestCase):
    """Tests for the registration command functions.

    Each test drives one of the command functions with a list of
    string arguments and an in-memory CoreDb, then inspects the database.
    """

    def setUp(self):
        # Every test starts from an empty in-memory database.
        self.db = CoreDb(":memory:")
        self.db.create_tables()
        # Argument list handed to register_run().
        self.run_args = [
            "abc",
            "--lane", "1",
            "--date", "2008-09-21",
            "--type", "Illumina-MiSeq",
            "--comment", "mdsnfa adsf",
        ]
        # A single sample row, written out through temp_sample_file().
        self.samples = [{
            "SampleID": "abc123",
            "BarcodeSequence": "GGGCCT",
            "SampleType": "Oral swab",
            "bb": "cd e29",
        }]

    def test_rgister_run(self):
        out = io.StringIO()
        register_run(self.run_args, self.db, out)

        # Check that accession number is printed
        self.assertEqual(
            out.getvalue(),
            "Registered run 1 in the database\n"
        )

        # Check that attributes are saved in the database
        self.assertEqual(self.db._query_run(1), (
            u'2008-09-21', u'Illumina-MiSeq', u'Nextera XT', 1,
            u'abc', u'mdsnfa adsf'))

    def test_register_illumina_file(self):
        fastq_dir = (
            "Miseq/160511_M03543_0047_000000000-APE6Y/Data/Intensities/"
            "BaseCalls")
        fastq_name = "Undetermined_S0_L001_R1_001.fastq.gz"
        relative_fp = os.path.join(fastq_dir, fastq_name)

        tmp_dir = tempfile.mkdtemp()
        original_cwd = os.getcwd()
        # Everything that touches tmp_dir sits inside the try block so the
        # directory is removed (and the cwd restored) even when building
        # the fixture itself fails.
        try:
            os.makedirs(os.path.join(tmp_dir, fastq_dir))
            absolute_fp = os.path.join(tmp_dir, relative_fp)
            # GzipFile is a binary stream, so the header line is written
            # as bytes; the with-block guarantees the file is closed.
            with gzip.GzipFile(absolute_fp, "w") as f:
                f.write(
                    b"@M03543:21:C8LJ2ANXX:1:2209:1084:2044 "
                    b"1:N:0:NNNNNNNN+NNNNNNNN")

            out = io.StringIO()
            # The file is registered by its path relative to tmp_dir.
            os.chdir(tmp_dir)
            register_illumina_file(
                [relative_fp, "abcd efg"], self.db, out)
        finally:
            os.chdir(original_cwd)
            shutil.rmtree(tmp_dir)

        # relative_fp is compared as-is: no unicode() call, which does not
        # exist on Python 3.
        self.assertEqual(self.db._query_run(1), (
            u'2016-05-11', u'Illumina-MiSeq', u'Nextera XT', 1,
            relative_fp, u'abcd efg'))

    def test_register_samples(self):
        register_run(self.run_args, self.db)
        out = io.StringIO()
        sample_file = temp_sample_file(self.samples)
        args = ["1", sample_file.name]
        register_sample_annotations(args, True, self.db, out)

        # Check that accession number is assigned
        obs_accessions = self.db.query_barcoded_sample_accessions(
            1, [("abc123", "GGGCCT")])
        self.assertEqual(obs_accessions, [1])

        # Check that annotations are saved to the database
        self.assertEqual(
            self.db.query_sample_annotations(1),
            {"SampleType": "Oral swab", "bb": "cd e29"})

    def test_register_annotations(self):
        register_run(self.run_args, self.db)
        sample_file = temp_sample_file(self.samples)
        args = ["1", sample_file.name]
        register_sample_annotations(args, True, self.db)

        # Update SampleType, add fg
        new_annotations = {"SampleType": "Feces", "fg": "hi5 34"}
        modified_samples = [x.copy() for x in self.samples]
        modified_samples[0].update(new_annotations)
        # Remove bb
        del modified_samples[0]["bb"]
        sample_file = temp_sample_file(modified_samples)
        args = ["1", sample_file.name]
        register_sample_annotations(args, False, self.db)

        self.assertEqual(
            self.db.query_sample_annotations(1), new_annotations)

    def test_unregister_samples(self):
        register_run(self.run_args, self.db)
        out = io.StringIO()
        sample_file = temp_sample_file(self.samples)
        args = ["1", sample_file.name]
        register_sample_annotations(args, True, self.db, out)

        unregister_samples(["1"], self.db)
        self.assertEqual(self.db._query_nonstandard_annotations(1), {})
        self.assertEqual(self.db.query_sample_accessions(1), [])

    def test_register_sample_types(self):
        with tempfile.NamedTemporaryFile("wt") as f:
            f.write(SAMPLE_TYPES_TSV)
            # The command only receives the file name, so the buffered
            # text has to be flushed to disk before it is called.
            f.flush()
            register_sample_types([f.name], self.db)
        self.assertEqual(
            self.db.query_standard_sample_types(),
            SAMPLE_TYPES_VALS)

        # Add a new sample type and re-register
        new_line = "Extra type\t1\tJust to test"
        with tempfile.NamedTemporaryFile("wt") as f2:
            f2.write(SAMPLE_TYPES_TSV + new_line)
            f2.flush()
            register_sample_types([f2.name], self.db)
        self.assertEqual(
            self.db.query_standard_sample_types(),
            SAMPLE_TYPES_VALS + [("Extra type", 1, "Just to test")])

    def test_register_host_species(self):
        with tempfile.NamedTemporaryFile("wt") as f:
            f.write(HOST_SPECIES_TSV)
            # Flush so the contents are on disk when the command reads
            # the file by name.
            f.flush()
            register_host_species([f.name], self.db)
        self.assertEqual(
            self.db.query_standard_host_species(),
            HOST_SPECIES_VALS)

        # Add a new species to the file and re-register
        new_line = "Hippo\tTest\t1243"
        with tempfile.NamedTemporaryFile("wt") as f2:
            f2.write(HOST_SPECIES_TSV + new_line)
            f2.flush()
            register_host_species([f2.name], self.db)
        self.assertEqual(
            self.db.query_standard_host_species(),
            HOST_SPECIES_VALS + [("Hippo", "Test", 1243)])
Example #10
0
class CoreDbTests(unittest.TestCase):
    """Unit tests for CoreDb, each run on its own in-memory database."""

    def setUp(self):
        # Empty database plus one registered run shared by all tests.
        self.db = CoreDb(":memory:")
        self.db.create_tables()
        self.run = (
            u"2015-10-11", u"HiSeq", u"Nextera XT", 1,
            u"run_file.fastq", u"Bob's run",
        )
        self.run_acc = self.db.register_run(*self.run)

        first_pair = ("Sample1", "ABC")
        self.sample_bcs = [first_pair, ("Sample2", "DEF"), ("My.Sample3", "GHI")]
        self.single_sample = self.sample_bcs[0]

        self.annotations = {
            "SampleType": "Oral swab", "SubjectID": "Subj23",
            "study_group": "Healthy", "study_day": "1",
        }

    def test_register_run(self):
        self.assertEqual(self.run_acc, 1)
        exists = self.db.query_run_exists(self.run_acc)
        self.assertTrue(exists)
        fetched = self.db._query_run(self.run_acc)
        self.assertEqual(self.run, fetched)
        # The same run cannot be registered a second time
        with self.assertRaises(ValueError):
            self.db.register_run(*self.run)

    def test_query_run_exists(self):
        exists = self.db.query_run_exists(1)
        self.assertTrue(exists)

    def test_register_samples(self):
        # These accessions come straight from the registration call;
        # the query-based tests verify that the samples can really be
        # looked up afterwards.
        accessions = self.db.register_samples(1, self.sample_bcs)
        self.assertEqual(accessions, [1, 2, 3])
        # The same samples cannot be registered a second time
        with self.assertRaises(ValueError):
            self.db.register_samples(1, self.sample_bcs)

    def test_query_barcoded_sample_accessions(self):
        self.db.register_samples(1, self.sample_bcs)
        accessions = self.db.query_barcoded_sample_accessions(
            1, self.sample_bcs)
        self.assertEqual(accessions, [1, 2, 3])

    def test_query_sample_accessions(self):
        self.db.register_samples(1, self.sample_bcs)
        accessions = self.db.query_sample_accessions(1)
        self.assertEqual(accessions, [1, 2, 3])

    def test_remove_samples(self):
        self.db.register_samples(1, self.sample_bcs)
        self.db.remove_samples([1, 2, 3])
        accessions = self.db.query_sample_accessions(1)
        self.assertEqual(accessions, [])

    def test_register_and_remove_annotations(self):
        accessions = self.db.register_samples(1, self.sample_bcs)
        # Give every sample the complete annotation set.
        for accession in accessions:
            triples = []
            for name, value in self.annotations.items():
                triples.append((accession, name, value))
            self.db.register_annotations(triples)

        # After removal, each sample should report no annotations.
        self.db.remove_annotations(accessions)
        for accession in accessions:
            remaining = self.db.query_sample_annotations(accession)
            self.assertEqual(remaining, {})

    def test_register_and_query_annotations(self):
        self.db.register_samples(1, [("Sample1", "GGCCTT")])
        triples = []
        for name, value in self.annotations.items():
            triples.append((1, name, value))
        self.db.register_annotations(triples)
        stored = self.db.query_sample_annotations(1)
        self.assertEqual(stored, self.annotations)

    def test_collect_standard_annotations(self):
        # Build the input one triple at a time: ids 1 and 2 each get a
        # different subset of the three keys.
        rows = []
        rows.append((1, "SampleType", "a"))
        rows.append((1, "HostSpecies", "b"))
        rows.append((2, "SubjectID", "c"))

        result = CoreDb._collect_standard_annotations(rows)

        # Slots that received no value are expected to be None.
        self.assertEqual(
            result,
            {
                1: ["a", None, "b"],
                2: [None, "c", None],
            },
        )
Example #11
0
class RegisterScriptTests(unittest.TestCase):
    """Tests for the registration command functions.

    Each test calls a command function with a list of string arguments
    and an in-memory CoreDb, then checks what ended up in the database.
    """

    def setUp(self):
        # Every test starts from an empty in-memory database.
        self.db = CoreDb(":memory:")
        self.db.create_tables()
        # Argument list handed to register_run().
        self.run_args = [
            "abc",
            "--lane",
            "1",
            "--date",
            "2008-09-21",
            "--type",
            "Illumina-MiSeq",
            "--comment",
            "mdsnfa adsf",
        ]
        # A single sample row, written out through temp_sample_file().
        self.samples = [{
            "SampleID": "abc123",
            "BarcodeSequence": "GGGCCT",
            "SampleType": "Oral swab",
            "bb": "cd e29",
        }]

    def test_rgister_run(self):
        out = io.StringIO()
        register_run(self.run_args, self.db, out)

        # Check that accession number is printed
        self.assertEqual(out.getvalue(), "Registered run 1 in the database\n")

        # Check that attributes are saved in the database
        self.assertEqual(self.db._query_run(1),
                         (u'2008-09-21', u'Illumina-MiSeq', u'Nextera XT', 1,
                          u'abc', u'mdsnfa adsf'))

    def test_register_illumina_file(self):
        fastq_dir = (
            "Miseq/160511_M03543_0047_000000000-APE6Y/Data/Intensities/"
            "BaseCalls")
        fastq_name = "Undetermined_S0_L001_R1_001.fastq.gz"
        relative_fp = os.path.join(fastq_dir, fastq_name)

        tmp_dir = tempfile.mkdtemp()
        original_cwd = os.getcwd()
        # Fixture creation happens inside the try block so that tmp_dir is
        # always removed and the working directory always restored, even
        # if makedirs() or the gzip write fails.
        try:
            os.makedirs(os.path.join(tmp_dir, fastq_dir))
            absolute_fp = os.path.join(tmp_dir, relative_fp)
            # GzipFile is a binary stream: write bytes, and let the
            # with-block close the file.
            with gzip.GzipFile(absolute_fp, "w") as f:
                f.write(
                    b"@M03543:21:C8LJ2ANXX:1:2209:1084:2044 "
                    b"1:N:0:NNNNNNNN+NNNNNNNN")

            out = io.StringIO()
            # The file is registered by its path relative to tmp_dir.
            os.chdir(tmp_dir)
            register_illumina_file([relative_fp, "abcd efg"], self.db, out)
        finally:
            os.chdir(original_cwd)
            shutil.rmtree(tmp_dir)

        # relative_fp is compared directly; the unicode() builtin does not
        # exist on Python 3.
        self.assertEqual(self.db._query_run(1),
                         (u'2016-05-11', u'Illumina-MiSeq', u'Nextera XT', 1,
                          relative_fp, u'abcd efg'))

    def test_register_samples(self):
        register_run(self.run_args, self.db)
        out = io.StringIO()
        sample_file = temp_sample_file(self.samples)
        args = ["1", sample_file.name]
        register_sample_annotations(args, True, self.db, out)

        # Check that accession number is assigned
        obs_accessions = self.db.query_barcoded_sample_accessions(
            1, [("abc123", "GGGCCT")])
        self.assertEqual(obs_accessions, [1])

        # Check that annotations are saved to the database
        self.assertEqual(self.db.query_sample_annotations(1), {
            "SampleType": "Oral swab",
            "bb": "cd e29"
        })

    def test_register_annotations(self):
        register_run(self.run_args, self.db)
        sample_file = temp_sample_file(self.samples)
        args = ["1", sample_file.name]
        register_sample_annotations(args, True, self.db)

        # Update SampleType, add fg
        new_annotations = {"SampleType": "Feces", "fg": "hi5 34"}
        modified_samples = [x.copy() for x in self.samples]
        modified_samples[0].update(new_annotations)
        # Remove bb
        del modified_samples[0]["bb"]
        sample_file = temp_sample_file(modified_samples)
        args = ["1", sample_file.name]
        register_sample_annotations(args, False, self.db)

        self.assertEqual(self.db.query_sample_annotations(1), new_annotations)

    def test_unregister_samples(self):
        register_run(self.run_args, self.db)
        out = io.StringIO()
        sample_file = temp_sample_file(self.samples)
        args = ["1", sample_file.name]
        register_sample_annotations(args, True, self.db, out)

        unregister_samples(["1"], self.db)
        self.assertEqual(self.db._query_nonstandard_annotations(1), {})
        self.assertEqual(self.db.query_sample_accessions(1), [])

    def test_register_sample_types(self):
        with tempfile.NamedTemporaryFile("wt") as f:
            f.write(SAMPLE_TYPES_TSV)
            # The command only receives the file name, so the buffered
            # text has to reach the disk before it is called.
            f.flush()
            register_sample_types([f.name], self.db)
        self.assertEqual(self.db.query_standard_sample_types(),
                         SAMPLE_TYPES_VALS)

        # Add a new sample type and re-register
        new_line = "Extra type\t1\tJust to test"
        with tempfile.NamedTemporaryFile("wt") as f2:
            f2.write(SAMPLE_TYPES_TSV + new_line)
            f2.flush()
            register_sample_types([f2.name], self.db)
        self.assertEqual(
            self.db.query_standard_sample_types(),
            SAMPLE_TYPES_VALS + [("Extra type", 1, "Just to test")])

    def test_register_host_species(self):
        with tempfile.NamedTemporaryFile("wt") as f:
            f.write(HOST_SPECIES_TSV)
            # Flush so the contents are on disk when the command reads
            # the file by name.
            f.flush()
            register_host_species([f.name], self.db)
        self.assertEqual(self.db.query_standard_host_species(),
                         HOST_SPECIES_VALS)

        # Add a new species to the file and re-register
        new_line = "Hippo\tTest\t1243"
        with tempfile.NamedTemporaryFile("wt") as f2:
            f2.write(HOST_SPECIES_TSV + new_line)
            f2.flush()
            register_host_species([f2.name], self.db)
        self.assertEqual(self.db.query_standard_host_species(),
                         HOST_SPECIES_VALS + [("Hippo", "Test", 1243)])