def test_collect_standard_annotations(self):
    # Flat (accession, key, value) rows go in; a dict keyed by accession
    # comes out.  Going by the expected value below, each accession gets
    # a three-slot list ordered SampleType, SubjectID, HostSpecies, and
    # slots with no matching row hold None.
    annotation_rows = [
        (1, "SampleType", "a"),
        (1, "HostSpecies", "b"),
        (2, "SubjectID", "c"),
    ]
    expected = {
        1: ["a", None, "b"],
        2: [None, "c", None],
    }
    collected = CoreDb._collect_standard_annotations(annotation_rows)
    self.assertEqual(collected, expected)
def setUp(self):
    # Every test starts from an empty in-memory database.
    self.db = CoreDb(":memory:")
    self.db.create_tables()

    # Argument vector for registering a run: one positional value
    # followed by option/value pairs.
    positional = ["abc"]
    options = [
        "--lane", "1",
        "--date", "2008-09-21",
        "--type", "Illumina-MiSeq",
        "--comment", "mdsnfa adsf",
    ]
    self.run_args = positional + options

    # A single sample record, keyed by column name.
    sample_record = {
        "SampleID": "abc123",
        "BarcodeSequence": "GGGCCT",
        "SampleType": "Oral swab",
        "bb": "cd e29",
    }
    self.samples = [sample_record]
def setUp(self):
    # Every test starts from an empty in-memory database.
    self.db = CoreDb(":memory:")
    self.db.create_tables()

    # Positional arguments for CoreDb.register_run().
    # NOTE(review): on a unittest.TestCase this attribute shadows the
    # inherited run() method on the instance; the name is kept here
    # because methods outside this block presumably read self.run.
    self.run = (
        u"2015-10-11",
        u"HiSeq",
        u"Nextera XT",
        1,
        u"run_file.fastq",
        u"Bob's run",
    )
    self.run_acc = self.db.register_run(*self.run)

    # (sample name, barcode) pairs.
    sample_names = ["Sample1", "Sample2", "My.Sample3"]
    barcodes = ["ABC", "DEF", "GHI"]
    self.sample_bcs = list(zip(sample_names, barcodes))
    self.single_sample = self.sample_bcs[0]

    # Annotation key/value pairs attached to samples in the tests.
    self.annotations = {
        "SampleType": "Oral swab",
        "SubjectID": "Subj23",
        "study_group": "Healthy",
        "study_day": "1",
    }
"""Add samples and runs to the registry"""

import argparse
import itertools
import os
import re
import sys
import gzip

from sample_registry.db import CoreDb
from sample_registry.mapping import SampleTable
from sample_registry.illumina import IlluminaFastq

# Module-level database handle bound to a fixed filesystem path.
# NOTE(review): constructed at import time; presumably the default
# database for the script entry points -- confirm against the callers.
REGISTRY_DATABASE = CoreDb("/var/local/sample_registry/core.db")

# Help text for adding samples.
# NOTE(review): presumably passed to argparse as a description -- confirm.
SAMPLES_DESC = """\
Add new samples to the registry, with annotations.
"""

# Help text for replacing annotations.
ANNOTATIONS_DESC = """\
Replace annotations for samples in the registry. Samples are matched
using the sample ID and barcode sequence.
"""

# Warning text that accompanies ANNOTATIONS_DESC: replacing annotations
# discards the existing ones.
ANNOTATIONS_EPILOG = """\
**BEWARE USER** This script will replace all existing annotations with
those found in the provided file! Make sure this is what you want, or
you will be restoring database tables from backup files, as you
deserve. You have been warned!!!
"""
class CoreDbTests(unittest.TestCase):
    """Exercise CoreDb run, sample and annotation methods.

    Each test gets a fresh in-memory database in which one run has
    already been registered by setUp.
    """

    def setUp(self):
        self.db = CoreDb(":memory:")
        self.db.create_tables()
        # Stored as ``run_fields`` rather than ``run``: an instance
        # attribute named ``run`` would shadow unittest.TestCase.run(),
        # so the test instance could not be executed a second time.
        self.run_fields = (
            u"2015-10-11",
            u"HiSeq",
            u"Nextera XT",
            1,
            u"run_file.fastq",
            u"Bob's run",
        )
        self.run_acc = self.db.register_run(*self.run_fields)
        # (sample name, barcode) pairs.
        self.sample_bcs = [
            ("Sample1", "ABC"),
            ("Sample2", "DEF"),
            ("My.Sample3", "GHI"),
        ]
        self.single_sample = self.sample_bcs[0]
        self.annotations = {
            "SampleType": "Oral swab",
            "SubjectID": "Subj23",
            "study_group": "Healthy",
            "study_day": "1",
        }

    def test_register_run(self):
        # The run registered in setUp is the first one in the database.
        self.assertEqual(self.run_acc, 1)
        self.assertTrue(self.db.query_run_exists(self.run_acc))
        obs_run = self.db._query_run(self.run_acc)
        self.assertEqual(self.run_fields, obs_run)
        # Registering the run twice should raise an error
        self.assertRaises(
            ValueError, self.db.register_run, *self.run_fields)

    def test_query_run_exists(self):
        self.assertTrue(self.db.query_run_exists(1))

    def test_register_samples(self):
        # Here, accessions given by database cursor.  In other tests,
        # we double-check that we can actually find the samples in a
        # query
        registered_accessions = self.db.register_samples(
            1, self.sample_bcs)
        self.assertEqual(registered_accessions, [1, 2, 3])
        # Registering the samples again should raise an error
        self.assertRaises(
            ValueError, self.db.register_samples, 1, self.sample_bcs)

    def test_query_barcoded_sample_accessions(self):
        self.db.register_samples(1, self.sample_bcs)
        self.assertEqual(
            self.db.query_barcoded_sample_accessions(1, self.sample_bcs),
            [1, 2, 3])

    def test_query_sample_accessions(self):
        self.db.register_samples(1, self.sample_bcs)
        self.assertEqual(
            self.db.query_sample_accessions(1), [1, 2, 3])

    def test_remove_samples(self):
        self.db.register_samples(1, self.sample_bcs)
        self.db.remove_samples([1, 2, 3])
        self.assertEqual(self.db.query_sample_accessions(1), [])

    def test_register_and_remove_annotations(self):
        sample_accessions = self.db.register_samples(1, self.sample_bcs)
        # Attach the full annotation set to every sample.
        for acc in sample_accessions:
            ann = [(acc, k, v) for k, v in self.annotations.items()]
            self.db.register_annotations(ann)
        self.db.remove_annotations(sample_accessions)
        # After removal no sample may report any annotation.
        for acc in sample_accessions:
            self.assertEqual(
                self.db.query_sample_annotations(acc), {})

    def test_register_and_query_annotations(self):
        self.db.register_samples(1, [("Sample1", "GGCCTT")])
        ann = [(1, k, v) for k, v in self.annotations.items()]
        self.db.register_annotations(ann)
        self.assertEqual(
            self.db.query_sample_annotations(1), self.annotations)

    def test_collect_standard_annotations(self):
        # Rows are (accession, key, value).  Going by the expected value,
        # each accession maps to a three-slot list ordered SampleType,
        # SubjectID, HostSpecies, with None where no row was given.
        a = [
            (1, "SampleType", "a"),
            (1, "HostSpecies", "b"),
            (2, "SubjectID", "c"),
        ]
        obs = CoreDb._collect_standard_annotations(a)
        self.assertEqual(obs, {1: ["a", None, "b"], 2: [None, "c", None]})
class RegisterScriptTests(unittest.TestCase):
    """Exercise the registration script functions against an in-memory CoreDb."""

    def setUp(self):
        self.db = CoreDb(":memory:")
        self.db.create_tables()
        # Argument vector handed to register_run().
        self.run_args = [
            "abc",
            "--lane", "1",
            "--date", "2008-09-21",
            "--type", "Illumina-MiSeq",
            "--comment", "mdsnfa adsf",
        ]
        # One sample record, keyed by column name.
        self.samples = [{
            "SampleID": "abc123",
            "BarcodeSequence": "GGGCCT",
            "SampleType": "Oral swab",
            "bb": "cd e29",
        }]

    # NOTE: the method name is misspelt ("rgister"); it is kept unchanged
    # so that existing test selectors continue to match.
    def test_rgister_run(self):
        out = io.StringIO()
        register_run(self.run_args, self.db, out)

        # Check that accession number is printed
        self.assertEqual(
            out.getvalue(),
            "Registered run 1 in the database\n"
        )

        # Check that attributes are saved in the database
        self.assertEqual(self.db._query_run(1), (
            u'2008-09-21', u'Illumina-MiSeq', u'Nextera XT', 1, u'abc',
            u'mdsnfa adsf'))

    def test_register_illumina_file(self):
        fastq_dir = (
            "Miseq/160511_M03543_0047_000000000-APE6Y/Data/Intensities/"
            "BaseCalls")
        fastq_name = "Undetermined_S0_L001_R1_001.fastq.gz"
        relative_fp = os.path.join(fastq_dir, fastq_name)

        tmp_dir = tempfile.mkdtemp()
        original_cwd = os.getcwd()
        # All work on the temporary tree happens inside try/finally so
        # the directory is removed and the working directory restored
        # even if building the fixture fails.
        try:
            os.makedirs(os.path.join(tmp_dir, fastq_dir))
            absolute_fp = os.path.join(tmp_dir, relative_fp)
            # The with-block closes the file even if write() raises; the
            # payload is a bytes literal because the gzip stream is binary.
            with gzip.GzipFile(absolute_fp, "w") as f:
                f.write(
                    b"@M03543:21:C8LJ2ANXX:1:2209:1084:2044 "
                    b"1:N:0:NNNNNNNN+NNNNNNNN")
            out = io.StringIO()
            # The file is registered by its path relative to tmp_dir.
            os.chdir(tmp_dir)
            register_illumina_file(
                [relative_fp, "abcd efg"], self.db, out)
        finally:
            os.chdir(original_cwd)
            shutil.rmtree(tmp_dir)

        self.assertEqual(self.db._query_run(1), (
            u'2016-05-11', u'Illumina-MiSeq', u'Nextera XT', 1,
            unicode(relative_fp), u'abcd efg'))

    def test_register_samples(self):
        register_run(self.run_args, self.db)
        out = io.StringIO()
        sample_file = temp_sample_file(self.samples)
        args = ["1", sample_file.name]
        register_sample_annotations(args, True, self.db, out)

        # Check that accession number is assigned
        obs_accessions = self.db.query_barcoded_sample_accessions(
            1, [("abc123", "GGGCCT")])
        self.assertEqual(obs_accessions, [1])

        # Check that annotations are saved to the database
        self.assertEqual(
            self.db.query_sample_annotations(1),
            {"SampleType": "Oral swab", "bb": "cd e29"})

    def test_register_annotations(self):
        register_run(self.run_args, self.db)
        sample_file = temp_sample_file(self.samples)
        args = ["1", sample_file.name]
        register_sample_annotations(args, True, self.db)

        # Update SampleType, add fg
        new_annotations = {"SampleType": "Feces", "fg": "hi5 34"}
        modified_samples = [x.copy() for x in self.samples]
        modified_samples[0].update(new_annotations)
        # Remove bb
        del modified_samples[0]["bb"]

        sample_file = temp_sample_file(modified_samples)
        args = ["1", sample_file.name]
        register_sample_annotations(args, False, self.db)

        # The second call must replace the annotations, not merge them.
        self.assertEqual(
            self.db.query_sample_annotations(1), new_annotations)

    def test_unregister_samples(self):
        register_run(self.run_args, self.db)
        out = io.StringIO()
        sample_file = temp_sample_file(self.samples)
        args = ["1", sample_file.name]
        register_sample_annotations(args, True, self.db, out)

        unregister_samples(["1"], self.db)
        self.assertEqual(self.db._query_nonstandard_annotations(1), {})
        self.assertEqual(self.db.query_sample_accessions(1), [])

    def test_register_sample_types(self):
        # The temporary file must still be open (and flushed) while it is
        # read back by name; the with-block closes and deletes it after.
        with tempfile.NamedTemporaryFile("wt") as f:
            f.write(SAMPLE_TYPES_TSV)
            f.flush()
            register_sample_types([f.name], self.db)
        self.assertEqual(
            self.db.query_standard_sample_types(), SAMPLE_TYPES_VALS)

        # Add a new sample type and re-register.  Fields are separated by
        # tabs, written as escapes so the separators stay visible.
        new_line = "Extra type\t1\tJust to test"
        with tempfile.NamedTemporaryFile("wt") as f2:
            f2.write(SAMPLE_TYPES_TSV + new_line)
            f2.flush()
            register_sample_types([f2.name], self.db)
        self.assertEqual(
            self.db.query_standard_sample_types(),
            SAMPLE_TYPES_VALS + [("Extra type", 1, "Just to test")])

    def test_register_host_species(self):
        # The temporary file must still be open (and flushed) while it is
        # read back by name; the with-block closes and deletes it after.
        with tempfile.NamedTemporaryFile("wt") as f:
            f.write(HOST_SPECIES_TSV)
            f.flush()
            register_host_species([f.name], self.db)
        self.assertEqual(
            self.db.query_standard_host_species(), HOST_SPECIES_VALS)

        # Add a new species to the file and re-register.  Fields are
        # separated by tabs, written as escapes so they stay visible.
        new_line = "Hippo\tTest\t1243"
        with tempfile.NamedTemporaryFile("wt") as f2:
            f2.write(HOST_SPECIES_TSV + new_line)
            f2.flush()
            register_host_species([f2.name], self.db)
        self.assertEqual(
            self.db.query_standard_host_species(),
            HOST_SPECIES_VALS + [("Hippo", "Test", 1243)])
class CoreDbTests(unittest.TestCase):
    """Exercise CoreDb run, sample and annotation methods.

    Each test gets a fresh in-memory database in which one run has
    already been registered by setUp.
    """

    def setUp(self):
        self.db = CoreDb(":memory:")
        self.db.create_tables()
        # Stored as ``run_fields`` rather than ``run``: an instance
        # attribute named ``run`` would shadow unittest.TestCase.run(),
        # so the test instance could not be executed a second time.
        self.run_fields = (
            u"2015-10-11",
            u"HiSeq",
            u"Nextera XT",
            1,
            u"run_file.fastq",
            u"Bob's run",
        )
        self.run_acc = self.db.register_run(*self.run_fields)
        # (sample name, barcode) pairs.
        self.sample_bcs = [
            ("Sample1", "ABC"),
            ("Sample2", "DEF"),
            ("My.Sample3", "GHI"),
        ]
        self.single_sample = self.sample_bcs[0]
        self.annotations = {
            "SampleType": "Oral swab",
            "SubjectID": "Subj23",
            "study_group": "Healthy",
            "study_day": "1",
        }

    def test_register_run(self):
        # The run registered in setUp is the first one in the database.
        self.assertEqual(self.run_acc, 1)
        self.assertTrue(self.db.query_run_exists(self.run_acc))
        obs_run = self.db._query_run(self.run_acc)
        self.assertEqual(self.run_fields, obs_run)
        # Registering the run twice should raise an error
        self.assertRaises(ValueError, self.db.register_run,
                          *self.run_fields)

    def test_query_run_exists(self):
        self.assertTrue(self.db.query_run_exists(1))

    def test_register_samples(self):
        # Here, accessions given by database cursor.  In other tests,
        # we double-check that we can actually find the samples in a
        # query
        registered_accessions = self.db.register_samples(1, self.sample_bcs)
        self.assertEqual(registered_accessions, [1, 2, 3])
        # Registering the samples again should raise an error
        self.assertRaises(ValueError, self.db.register_samples, 1,
                          self.sample_bcs)

    def test_query_barcoded_sample_accessions(self):
        self.db.register_samples(1, self.sample_bcs)
        self.assertEqual(
            self.db.query_barcoded_sample_accessions(1, self.sample_bcs),
            [1, 2, 3])

    def test_query_sample_accessions(self):
        self.db.register_samples(1, self.sample_bcs)
        self.assertEqual(self.db.query_sample_accessions(1), [1, 2, 3])

    def test_remove_samples(self):
        self.db.register_samples(1, self.sample_bcs)
        self.db.remove_samples([1, 2, 3])
        self.assertEqual(self.db.query_sample_accessions(1), [])

    def test_register_and_remove_annotations(self):
        sample_accessions = self.db.register_samples(1, self.sample_bcs)
        # Attach the full annotation set to every sample.
        for acc in sample_accessions:
            ann = [(acc, k, v) for k, v in self.annotations.items()]
            self.db.register_annotations(ann)
        self.db.remove_annotations(sample_accessions)
        # After removal no sample may report any annotation.
        for acc in sample_accessions:
            self.assertEqual(self.db.query_sample_annotations(acc), {})

    def test_register_and_query_annotations(self):
        self.db.register_samples(1, [("Sample1", "GGCCTT")])
        ann = [(1, k, v) for k, v in self.annotations.items()]
        self.db.register_annotations(ann)
        self.assertEqual(self.db.query_sample_annotations(1),
                         self.annotations)

    def test_collect_standard_annotations(self):
        # Rows are (accession, key, value).  Going by the expected value,
        # each accession maps to a three-slot list ordered SampleType,
        # SubjectID, HostSpecies, with None where no row was given.
        a = [
            (1, "SampleType", "a"),
            (1, "HostSpecies", "b"),
            (2, "SubjectID", "c"),
        ]
        obs = CoreDb._collect_standard_annotations(a)
        self.assertEqual(obs, {1: ["a", None, "b"], 2: [None, "c", None]})
class RegisterScriptTests(unittest.TestCase):
    """Exercise the registration script functions against an in-memory CoreDb."""

    def setUp(self):
        self.db = CoreDb(":memory:")
        self.db.create_tables()
        # Argument vector handed to register_run().
        self.run_args = [
            "abc",
            "--lane", "1",
            "--date", "2008-09-21",
            "--type", "Illumina-MiSeq",
            "--comment", "mdsnfa adsf",
        ]
        # One sample record, keyed by column name.
        self.samples = [{
            "SampleID": "abc123",
            "BarcodeSequence": "GGGCCT",
            "SampleType": "Oral swab",
            "bb": "cd e29",
        }]

    # NOTE: the method name is misspelt ("rgister"); it is kept unchanged
    # so that existing test selectors continue to match.
    def test_rgister_run(self):
        out = io.StringIO()
        register_run(self.run_args, self.db, out)

        # Check that accession number is printed
        self.assertEqual(out.getvalue(), "Registered run 1 in the database\n")

        # Check that attributes are saved in the database
        self.assertEqual(self.db._query_run(1),
                         (u'2008-09-21', u'Illumina-MiSeq', u'Nextera XT', 1,
                          u'abc', u'mdsnfa adsf'))

    def test_register_illumina_file(self):
        fastq_dir = (
            "Miseq/160511_M03543_0047_000000000-APE6Y/Data/Intensities/"
            "BaseCalls")
        fastq_name = "Undetermined_S0_L001_R1_001.fastq.gz"
        relative_fp = os.path.join(fastq_dir, fastq_name)

        tmp_dir = tempfile.mkdtemp()
        original_cwd = os.getcwd()
        # All work on the temporary tree happens inside try/finally so
        # the directory is removed and the working directory restored
        # even if building the fixture fails.
        try:
            os.makedirs(os.path.join(tmp_dir, fastq_dir))
            absolute_fp = os.path.join(tmp_dir, relative_fp)
            # The with-block closes the file even if write() raises; the
            # payload is a bytes literal because the gzip stream is binary.
            with gzip.GzipFile(absolute_fp, "w") as f:
                f.write(
                    b"@M03543:21:C8LJ2ANXX:1:2209:1084:2044 "
                    b"1:N:0:NNNNNNNN+NNNNNNNN")
            out = io.StringIO()
            # The file is registered by its path relative to tmp_dir.
            os.chdir(tmp_dir)
            register_illumina_file([relative_fp, "abcd efg"], self.db, out)
        finally:
            os.chdir(original_cwd)
            shutil.rmtree(tmp_dir)

        self.assertEqual(self.db._query_run(1),
                         (u'2016-05-11', u'Illumina-MiSeq', u'Nextera XT', 1,
                          unicode(relative_fp), u'abcd efg'))

    def test_register_samples(self):
        register_run(self.run_args, self.db)
        out = io.StringIO()
        sample_file = temp_sample_file(self.samples)
        args = ["1", sample_file.name]
        register_sample_annotations(args, True, self.db, out)

        # Check that accession number is assigned
        obs_accessions = self.db.query_barcoded_sample_accessions(
            1, [("abc123", "GGGCCT")])
        self.assertEqual(obs_accessions, [1])

        # Check that annotations are saved to the database
        self.assertEqual(self.db.query_sample_annotations(1), {
            "SampleType": "Oral swab",
            "bb": "cd e29"
        })

    def test_register_annotations(self):
        register_run(self.run_args, self.db)
        sample_file = temp_sample_file(self.samples)
        args = ["1", sample_file.name]
        register_sample_annotations(args, True, self.db)

        # Update SampleType, add fg
        new_annotations = {"SampleType": "Feces", "fg": "hi5 34"}
        modified_samples = [x.copy() for x in self.samples]
        modified_samples[0].update(new_annotations)
        # Remove bb
        del modified_samples[0]["bb"]

        sample_file = temp_sample_file(modified_samples)
        args = ["1", sample_file.name]
        register_sample_annotations(args, False, self.db)

        # The second call must replace the annotations, not merge them.
        self.assertEqual(self.db.query_sample_annotations(1),
                         new_annotations)

    def test_unregister_samples(self):
        register_run(self.run_args, self.db)
        out = io.StringIO()
        sample_file = temp_sample_file(self.samples)
        args = ["1", sample_file.name]
        register_sample_annotations(args, True, self.db, out)

        unregister_samples(["1"], self.db)
        self.assertEqual(self.db._query_nonstandard_annotations(1), {})
        self.assertEqual(self.db.query_sample_accessions(1), [])

    def test_register_sample_types(self):
        # The temporary file must still be open (and flushed) while it is
        # read back by name; the with-block closes and deletes it after.
        with tempfile.NamedTemporaryFile("wt") as f:
            f.write(SAMPLE_TYPES_TSV)
            f.flush()
            register_sample_types([f.name], self.db)
        self.assertEqual(self.db.query_standard_sample_types(),
                         SAMPLE_TYPES_VALS)

        # Add a new sample type and re-register.  Fields are separated by
        # tabs, written as escapes so the separators stay visible.
        new_line = "Extra type\t1\tJust to test"
        with tempfile.NamedTemporaryFile("wt") as f2:
            f2.write(SAMPLE_TYPES_TSV + new_line)
            f2.flush()
            register_sample_types([f2.name], self.db)
        self.assertEqual(
            self.db.query_standard_sample_types(),
            SAMPLE_TYPES_VALS + [("Extra type", 1, "Just to test")])

    def test_register_host_species(self):
        # The temporary file must still be open (and flushed) while it is
        # read back by name; the with-block closes and deletes it after.
        with tempfile.NamedTemporaryFile("wt") as f:
            f.write(HOST_SPECIES_TSV)
            f.flush()
            register_host_species([f.name], self.db)
        self.assertEqual(self.db.query_standard_host_species(),
                         HOST_SPECIES_VALS)

        # Add a new species to the file and re-register.  Fields are
        # separated by tabs, written as escapes so they stay visible.
        new_line = "Hippo\tTest\t1243"
        with tempfile.NamedTemporaryFile("wt") as f2:
            f2.write(HOST_SPECIES_TSV + new_line)
            f2.flush()
            register_host_species([f2.name], self.db)
        self.assertEqual(self.db.query_standard_host_species(),
                         HOST_SPECIES_VALS + [("Hippo", "Test", 1243)])