def upload_qc(self):
    """Collect QC documents for a flowcell and save them to the databases.

    Reads ``self.pargs.flowcell``, ``self.pargs.url`` (falling back to the
    ``db``/``url`` config entry) and ``self.pargs.extensive_matching``.
    Flowcells whose date part is numerically below 120815 are collected
    with ``_collect_pre_casava_qc``; all others with ``_collect_casava_qc``.

    Returns:
        None. Returns early when ``_check_pargs(["flowcell"])`` is falsy,
        when no url is available (a warning is logged), when the flowcell
        path fails ``validate_fc_directory_format`` (a warning is logged),
        or when no QC objects were collected.
    """
    if not self._check_pargs(["flowcell"]):
        return
    url = self.pargs.url if self.pargs.url else self.app.config.get("db", "url")
    if not url:
        self.app.log.warn("Please provide a valid url: got {}".format(url))
        return

    flowcell = self.pargs.flowcell
    if not validate_fc_directory_format(flowcell):
        self.app.log.warn(
            "Path '{}' does not conform to bcbio flowcell directory format; aborting".format(flowcell)
        )
        return

    # Resolve the flowcell id once; it is only needed for log messages.
    flowcell_id = fc_id(flowcell)
    fc_date, _ = fc_parts(flowcell)

    # The date part decides which directory layout the collectors expect.
    if int(fc_date) < 120815:
        self.log.info("Assuming pre-casava based file structure for {}".format(flowcell_id))
        qc_objects = self._collect_pre_casava_qc()
    else:
        self.log.info("Assuming casava based file structure for {}".format(flowcell_id))
        qc_objects = self._collect_casava_qc()

    if not qc_objects:
        self.log.info("No out-of-date qc objects for {}".format(flowcell_id))
        return
    self.log.info("Retrieved {} updated qc objects".format(len(qc_objects)))

    connection_args = vars(self.app.pargs)
    s_con = SampleRunMetricsConnection(dbname=self.app.config.get("db", "samples"), **connection_args)
    fc_con = FlowcellRunMetricsConnection(dbname=self.app.config.get("db", "flowcells"), **connection_args)
    p_con = ProjectSummaryConnection(dbname=self.app.config.get("db", "projects"), **connection_args)

    for obj in qc_objects:
        if self.app.pargs.debug:
            self.log.debug("{}: {}".format(str(obj), obj["_id"]))
        # NOTE(review): save() is evaluated while building dry()'s arguments,
        # i.e. before dry() itself runs. If dry() expects a callable that it
        # invokes conditionally, this bypasses it -- confirm against dry()'s
        # signature before changing; behaviour is deliberately kept as-is.
        if isinstance(obj, FlowcellRunMetricsDocument):
            dry("Saving object {}".format(repr(obj)), fc_con.save(obj))
        if isinstance(obj, SampleRunMetricsDocument):
            project_sample = p_con.get_project_sample(
                obj.get("sample_prj", None),
                obj.get("barcode_name", None),
                self.pargs.extensive_matching,
            )
            if project_sample:
                obj["project_sample_name"] = project_sample["sample_name"]
            # Sample documents are saved whether or not a project sample matched.
            dry("Saving object {}".format(repr(obj)), s_con.save(obj))
def setUpClass(cls):
    """Temporarily add new sample run documents for use in exclusion tests.

    Each new document is a deep copy of an existing ``samples-test`` entry
    with its ``_id`` removed and a few fields overridden before saving.
    """
    s_con = SampleRunMetricsConnection(dbname="samples-test", username="******", password="******")

    # (existing entry used as template, field overrides for each derived document)
    plan = (
        (
            "1_121015_BB002BBBXX_TGACCA",
            (
                (("sequence", "AGTTGA"), ("name", "1_121015_BB002BBBXX_AGTTGA")),
                (
                    ("sample_prj", "j-doe_00_01"),
                    ("sequence", "CGAACG"),
                    ("name", "1_121015_BB002BBBXX_CGAACG"),
                ),
            ),
        ),
        (
            "3_120924_AC003CCCXX_ACAGTG",
            (
                (
                    ("sample_prj", "j-doe_00_02"),
                    ("sequence", "GGAAGG"),
                    ("name", "3_120924_AC003CCCXX_GGAAGG"),
                ),
            ),
        ),
    )

    for template_name, derived_docs in plan:
        # Each template is fetched once and reused for all of its derived documents.
        template = s_con.get_entry(template_name)
        for overrides in derived_docs:
            fields = copy.deepcopy(template)
            del fields["_id"]
            document = SampleRunMetricsDocument(**fields)
            for key, value in overrides:
                document[key] = value
            s_con.save(document)
def update(self):
    """Update ``project_id`` and/or ``project_sample_name`` on sample runs.

    Operates on the sample run documents returned by
    ``get_samples(sample_prj=self.pargs.sample_prj)``:

    * if ``self.pargs.project_id`` is set, it is written to every sample;
    * if ``self.pargs.names`` is set, it is read as a JSON file when it is
      an existing path, otherwise parsed with ``ast.literal_eval``; the
      result maps ``barcode_name`` to the new ``project_sample_name``;
    * otherwise each sample is looked up with
      ``get_project_sample(..., extensive_matching=True)`` under
      ``self.pargs.project_alias`` (or ``sample_prj`` when no alias is set)
      and updated when a project sample is found.

    Before a value that is not ``None`` is overwritten, ``query_yes_no`` is
    consulted with ``force=self.pargs.force``.

    Returns:
        None. Returns early when ``_check_pargs(["sample_prj"])`` is falsy
        or when no url is available (a warning is logged).
    """
    if not self._check_pargs(["sample_prj"]):
        return
    url = self.pargs.url if self.pargs.url else self.app.config.get("db", "url")
    if not url:
        self.app.log.warn("Please provide a valid url: got {}".format(url))
        return

    s_con = SampleRunMetricsConnection(dbname=self.app.config.get("db", "samples"), **vars(self.app.pargs))
    # Materialise the result: it is walked by the project_id branch and again
    # by the names / matching branch, which would see nothing on the second
    # pass if get_samples() handed back a one-shot iterator.
    samples = list(s_con.get_samples(sample_prj=self.pargs.sample_prj))

    def may_write(sample, field):
        """Return a truthy value unless *field* is set and the user declines to overwrite it."""
        if sample.get(field, None) is None:
            return True
        return query_yes_no(
            "'{}':{} for sample {}; are you sure you want to overwrite?".format(
                field, sample[field], sample["name"]
            ),
            force=self.pargs.force,
        )

    if self.pargs.project_id:
        self.app.log.debug(
            "Going to update 'project_id' to {} for sample runs with 'sample_prj' == {}".format(
                self.pargs.project_id, self.pargs.sample_prj
            )
        )
        for s in samples:
            if not may_write(s, "project_id"):
                continue
            s["project_id"] = self.pargs.project_id
            s_con.save(s)

    if self.pargs.names:
        self.app.log.debug(
            "Going to update 'project_sample_name' for sample runs with 'sample_prj' == {}".format(
                self.pargs.sample_prj
            )
        )
        if os.path.exists(self.pargs.names):
            with open(self.pargs.names) as fh:
                names_d = json.load(fh)
        else:
            names_d = ast.literal_eval(self.pargs.names)

        # Group sample runs by barcode name, keeping their original order.
        # No sort is needed, so mixed None/str barcode names cannot raise.
        groups = {}
        for s in samples:
            groups.setdefault(s["barcode_name"], []).append(s)

        for barcode_name in names_d:
            for s in groups.get(barcode_name, ()):
                if not may_write(s, "project_sample_name"):
                    continue
                s["project_sample_name"] = names_d[barcode_name]
                s_con.save(s)
    else:
        self.app.log.info("Trying to use extensive matching...")
        p_con = ProjectSummaryConnection(dbname=self.app.config.get("db", "projects"), **vars(self.app.pargs))
        project_name = self.pargs.project_alias if self.pargs.project_alias else self.pargs.sample_prj
        for s in samples:
            project_sample = p_con.get_project_sample(project_name, s["barcode_name"], extensive_matching=True)
            if project_sample:
                self.app.log.info(
                    "using mapping '{} : {}'...".format(s["barcode_name"], project_sample["sample_name"])
                )
                s["project_sample_name"] = project_sample["sample_name"]
                s_con.save(s)
def update(self):
    """Update ``project_id`` and/or ``project_sample_name`` on sample runs.

    Operates on the sample run documents returned by
    ``get_samples(sample_prj=self.pargs.sample_prj)``:

    * if ``self.pargs.project_id`` is set, it is written to every sample;
    * if ``self.pargs.names`` is set, it is read as a JSON file when it is
      an existing path, otherwise parsed with ``ast.literal_eval``; the
      result maps ``barcode_name`` to the new ``project_sample_name``;
    * otherwise each sample is looked up with
      ``get_project_sample(..., extensive_matching=True)`` under
      ``self.pargs.project_alias`` (or ``sample_prj`` when no alias is set)
      and updated when a project sample is found.

    Before a value that is not ``None`` is overwritten, ``query_yes_no`` is
    consulted with ``force=self.pargs.force``.

    Returns:
        None. Returns early when ``_check_pargs(["sample_prj"])`` is falsy
        or when no url is available (a warning is logged).
    """
    if not self._check_pargs(["sample_prj"]):
        return
    url = self.pargs.url if self.pargs.url else self.app.config.get("db", "url")
    if not url:
        self.app.log.warn("Please provide a valid url: got {}".format(url))
        return

    s_con = SampleRunMetricsConnection(dbname=self.app.config.get("db", "samples"), **vars(self.app.pargs))
    # Materialise the result: it is walked by the project_id branch and again
    # by the names / matching branch, which would see nothing on the second
    # pass if get_samples() handed back a one-shot iterator.
    samples = list(s_con.get_samples(sample_prj=self.pargs.sample_prj))

    def may_write(sample, field):
        """Return a truthy value unless *field* is set and the user declines to overwrite it."""
        if sample.get(field, None) is None:
            return True
        return query_yes_no(
            "'{}':{} for sample {}; are you sure you want to overwrite?".format(
                field, sample[field], sample["name"]
            ),
            force=self.pargs.force,
        )

    if self.pargs.project_id:
        self.app.log.debug(
            "Going to update 'project_id' to {} for sample runs with 'sample_prj' == {}".format(
                self.pargs.project_id, self.pargs.sample_prj
            )
        )
        for s in samples:
            if not may_write(s, "project_id"):
                continue
            s["project_id"] = self.pargs.project_id
            s_con.save(s)

    if self.pargs.names:
        self.app.log.debug(
            "Going to update 'project_sample_name' for sample runs with 'sample_prj' == {}".format(
                self.pargs.sample_prj
            )
        )
        if os.path.exists(self.pargs.names):
            with open(self.pargs.names) as fh:
                names_d = json.load(fh)
        else:
            names_d = ast.literal_eval(self.pargs.names)

        # Group sample runs by barcode name, keeping their original order.
        # No sort is needed, so mixed None/str barcode names cannot raise.
        groups = {}
        for s in samples:
            groups.setdefault(s["barcode_name"], []).append(s)

        for barcode_name in names_d:
            for s in groups.get(barcode_name, ()):
                if not may_write(s, "project_sample_name"):
                    continue
                s["project_sample_name"] = names_d[barcode_name]
                s_con.save(s)
    else:
        self.app.log.info("Trying to use extensive matching...")
        p_con = ProjectSummaryConnection(dbname=self.app.config.get("db", "projects"), **vars(self.app.pargs))
        project_name = self.pargs.project_alias if self.pargs.project_alias else self.pargs.sample_prj
        for s in samples:
            project_sample = p_con.get_project_sample(project_name, s["barcode_name"], extensive_matching=True)
            if project_sample:
                self.app.log.info(
                    "using mapping '{} : {}'...".format(s["barcode_name"], project_sample["sample_name"])
                )
                s["project_sample_name"] = project_sample["sample_name"]
                s_con.save(s)
def upload_qc(self):
    """Collect QC documents for a flowcell and save them to the databases.

    Reads ``self.pargs.flowcell``, ``self.pargs.url`` (falling back to the
    ``db``/``url`` config entry) and ``self.pargs.extensive_matching``.
    Flowcells whose date part is numerically below 120815 are collected
    with ``_collect_pre_casava_qc``; all others with ``_collect_casava_qc``.

    Returns:
        None. Returns early when ``_check_pargs(["flowcell"])`` is falsy,
        when no url is available (a warning is logged), when the flowcell
        path fails ``validate_fc_directory_format`` (a warning is logged),
        or when no QC objects were collected.
    """
    if not self._check_pargs(["flowcell"]):
        return
    url = self.pargs.url if self.pargs.url else self.app.config.get("db", "url")
    if not url:
        self.app.log.warn("Please provide a valid url: got {}".format(url))
        return

    flowcell = self.pargs.flowcell
    if not validate_fc_directory_format(flowcell):
        self.app.log.warn(
            "Path '{}' does not conform to bcbio flowcell directory format; aborting".format(flowcell)
        )
        return

    # Resolve the flowcell id once; it is only needed for log messages.
    flowcell_id = fc_id(flowcell)
    fc_date, _ = fc_parts(flowcell)

    # The date part decides which directory layout the collectors expect.
    if int(fc_date) < 120815:
        self.log.info("Assuming pre-casava based file structure for {}".format(flowcell_id))
        qc_objects = self._collect_pre_casava_qc()
    else:
        self.log.info("Assuming casava based file structure for {}".format(flowcell_id))
        qc_objects = self._collect_casava_qc()

    if not qc_objects:
        self.log.info("No out-of-date qc objects for {}".format(flowcell_id))
        return
    self.log.info("Retrieved {} updated qc objects".format(len(qc_objects)))

    connection_args = vars(self.app.pargs)
    s_con = SampleRunMetricsConnection(dbname=self.app.config.get("db", "samples"), **connection_args)
    fc_con = FlowcellRunMetricsConnection(dbname=self.app.config.get("db", "flowcells"), **connection_args)
    p_con = ProjectSummaryConnection(dbname=self.app.config.get("db", "projects"), **connection_args)

    for obj in qc_objects:
        if self.app.pargs.debug:
            self.log.debug("{}: {}".format(str(obj), obj["_id"]))
        # NOTE(review): save() is evaluated while building dry()'s arguments,
        # i.e. before dry() itself runs. If dry() expects a callable that it
        # invokes conditionally, this bypasses it -- confirm against dry()'s
        # signature before changing; behaviour is deliberately kept as-is.
        if isinstance(obj, FlowcellRunMetricsDocument):
            dry("Saving object {}".format(repr(obj)), fc_con.save(obj))
        if isinstance(obj, SampleRunMetricsDocument):
            project_sample = p_con.get_project_sample(
                obj.get("sample_prj", None),
                obj.get("barcode_name", None),
                self.pargs.extensive_matching,
            )
            if project_sample:
                obj["project_sample_name"] = project_sample["sample_name"]
            # Sample documents are saved whether or not a project sample matched.
            dry("Saving object {}".format(repr(obj)), s_con.save(obj))
class TestQCUpload(PmFullTest):
    """Functional tests for the ``qc upload-qc`` and ``qc update`` commands."""

    # Extensions every app in this test case is created with.
    _QC_EXTENSIONS = ("scilifelab.pm.ext.ext_qc", "scilifelab.pm.ext.ext_couchdb")

    def _run_pm(self, argv):
        """Create an app for *argv*, store it on ``self.app`` and run it."""
        self.app = self.make_app(argv=argv, extensions=list(self._QC_EXTENSIONS))
        self._run_app()

    def setUp(self):
        """Upload QC data for the first two flowcells and open the test databases.

        FIXME: All other tests depend on data being uploaded, so these are
        not real unit tests. The setup to TestQCUpload has to be run prior
        to other tests, else unexpected failures will occur.
        """
        for index in (0, 1):
            self._run_pm(["qc", "upload-qc", flowcells[index], "--mtime", "10000"])
        credentials = {"username": "******", "password": "******"}
        self.s_con = SampleRunMetricsConnection(dbname="samples-test", **credentials)
        self.p_con = ProjectSummaryConnection(dbname="projects-test", **credentials)
        self.fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test", **credentials)

    def test_samplesheet(self):
        """Test samplesheet upload"""
        rows = self.fc_con.get_entry("120924_AC003CCCXX")["samplesheet_csv"]
        # (row index, column, expected value)
        expectations = (
            (0, "Index", "TGACCA"),
            (0, "Description", "J__Doe_00_01"),
            (0, "FCID", "C003CCCXX"),
            (1, "SampleRef", "hg19"),
            (2, "SampleID", "P002_101_index3"),
        )
        for row, column, expected in expectations:
            self.assertEqual(rows[row][column], expected)

    def test_qc_upload(self):
        """Test running qc upload to server.

        Slightly circular testing here - the module is set up with qc
        upload, so by definition the test must 'work'.
        """
        self._run_pm(["qc", "upload-qc", flowcells[1], "--mtime", "100"])
        sample = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertIsNone(sample["project_sample_name"])
        self.assertEqual(sample["project_id"], "P003")

    def test_qc_update(self):
        """Test running qc update of a project id"""
        sample_name = "4_120924_AC003CCCXX_CGTTAA"
        # Clear the project id first so the update has something to restore.
        sample = self.s_con.get_entry(sample_name)
        sample["project_id"] = None
        self.assertIsNone(sample["project_id"])
        self.s_con.save(sample)
        self._run_pm(
            ["qc", "update", "--sample_prj", projects[2], "--project_id", "P003", "--debug", "--force"]
        )
        sample = self.s_con.get_entry(sample_name)
        self.assertEqual(sample["project_id"], "P003")

    def test_qc_update_sample_names(self):
        """Test running qc update of project sample names"""
        first_name = "1_120924_AC003CCCXX_TGACCA"
        second_name = "2_120924_AC003CCCXX_ACAGTG"
        # Reset both sample names so the update has to write them again.
        first = self.s_con.get_entry(first_name)
        second = self.s_con.get_entry(second_name)
        first["project_sample_name"] = None
        second["project_sample_name"] = None
        self.assertIsNone(first["project_sample_name"])
        self.assertIsNone(second["project_sample_name"])
        self.s_con.save(first)
        self.s_con.save(second)
        sample_map = {"P001_101_index3": "P001_101_index3", "P001_102_index6": "P001_102"}
        self._run_pm(
            ["qc", "update", "--sample_prj", projects[0], "--names", str(sample_map), "--debug", "--force"]
        )
        first = self.s_con.get_entry(first_name)
        second = self.s_con.get_entry(second_name)
        self.assertEqual(first["project_sample_name"], "P001_101_index3")
        self.assertEqual(second["project_sample_name"], "P001_102")
class TestQCUpload(PmFullTest):
    """Functional tests for the ``qc upload-qc`` and ``qc update`` commands."""

    # Extensions every app in this test case is created with.
    _QC_EXTENSIONS = ("scilifelab.pm.ext.ext_qc", "scilifelab.pm.ext.ext_couchdb")

    def _run_pm(self, argv):
        """Create an app for *argv*, store it on ``self.app`` and run it."""
        self.app = self.make_app(argv=argv, extensions=list(self._QC_EXTENSIONS))
        self._run_app()

    def setUp(self):
        """Upload QC data for the first flowcell and open the test databases."""
        self._run_pm(["qc", "upload-qc", flowcells[0], "--mtime", "10000"])
        credentials = {"username": "******", "password": "******"}
        self.s_con = SampleRunMetricsConnection(dbname="samples-test", **credentials)
        self.p_con = ProjectSummaryConnection(dbname="projects-test", **credentials)
        self.fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test", **credentials)

    def test_samplesheet(self):
        """Test samplesheet upload"""
        rows = self.fc_con.get_entry("120924_AC003CCCXX")["samplesheet_csv"]
        # (row index, column, expected value)
        expectations = (
            (0, "Index", "TGACCA"),
            (0, "Description", "J__Doe_00_01"),
            (0, "FCID", "C003CCCXX"),
            (1, "SampleRef", "hg19"),
            (2, "SampleID", "P001_101_index3"),
        )
        for row, column, expected in expectations:
            self.assertEqual(rows[row][column], expected)

    def test_qc_upload(self):
        """Test running qc upload to server"""
        self._run_pm(["qc", "upload-qc", flowcells[1], "--mtime", "100"])
        sample = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertIsNone(sample["project_sample_name"])
        self.assertEqual(sample["project_id"], "P003")

    def test_qc_update(self):
        """Test running qc update of a project id"""
        sample_name = "4_120924_AC003CCCXX_CGTTAA"
        # Clear the project id first so the update has something to restore.
        sample = self.s_con.get_entry(sample_name)
        sample["project_id"] = None
        self.assertIsNone(sample["project_id"])
        self.s_con.save(sample)
        self._run_pm(
            ["qc", "update", "--sample_prj", projects[2], "--project_id", "P003", "--debug", "--force"]
        )
        sample = self.s_con.get_entry(sample_name)
        self.assertEqual(sample["project_id"], "P003")

    def test_qc_update_sample_names(self):
        """Test running qc update of project sample names"""
        first_name = "1_120924_AC003CCCXX_TGACCA"
        second_name = "2_120924_AC003CCCXX_ACAGTG"
        # Reset both sample names so the update has to write them again.
        first = self.s_con.get_entry(first_name)
        second = self.s_con.get_entry(second_name)
        first["project_sample_name"] = None
        second["project_sample_name"] = None
        self.assertIsNone(first["project_sample_name"])
        self.assertIsNone(second["project_sample_name"])
        self.s_con.save(first)
        self.s_con.save(second)
        sample_map = {"P001_101_index3": "P001_101_index3", "P001_102_index6": "P001_102"}
        self._run_pm(
            ["qc", "update", "--sample_prj", projects[0], "--names", str(sample_map), "--debug", "--force"]
        )
        first = self.s_con.get_entry(first_name)
        second = self.s_con.get_entry(second_name)
        self.assertEqual(first["project_sample_name"], "P001_101_index3")
        self.assertEqual(second["project_sample_name"], "P001_102")
class TestQCUpload(PmFullTest):
    """Functional tests for the ``qc upload-qc`` and ``qc update`` commands."""

    # Extensions every app in this test case is created with.
    _QC_EXTENSIONS = ("scilifelab.pm.ext.ext_qc", "scilifelab.pm.ext.ext_couchdb")

    def _run_pm(self, argv):
        """Create an app for *argv*, store it on ``self.app`` and run it."""
        self.app = self.make_app(argv=argv, extensions=list(self._QC_EXTENSIONS))
        self._run_app()

    def setUp(self):
        """Upload QC data for the first two flowcells and open the test databases.

        FIXME: All other tests depend on data being uploaded, so these are
        not real unit tests. The setup to TestQCUpload has to be run prior
        to other tests, else unexpected failures will occur.
        """
        for index in (0, 1):
            self._run_pm(["qc", "upload-qc", flowcells[index], "--mtime", "10000"])
        credentials = {"username": "******", "password": "******"}
        self.s_con = SampleRunMetricsConnection(dbname="samples-test", **credentials)
        self.p_con = ProjectSummaryConnection(dbname="projects-test", **credentials)
        self.fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test", **credentials)

    def test_samplesheet(self):
        """Test samplesheet upload"""
        rows = self.fc_con.get_entry("120924_AC003CCCXX")["samplesheet_csv"]
        # (row index, column, expected value)
        expectations = (
            (0, "Index", "TGACCA"),
            (0, "Description", "J__Doe_00_01"),
            (0, "FCID", "C003CCCXX"),
            (1, "SampleRef", "hg19"),
            (2, "SampleID", "P002_101_index3"),
        )
        for row, column, expected in expectations:
            self.assertEqual(rows[row][column], expected)

    def test_qc_upload(self):
        """Test running qc upload to server.

        Slightly circular testing here - the module is set up with qc
        upload, so by definition the test must 'work'.
        """
        self._run_pm(["qc", "upload-qc", flowcells[1], "--mtime", "100"])
        sample = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertIsNone(sample["project_sample_name"])
        self.assertEqual(sample["project_id"], "P003")

    def test_qc_update(self):
        """Test running qc update of a project id"""
        sample_name = "4_120924_AC003CCCXX_CGTTAA"
        # Clear the project id first so the update has something to restore.
        sample = self.s_con.get_entry(sample_name)
        sample["project_id"] = None
        self.assertIsNone(sample["project_id"])
        self.s_con.save(sample)
        self._run_pm(
            ["qc", "update", "--sample_prj", projects[2], "--project_id", "P003", "--debug", "--force"]
        )
        sample = self.s_con.get_entry(sample_name)
        self.assertEqual(sample["project_id"], "P003")

    def test_qc_update_sample_names(self):
        """Test running qc update of project sample names"""
        first_name = "1_120924_AC003CCCXX_TGACCA"
        second_name = "2_120924_AC003CCCXX_ACAGTG"
        # Reset both sample names so the update has to write them again.
        first = self.s_con.get_entry(first_name)
        second = self.s_con.get_entry(second_name)
        first["project_sample_name"] = None
        second["project_sample_name"] = None
        self.assertIsNone(first["project_sample_name"])
        self.assertIsNone(second["project_sample_name"])
        self.s_con.save(first)
        self.s_con.save(second)
        sample_map = {"P001_101_index3": "P001_101_index3", "P001_102_index6": "P001_102"}
        self._run_pm(
            ["qc", "update", "--sample_prj", projects[0], "--names", str(sample_map), "--debug", "--force"]
        )
        first = self.s_con.get_entry(first_name)
        second = self.s_con.get_entry(second_name)
        self.assertEqual(first["project_sample_name"], "P001_101_index3")
        self.assertEqual(second["project_sample_name"], "P001_102")