Esempio n. 1
0
    def upload_qc(self):
        if not self._check_pargs(['flowcell']):
            return
        url = self.pargs.url if self.pargs.url else self.app.config.get(
            "db", "url")
        if not url:
            self.app.log.warn("Please provide a valid url: got {}".format(url))
            return
        if not validate_fc_directory_format(self.pargs.flowcell):
            self.app.log.warn(
                "Path '{}' does not conform to bcbio flowcell directory format; aborting"
                .format(self.pargs.flowcell))
            return

        runinfo_csv = os.path.join(os.path.abspath(self.pargs.flowcell),
                                   "{}.csv".format(fc_id(self.pargs.flowcell)))
        runinfo_yaml = os.path.join(os.path.abspath(self.pargs.flowcell),
                                    "run_info.yaml")
        (fc_date, fc_name) = fc_parts(self.pargs.flowcell)
        if int(fc_date) < 120815:
            self.log.info(
                "Assuming pre-casava based file structure for {}".format(
                    fc_id(self.pargs.flowcell)))
            qc_objects = self._collect_pre_casava_qc()
        else:
            self.log.info("Assuming casava based file structure for {}".format(
                fc_id(self.pargs.flowcell)))
            qc_objects = self._collect_casava_qc()

        if len(qc_objects) == 0:
            self.log.info("No out-of-date qc objects for {}".format(
                fc_id(self.pargs.flowcell)))
            return
        else:
            self.log.info("Retrieved {} updated qc objects".format(
                len(qc_objects)))

        s_con = SampleRunMetricsConnection(dbname=self.app.config.get(
            "db", "samples"),
                                           **vars(self.app.pargs))
        fc_con = FlowcellRunMetricsConnection(dbname=self.app.config.get(
            "db", "flowcells"),
                                              **vars(self.app.pargs))
        p_con = ProjectSummaryConnection(dbname=self.app.config.get(
            "db", "projects"),
                                         **vars(self.app.pargs))
        for obj in qc_objects:
            if self.app.pargs.debug:
                self.log.debug("{}: {}".format(str(obj), obj["_id"]))
            if isinstance(obj, FlowcellRunMetricsDocument):
                dry("Saving object {}".format(repr(obj)), fc_con.save(obj))
            if isinstance(obj, SampleRunMetricsDocument):
                project_sample = p_con.get_project_sample(
                    obj.get("sample_prj", None), obj.get("barcode_name", None),
                    self.pargs.extensive_matching)
                if project_sample:
                    obj["project_sample_name"] = project_sample['sample_name']
                dry("Saving object {}".format(repr(obj)), s_con.save(obj))
Esempio n. 2
0
    def setUpClass(cls):
        # Temporarily add new sample for use in exclusion tests
        s_con = SampleRunMetricsConnection(dbname="samples-test",
                                           username="******",
                                           password="******")
        s = s_con.get_entry("1_121015_BB002BBBXX_TGACCA")
        kw = copy.deepcopy(s)
        del kw["_id"]
        new_s = SampleRunMetricsDocument(**kw)
        new_s["sequence"] = "AGTTGA"
        new_s["name"] = "1_121015_BB002BBBXX_AGTTGA"
        s_con.save(new_s)

        kw = copy.deepcopy(s)
        del kw["_id"]
        new_s = SampleRunMetricsDocument(**kw)
        new_s["sample_prj"] = "j-doe_00_01"
        new_s["sequence"] = "CGAACG"
        new_s["name"] = "1_121015_BB002BBBXX_CGAACG"
        s_con.save(new_s)

        s = s_con.get_entry("3_120924_AC003CCCXX_ACAGTG")
        kw = copy.deepcopy(s)
        del kw["_id"]
        new_s = SampleRunMetricsDocument(**kw)
        new_s["sample_prj"] = "j-doe_00_02"
        new_s["sequence"] = "GGAAGG"
        new_s["name"] = "3_120924_AC003CCCXX_GGAAGG"
        s_con.save(new_s)
Esempio n. 3
0
    def update(self):
        if not self._check_pargs(["sample_prj"]):
            return
        url = self.pargs.url if self.pargs.url else self.app.config.get("db", "url")
        if not url:
            self.app.log.warn("Please provide a valid url: got {}".format(url))
            return

        s_con = SampleRunMetricsConnection(dbname=self.app.config.get("db", "samples"), **vars(self.app.pargs))
        samples = s_con.get_samples(sample_prj=self.pargs.sample_prj)

        if self.pargs.project_id:
            self.app.log.debug("Going to update 'project_id' to {} for sample runs with 'sample_prj' == {}".format(self.pargs.project_id, self.pargs.sample_prj))
            for s in samples:
                if not s.get("project_id", None) is None:
                    if not query_yes_no("'project_id':{} for sample {}; are you sure you want to overwrite?".format(s["project_id"], s["name"]), force=self.pargs.force):
                        continue
                s["project_id"] = self.pargs.project_id
                s_con.save(s)
        if self.pargs.names:
            self.app.log.debug("Going to update 'project_sample_name' for sample runs with 'sample_prj' == {}".format(self.pargs.sample_prj))
            if os.path.exists(self.pargs.names):
                with open(self.pargs.names) as fh:
                    names_d = json.load(fh)
            else:
                names_d= ast.literal_eval(self.pargs.names)
            samples_sort = sorted(samples, key=lambda s:s["barcode_name"])
            groups = {}
            for k, g in itertools.groupby(samples_sort, key=lambda x:x["barcode_name"]):
                groups[k] = list(g)
            for barcode_name in names_d:
                sample_list = groups.get(barcode_name, None)
                if not sample_list:
                    continue
                for s in sample_list:
                    if not s.get("project_sample_name", None) is None:
                        if not query_yes_no("'project_sample_name':{} for sample {}; are you sure you want to overwrite?".format(s["project_sample_name"], s["name"]), force=self.pargs.force):
                            continue
                    s["project_sample_name"] = names_d[barcode_name]
                    s_con.save(s)
        else:
            self.app.log.info("Trying to use extensive matching...")
            p_con = ProjectSummaryConnection(dbname=self.app.config.get("db", "projects"), **vars(self.app.pargs))
            project_name = self.pargs.sample_prj
            if self.pargs.project_alias:
                project_name = self.pargs.project_alias
            for s in samples:
                project_sample = p_con.get_project_sample(project_name, s["barcode_name"], extensive_matching=True)
                if project_sample:
                    self.app.log.info("using mapping '{} : {}'...".format(s["barcode_name"], project_sample["sample_name"]))
                    s["project_sample_name"] = project_sample["sample_name"]
                    s_con.save(s)
Esempio n. 4
0
    def update(self):
        if not self._check_pargs(["sample_prj"]):
            return
        url = self.pargs.url if self.pargs.url else self.app.config.get("db", "url")
        if not url:
            self.app.log.warn("Please provide a valid url: got {}".format(url))
            return

        s_con = SampleRunMetricsConnection(dbname=self.app.config.get("db", "samples"), **vars(self.app.pargs))
        samples = s_con.get_samples(sample_prj=self.pargs.sample_prj)

        if self.pargs.project_id:
            self.app.log.debug("Going to update 'project_id' to {} for sample runs with 'sample_prj' == {}".format(self.pargs.project_id, self.pargs.sample_prj))
            for s in samples:
                if not s.get("project_id", None) is None:
                    if not query_yes_no("'project_id':{} for sample {}; are you sure you want to overwrite?".format(s["project_id"], s["name"]), force=self.pargs.force):
                        continue
                s["project_id"] = self.pargs.project_id
                s_con.save(s)
        if self.pargs.names:
            self.app.log.debug("Going to update 'project_sample_name' for sample runs with 'sample_prj' == {}".format(self.pargs.sample_prj))
            if os.path.exists(self.pargs.names):
                with open(self.pargs.names) as fh:
                    names_d = json.load(fh)
            else:
                names_d= ast.literal_eval(self.pargs.names)
            samples_sort = sorted(samples, key=lambda s:s["barcode_name"])
            groups = {}
            for k, g in itertools.groupby(samples_sort, key=lambda x:x["barcode_name"]):
                groups[k] = list(g)
            for barcode_name in names_d:
                sample_list = groups.get(barcode_name, None)
                if not sample_list:
                    continue
                for s in sample_list:
                    if not s.get("project_sample_name", None) is None:
                        if not query_yes_no("'project_sample_name':{} for sample {}; are you sure you want to overwrite?".format(s["project_sample_name"], s["name"]), force=self.pargs.force):
                            continue
                    s["project_sample_name"] = names_d[barcode_name]
                    s_con.save(s)
        else:
            self.app.log.info("Trying to use extensive matching...")
            p_con = ProjectSummaryConnection(dbname=self.app.config.get("db", "projects"), **vars(self.app.pargs))
            project_name = self.pargs.sample_prj
            if self.pargs.project_alias:
                project_name = self.pargs.project_alias
            for s in samples:
                project_sample = p_con.get_project_sample(project_name, s["barcode_name"], extensive_matching=True)
                if project_sample:
                    self.app.log.info("using mapping '{} : {}'...".format(s["barcode_name"], project_sample["sample_name"]))
                    s["project_sample_name"] = project_sample["sample_name"]
                    s_con.save(s)
Esempio n. 5
0
    def upload_qc(self):
        if not self._check_pargs(["flowcell"]):
            return
        url = self.pargs.url if self.pargs.url else self.app.config.get("db", "url")
        if not url:
            self.app.log.warn("Please provide a valid url: got {}".format(url))
            return
        if not validate_fc_directory_format(self.pargs.flowcell):
            self.app.log.warn(
                "Path '{}' does not conform to bcbio flowcell directory format; aborting".format(self.pargs.flowcell)
            )
            return

        runinfo_csv = os.path.join(os.path.abspath(self.pargs.flowcell), "{}.csv".format(fc_id(self.pargs.flowcell)))
        runinfo_yaml = os.path.join(os.path.abspath(self.pargs.flowcell), "run_info.yaml")
        (fc_date, fc_name) = fc_parts(self.pargs.flowcell)
        if int(fc_date) < 120815:
            self.log.info("Assuming pre-casava based file structure for {}".format(fc_id(self.pargs.flowcell)))
            qc_objects = self._collect_pre_casava_qc()
        else:
            self.log.info("Assuming casava based file structure for {}".format(fc_id(self.pargs.flowcell)))
            qc_objects = self._collect_casava_qc()

        if len(qc_objects) == 0:
            self.log.info("No out-of-date qc objects for {}".format(fc_id(self.pargs.flowcell)))
            return
        else:
            self.log.info("Retrieved {} updated qc objects".format(len(qc_objects)))

        s_con = SampleRunMetricsConnection(dbname=self.app.config.get("db", "samples"), **vars(self.app.pargs))
        fc_con = FlowcellRunMetricsConnection(dbname=self.app.config.get("db", "flowcells"), **vars(self.app.pargs))
        p_con = ProjectSummaryConnection(dbname=self.app.config.get("db", "projects"), **vars(self.app.pargs))
        for obj in qc_objects:
            if self.app.pargs.debug:
                self.log.debug("{}: {}".format(str(obj), obj["_id"]))
            if isinstance(obj, FlowcellRunMetricsDocument):
                dry("Saving object {}".format(repr(obj)), fc_con.save(obj))
            if isinstance(obj, SampleRunMetricsDocument):
                project_sample = p_con.get_project_sample(
                    obj.get("sample_prj", None), obj.get("barcode_name", None), self.pargs.extensive_matching
                )
                if project_sample:
                    obj["project_sample_name"] = project_sample["sample_name"]
                dry("Saving object {}".format(repr(obj)), s_con.save(obj))
Esempio n. 6
0
class TestQCUpload(PmFullTest):
    def setUp(self):
        """FIXME: All other tests depend on data being uploaded, so
        these are not real unit tests. The setup to TestQCUpload has to
        be run prior to other tests, else unexpected failures will
        occur."""
        self.app = self.make_app(
            argv=['qc', 'upload-qc', flowcells[0], '--mtime', '10000'],
            extensions=[
                'scilifelab.pm.ext.ext_qc', 'scilifelab.pm.ext.ext_couchdb'
            ])
        self._run_app()
        self.app = self.make_app(
            argv=['qc', 'upload-qc', flowcells[1], '--mtime', '10000'],
            extensions=[
                'scilifelab.pm.ext.ext_qc', 'scilifelab.pm.ext.ext_couchdb'
            ])
        self._run_app()
        self.s_con = SampleRunMetricsConnection(dbname="samples-test",
                                                username="******",
                                                password="******")
        self.p_con = ProjectSummaryConnection(dbname="projects-test",
                                              username="******",
                                              password="******")
        self.fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test",
                                                   username="******",
                                                   password="******")

    def test_samplesheet(self):
        """Test samplesheet upload"""
        fc = self.fc_con.get_entry("120924_AC003CCCXX")
        self.assertEqual(fc["samplesheet_csv"][0]["Index"], "TGACCA")
        self.assertEqual(fc["samplesheet_csv"][0]["Description"],
                         "J__Doe_00_01")
        self.assertEqual(fc["samplesheet_csv"][0]["FCID"], "C003CCCXX")
        self.assertEqual(fc["samplesheet_csv"][1]["SampleRef"], "hg19")
        self.assertEqual(fc["samplesheet_csv"][2]["SampleID"],
                         "P002_101_index3")

    def test_qc_upload(self):
        """Test running qc upload to server. Slightly circular testing
        here - I setup the module with qc update so by definition the
        test must 'work'"""
        self.app = self.make_app(
            argv=['qc', 'upload-qc', flowcells[1], '--mtime', '100'],
            extensions=[
                'scilifelab.pm.ext.ext_qc', 'scilifelab.pm.ext.ext_couchdb'
            ])
        self._run_app()
        s = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertIsNone(s["project_sample_name"])
        self.assertEqual(s["project_id"], "P003")

    def test_qc_update(self):
        """Test running qc update of a project id"""
        s = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        s["project_id"] = None
        self.assertIsNone(s["project_id"])
        self.s_con.save(s)
        self.app = self.make_app(argv=[
            'qc', 'update', '--sample_prj', projects[2], '--project_id',
            'P003', '--debug', '--force'
        ],
                                 extensions=[
                                     'scilifelab.pm.ext.ext_qc',
                                     'scilifelab.pm.ext.ext_couchdb'
                                 ])
        self._run_app()
        s = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertEqual(s["project_id"], "P003")

    def test_qc_update_sample_names(self):
        """Test running qc update of project sample names"""
        s1 = self.s_con.get_entry("1_120924_AC003CCCXX_TGACCA")
        s2 = self.s_con.get_entry("2_120924_AC003CCCXX_ACAGTG")
        s1["project_sample_name"] = None
        s2["project_sample_name"] = None
        self.assertIsNone(s1["project_sample_name"])
        self.assertIsNone(s2["project_sample_name"])
        self.s_con.save(s1)
        self.s_con.save(s2)
        sample_map = {
            'P001_101_index3': 'P001_101_index3',
            'P001_102_index6': 'P001_102'
        }
        self.app = self.make_app(argv=[
            'qc', 'update', '--sample_prj', projects[0], '--names',
            "{}".format(sample_map), '--debug', '--force'
        ],
                                 extensions=[
                                     'scilifelab.pm.ext.ext_qc',
                                     'scilifelab.pm.ext.ext_couchdb'
                                 ])
        self._run_app()
        s1 = self.s_con.get_entry("1_120924_AC003CCCXX_TGACCA")
        s2 = self.s_con.get_entry("2_120924_AC003CCCXX_ACAGTG")
        self.assertEqual(s1["project_sample_name"], "P001_101_index3")
        self.assertEqual(s2["project_sample_name"], "P001_102")
Esempio n. 7
0
class TestQCUpload(PmFullTest):
    def setUp(self):
        self.app = self.make_app(
            argv=["qc", "upload-qc", flowcells[0], "--mtime", "10000"],
            extensions=["scilifelab.pm.ext.ext_qc", "scilifelab.pm.ext.ext_couchdb"],
        )
        self._run_app()
        self.s_con = SampleRunMetricsConnection(dbname="samples-test", username="******", password="******")
        self.p_con = ProjectSummaryConnection(dbname="projects-test", username="******", password="******")
        self.fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test", username="******", password="******")

    def test_samplesheet(self):
        """Test samplesheet upload"""
        fc = self.fc_con.get_entry("120924_AC003CCCXX")
        self.assertEqual(fc["samplesheet_csv"][0]["Index"], "TGACCA")
        self.assertEqual(fc["samplesheet_csv"][0]["Description"], "J__Doe_00_01")
        self.assertEqual(fc["samplesheet_csv"][0]["FCID"], "C003CCCXX")
        self.assertEqual(fc["samplesheet_csv"][1]["SampleRef"], "hg19")
        self.assertEqual(fc["samplesheet_csv"][2]["SampleID"], "P001_101_index3")

    def test_qc_upload(self):
        """Test running qc upload to server"""
        self.app = self.make_app(
            argv=["qc", "upload-qc", flowcells[1], "--mtime", "100"],
            extensions=["scilifelab.pm.ext.ext_qc", "scilifelab.pm.ext.ext_couchdb"],
        )
        self._run_app()
        s = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertIsNone(s["project_sample_name"])
        self.assertEqual(s["project_id"], "P003")

    def test_qc_update(self):
        """Test running qc update of a project id"""
        s = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        s["project_id"] = None
        self.assertIsNone(s["project_id"])
        self.s_con.save(s)
        self.app = self.make_app(
            argv=["qc", "update", "--sample_prj", projects[2], "--project_id", "P003", "--debug", "--force"],
            extensions=["scilifelab.pm.ext.ext_qc", "scilifelab.pm.ext.ext_couchdb"],
        )
        self._run_app()
        s = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertEqual(s["project_id"], "P003")

    def test_qc_update_sample_names(self):
        """Test running qc update of project sample names"""
        s1 = self.s_con.get_entry("1_120924_AC003CCCXX_TGACCA")
        s2 = self.s_con.get_entry("2_120924_AC003CCCXX_ACAGTG")
        s1["project_sample_name"] = None
        s2["project_sample_name"] = None
        self.assertIsNone(s1["project_sample_name"])
        self.assertIsNone(s2["project_sample_name"])
        self.s_con.save(s1)
        self.s_con.save(s2)
        sample_map = {"P001_101_index3": "P001_101_index3", "P001_102_index6": "P001_102"}
        self.app = self.make_app(
            argv=[
                "qc",
                "update",
                "--sample_prj",
                projects[0],
                "--names",
                "{}".format(sample_map),
                "--debug",
                "--force",
            ],
            extensions=["scilifelab.pm.ext.ext_qc", "scilifelab.pm.ext.ext_couchdb"],
        )
        self._run_app()
        s1 = self.s_con.get_entry("1_120924_AC003CCCXX_TGACCA")
        s2 = self.s_con.get_entry("2_120924_AC003CCCXX_ACAGTG")
        self.assertEqual(s1["project_sample_name"], "P001_101_index3")
        self.assertEqual(s2["project_sample_name"], "P001_102")
Esempio n. 8
0
class TestQCUpload(PmFullTest):
    def setUp(self):
        """FIXME: All other tests depend on data being uploaded, so
        these are not real unit tests. The setup to TestQCUpload has to
        be run prior to other tests, else unexpected failures will
        occur."""
        self.app = self.make_app(argv = ['qc', 'upload-qc', flowcells[0], '--mtime',  '10000'], extensions=['scilifelab.pm.ext.ext_qc', 'scilifelab.pm.ext.ext_couchdb'])
        self._run_app()
        self.app = self.make_app(argv = ['qc', 'upload-qc', flowcells[1], '--mtime',  '10000'], extensions=['scilifelab.pm.ext.ext_qc', 'scilifelab.pm.ext.ext_couchdb'])
        self._run_app()
        self.s_con = SampleRunMetricsConnection(dbname="samples-test", username="******", password="******")
        self.p_con = ProjectSummaryConnection(dbname="projects-test", username="******", password="******")
        self.fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test", username="******", password="******")

    def test_samplesheet(self):
        """Test samplesheet upload"""
        fc = self.fc_con.get_entry("120924_AC003CCCXX")
        self.assertEqual(fc["samplesheet_csv"][0]["Index"], "TGACCA")
        self.assertEqual(fc["samplesheet_csv"][0]["Description"], "J__Doe_00_01")
        self.assertEqual(fc["samplesheet_csv"][0]["FCID"], "C003CCCXX")
        self.assertEqual(fc["samplesheet_csv"][1]["SampleRef"], "hg19")
        self.assertEqual(fc["samplesheet_csv"][2]["SampleID"], "P002_101_index3")

    def test_qc_upload(self):
        """Test running qc upload to server. Slightly circular testing
        here - I setup the module with qc update so by definition the
        test must 'work'"""
        self.app = self.make_app(argv = ['qc', 'upload-qc', flowcells[1], '--mtime',  '100'], extensions=['scilifelab.pm.ext.ext_qc',  'scilifelab.pm.ext.ext_couchdb'])
        self._run_app()
        s = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertIsNone(s["project_sample_name"])
        self.assertEqual(s["project_id"], "P003")
        
    def test_qc_update(self):
        """Test running qc update of a project id"""
        s = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        s["project_id"]= None
        self.assertIsNone(s["project_id"])
        self.s_con.save(s)
        self.app = self.make_app(argv = ['qc', 'update', '--sample_prj', projects[2], '--project_id', 'P003', '--debug', '--force'], extensions=['scilifelab.pm.ext.ext_qc', 'scilifelab.pm.ext.ext_couchdb'])
        self._run_app()
        s = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertEqual(s["project_id"], "P003")

    def test_qc_update_sample_names(self):
        """Test running qc update of project sample names"""
        s1 = self.s_con.get_entry("1_120924_AC003CCCXX_TGACCA")
        s2 = self.s_con.get_entry("2_120924_AC003CCCXX_ACAGTG")
        s1["project_sample_name"] = None
        s2["project_sample_name"] = None
        self.assertIsNone(s1["project_sample_name"])
        self.assertIsNone(s2["project_sample_name"])
        self.s_con.save(s1)
        self.s_con.save(s2)
        sample_map = {'P001_101_index3': 'P001_101_index3', 'P001_102_index6':'P001_102'}
        self.app = self.make_app(argv = ['qc', 'update', '--sample_prj', projects[0], '--names', "{}".format(sample_map), '--debug', '--force'], extensions=['scilifelab.pm.ext.ext_qc',  'scilifelab.pm.ext.ext_couchdb'])
        self._run_app()
        s1 = self.s_con.get_entry("1_120924_AC003CCCXX_TGACCA")
        s2 = self.s_con.get_entry("2_120924_AC003CCCXX_ACAGTG")
        self.assertEqual(s1["project_sample_name"], "P001_101_index3")
        self.assertEqual(s2["project_sample_name"], "P001_102")