def list_projects(self):
    """Write a table of the projects on a flowcell and their application.

    The flowcell directory name is read from ``self.pargs.flowcell``. The
    table is written, tab-separated and newline-joined, to
    ``self.app._output_data["stdout"]``:

    * first row: the flowcell name, ``InstrumentType`` (default
      ``"HiSeq2000"``) and ``RunMode`` (default ``"High Output"``) taken from
      the runParameter data;
    * one row per project (sorted by name): the project name and its
      ``application`` field, ``"N/A"`` when the project document is missing
      or lacks that field.

    Returns ``None`` in every case. Nothing is written when
    ``self._check_pargs(["flowcell"])`` is falsy, or (after logging a
    warning) when no database url is available, the flowcell name is
    rejected by ``validate_fc_directory_format``, no flowcell metric document
    exists, or the samplesheet data is empty.
    """
    if not self._check_pargs(["flowcell"]):
        return
    # The command-line url wins; otherwise fall back to the [db] config section.
    url = self.pargs.url or self.app.config.get("db", "url")
    if not url:
        self.app.log.warn("Please provide a valid url: got {}".format(url))
        return
    flowcell = self.pargs.flowcell
    if not validate_fc_directory_format(flowcell):
        self.app.log.warn(
            "Path '{}' does not conform to bcbio flowcell directory format; aborting".format(flowcell)
        )
        return

    # The lookup id is built from the first and last "_"-separated fields.
    fields = flowcell.split("_")
    fcid = "_".join([fields[0], fields[-1]])

    self.log.debug("Establishing FlowcellRunMetricsConnection")
    fc_con = FlowcellRunMetricsConnection(dbname=self.app.config.get("db", "flowcells"), **vars(self.app.pargs))
    self.log.debug("Establishing ProjectSummaryConnection")
    p_con = ProjectSummaryConnection(dbname=self.app.config.get("db", "projects"), **vars(self.app.pargs))

    self.log.debug("Fetching flowcell metric document for flowcell {}".format(fcid))
    fc = fc_con.get_entry(fcid)
    if fc is None:
        self.log.warn("No flowcell metric document for flowcell {}".format(fcid))
        return

    self.log.debug("Fetching csv samplesheet data for flowcell {}".format(fcid))
    ssheet_data = self._get_samplesheet_sample_data(fc)
    if not ssheet_data:
        self.log.warn("No csv samplesheet data for flowcell {}".format(fcid))
        return

    self.log.debug("Fetch runParameter data for flowcell {}".format(fcid))
    run_data = self._get_run_parameter_data(fc)
    if not run_data:
        # Not fatal: the header row below falls back to default values.
        self.log.warn("No runParameter data for flowcell {}".format(fcid))

    out_data = [
        [flowcell, run_data.get("InstrumentType", "HiSeq2000"), run_data.get("RunMode", "High Output")]
    ]

    # Extract the project names: first element of every samplesheet entry,
    # with "__" replaced by ".".
    projects = {
        entry[0].replace("__", ".")
        for data in ssheet_data.values()
        for entry in data.values()
    }

    # Extract application for each project; sorted so the output order is
    # reproducible instead of depending on set iteration order.
    for project in sorted(projects):
        self.log.debug("Fetching project data document for project {}".format(project))
        pdoc = p_con.get_entry(project)
        if pdoc is None:
            self.log.warn("No project data document for project {}".format(project))
            pdoc = {}
        out_data.append([project, pdoc.get("application", "N/A")])

    self.app._output_data["stdout"].write(
        "\n".join("\t".join(str(cell) for cell in row) for row in out_data)
    )
def list_projects(self):
    """Write a table of the projects on a flowcell with application and type.

    The flowcell directory name is read from ``self.pargs.flowcell``. The
    table is written, tab-separated and newline-joined, to
    ``self.app._output_data['stdout']``:

    * first row: the flowcell name, ``InstrumentType`` (default
      ``"HiSeq2000"``) and ``RunMode`` (default ``"High Output"``) taken from
      the runParameter data;
    * one row per project (sorted by name): the project name, its
      ``application`` field (default ``"N/A"``) and its ``type`` field
      (default ``"Check GPL"``). The defaults also apply when the project
      document is missing.

    Returns ``None`` in every case. Nothing is written when
    ``self._check_pargs(["flowcell"])`` is falsy, or (after logging a
    warning) when no database url is available, the flowcell name is
    rejected by ``validate_fc_directory_format``, no flowcell metric document
    exists, or the samplesheet data is empty.
    """
    if not self._check_pargs(["flowcell"]):
        return
    # The command-line url wins; otherwise fall back to the [db] config section.
    url = self.pargs.url or self.app.config.get("db", "url")
    if not url:
        self.app.log.warn("Please provide a valid url: got {}".format(url))
        return
    flowcell = self.pargs.flowcell
    if not validate_fc_directory_format(flowcell):
        self.app.log.warn(
            "Path '{}' does not conform to bcbio flowcell directory format; aborting".format(flowcell)
        )
        return

    # The lookup id is built from the first and last "_"-separated fields.
    fields = flowcell.split("_")
    fcid = "_".join([fields[0], fields[-1]])

    self.log.debug("Establishing FlowcellRunMetricsConnection")
    fc_con = FlowcellRunMetricsConnection(dbname=self.app.config.get("db", "flowcells"), **vars(self.app.pargs))
    self.log.debug("Establishing ProjectSummaryConnection")
    p_con = ProjectSummaryConnection(dbname=self.app.config.get("db", "projects"), **vars(self.app.pargs))

    self.log.debug("Fetching flowcell metric document for flowcell {}".format(fcid))
    fc = fc_con.get_entry(fcid)
    if fc is None:
        self.log.warn("No flowcell metric document for flowcell {}".format(fcid))
        return

    self.log.debug("Fetching csv samplesheet data for flowcell {}".format(fcid))
    ssheet_data = self._get_samplesheet_sample_data(fc)
    if not ssheet_data:
        self.log.warn("No csv samplesheet data for flowcell {}".format(fcid))
        return

    self.log.debug("Fetch runParameter data for flowcell {}".format(fcid))
    run_data = self._get_run_parameter_data(fc)
    if not run_data:
        # Not fatal: the header row below falls back to default values.
        self.log.warn("No runParameter data for flowcell {}".format(fcid))

    out_data = [
        [flowcell, run_data.get("InstrumentType", "HiSeq2000"), run_data.get("RunMode", "High Output")]
    ]

    # Extract the project names: first element of every samplesheet entry,
    # with "__" replaced by ".".
    projects = {
        entry[0].replace("__", ".")
        for data in ssheet_data.values()
        for entry in data.values()
    }

    # Extract application and type for each project; sorted so the output
    # order is reproducible instead of depending on set iteration order.
    for project in sorted(projects):
        self.log.debug("Fetching project data document for project {}".format(project))
        pdoc = p_con.get_entry(project)
        if pdoc is None:
            self.log.warn("No project data document for project {}".format(project))
            pdoc = {}
        application = pdoc.get("application", "N/A")
        # Named project_type so the builtin `type` is not shadowed.
        project_type = pdoc.get("type", "Check GPL")
        out_data.append([project, application, project_type])

    self.app._output_data['stdout'].write(
        "\n".join("\t".join(str(cell) for cell in row) for row in out_data)
    )
def upload_qc(self):
    """Collect the QC objects of a flowcell and save them via the db connections.

    The flowcell directory is read from ``self.pargs.flowcell``. Depending on
    the date part returned by ``fc_parts`` the objects are gathered with
    ``self._collect_pre_casava_qc()`` (date before 120815) or
    ``self._collect_casava_qc()``. Each ``FlowcellRunMetricsDocument`` is
    saved through a ``FlowcellRunMetricsConnection``; each
    ``SampleRunMetricsDocument`` first gets ``project_sample_name`` filled in
    from the matching project sample (when one is found) and is then saved
    through a ``SampleRunMetricsConnection``.

    Returns ``None`` in every case. Returns early when
    ``self._check_pargs(["flowcell"])`` is falsy, or (after logging) when no
    database url is available, the flowcell name is rejected by
    ``validate_fc_directory_format``, or no qc objects were collected.
    """
    if not self._check_pargs(["flowcell"]):
        return
    # The command-line url wins; otherwise fall back to the [db] config section.
    url = self.pargs.url or self.app.config.get("db", "url")
    if not url:
        self.app.log.warn("Please provide a valid url: got {}".format(url))
        return
    flowcell = self.pargs.flowcell
    if not validate_fc_directory_format(flowcell):
        self.app.log.warn(
            "Path '{}' does not conform to bcbio flowcell directory format; aborting".format(flowcell)
        )
        return

    flowcell_id = fc_id(flowcell)
    fc_date, _ = fc_parts(flowcell)
    # 120815 is the cut-off between the two directory layouts
    # (presumably a YYMMDD date -- confirm against fc_parts).
    if int(fc_date) < 120815:
        self.log.info("Assuming pre-casava based file structure for {}".format(flowcell_id))
        qc_objects = self._collect_pre_casava_qc()
    else:
        self.log.info("Assuming casava based file structure for {}".format(flowcell_id))
        qc_objects = self._collect_casava_qc()

    if not qc_objects:
        self.log.info("No out-of-date qc objects for {}".format(flowcell_id))
        return
    self.log.info("Retrieved {} updated qc objects".format(len(qc_objects)))

    s_con = SampleRunMetricsConnection(dbname=self.app.config.get("db", "samples"), **vars(self.app.pargs))
    fc_con = FlowcellRunMetricsConnection(dbname=self.app.config.get("db", "flowcells"), **vars(self.app.pargs))
    p_con = ProjectSummaryConnection(dbname=self.app.config.get("db", "projects"), **vars(self.app.pargs))

    for obj in qc_objects:
        if self.app.pargs.debug:
            self.log.debug("{}: {}".format(str(obj), obj["_id"]))
        # NOTE(review): `con.save(obj)` below is evaluated before `dry` is
        # called, so `dry` receives the return value of the save rather than
        # something it can choose not to run -- confirm this is intended for
        # dry-run mode.
        if isinstance(obj, FlowcellRunMetricsDocument):
            dry("Saving object {}".format(repr(obj)), fc_con.save(obj))
        if isinstance(obj, SampleRunMetricsDocument):
            project_sample = p_con.get_project_sample(
                obj.get("sample_prj"),
                obj.get("barcode_name"),
                self.pargs.extensive_matching,
            )
            if project_sample:
                obj["project_sample_name"] = project_sample["sample_name"]
            dry("Saving object {}".format(repr(obj)), s_con.save(obj))
def upload_qc(self):
    """Collect the QC objects of a flowcell and save them via the db connections.

    The flowcell directory is read from ``self.pargs.flowcell``. Depending on
    the date part returned by ``fc_parts`` the objects are gathered with
    ``self._collect_pre_casava_qc()`` (date before 120815) or
    ``self._collect_casava_qc()``. Each ``FlowcellRunMetricsDocument`` is
    saved through a ``FlowcellRunMetricsConnection``; each
    ``SampleRunMetricsDocument`` first gets ``project_sample_name`` filled in
    from the matching project sample (when one is found) and is then saved
    through a ``SampleRunMetricsConnection``.

    Returns ``None`` in every case. Returns early when
    ``self._check_pargs(["flowcell"])`` is falsy, or (after logging) when no
    database url is available, the flowcell name is rejected by
    ``validate_fc_directory_format``, or no qc objects were collected.
    """
    if not self._check_pargs(["flowcell"]):
        return
    # The command-line url wins; otherwise fall back to the [db] config section.
    url = self.pargs.url or self.app.config.get("db", "url")
    if not url:
        self.app.log.warn("Please provide a valid url: got {}".format(url))
        return
    flowcell = self.pargs.flowcell
    if not validate_fc_directory_format(flowcell):
        self.app.log.warn(
            "Path '{}' does not conform to bcbio flowcell directory format; aborting".format(flowcell)
        )
        return

    flowcell_id = fc_id(flowcell)
    fc_date, _ = fc_parts(flowcell)
    # 120815 is the cut-off between the two directory layouts
    # (presumably a YYMMDD date -- confirm against fc_parts).
    if int(fc_date) < 120815:
        self.log.info("Assuming pre-casava based file structure for {}".format(flowcell_id))
        qc_objects = self._collect_pre_casava_qc()
    else:
        self.log.info("Assuming casava based file structure for {}".format(flowcell_id))
        qc_objects = self._collect_casava_qc()

    if not qc_objects:
        self.log.info("No out-of-date qc objects for {}".format(flowcell_id))
        return
    self.log.info("Retrieved {} updated qc objects".format(len(qc_objects)))

    s_con = SampleRunMetricsConnection(dbname=self.app.config.get("db", "samples"), **vars(self.app.pargs))
    fc_con = FlowcellRunMetricsConnection(dbname=self.app.config.get("db", "flowcells"), **vars(self.app.pargs))
    p_con = ProjectSummaryConnection(dbname=self.app.config.get("db", "projects"), **vars(self.app.pargs))

    for obj in qc_objects:
        if self.app.pargs.debug:
            self.log.debug("{}: {}".format(str(obj), obj["_id"]))
        # NOTE(review): `con.save(obj)` below is evaluated before `dry` is
        # called, so `dry` receives the return value of the save rather than
        # something it can choose not to run -- confirm this is intended for
        # dry-run mode.
        if isinstance(obj, FlowcellRunMetricsDocument):
            dry("Saving object {}".format(repr(obj)), fc_con.save(obj))
        if isinstance(obj, SampleRunMetricsDocument):
            project_sample = p_con.get_project_sample(
                obj.get("sample_prj"),
                obj.get("barcode_name"),
                self.pargs.extensive_matching,
            )
            if project_sample:
                obj["project_sample_name"] = project_sample["sample_name"]
            dry("Saving object {}".format(repr(obj)), s_con.save(obj))