class LimsService(object):
    """RPC service exposing sample, case and project lookups in the LIMS.

    The LIMS connection is opened once in ``__init__`` from the module-level
    ``BASEURI``, ``USERNAME`` and ``PASSWORD`` settings, and
    ``check_version()`` is called on it immediately.
    """

    name = "lims_service"

    def __init__(self):
        super(LimsService, self).__init__()
        self.lims = Lims(BASEURI, USERNAME, PASSWORD)
        self.lims.check_version()

    @staticmethod
    def _error_text(error):
        """Return the message carried by ``error``.

        ``BaseException.message`` only exists on Python 2. The first
        constructor argument holds the same value for single-argument
        exceptions and is available on both major versions.
        """
        return error.args[0] if error.args else str(error)

    @rpc
    def sample(self, lims_id):
        """Get a sample from LIMS.

        Returns whatever ``transform_entry`` produces for the sample with
        the given LIMS id.
        """
        return transform_entry(LimsSample(self.lims, id=lims_id))

    @rpc
    def validate_sample(self, lims_id):
        """Validate information in the LIMS on sample level.

        Raises:
            MissingLimsDataException: the "familyID" or "customer" UDF is
                not set on the sample.
            LimsSampleNotFoundError: reading the sample UDFs raised
                ``HTTPError``.
        """
        logger.debug("fetch sample from LIMS")
        sample_obj = LimsSample(self.lims, id=lims_id)
        try:
            # Only the presence of the UDFs is checked; the values are unused.
            for required_udf in ("familyID", "customer"):
                _ = sample_obj.udf[required_udf]
        except KeyError as error:
            raise MissingLimsDataException(self._error_text(error))
        except HTTPError as error:
            raise LimsSampleNotFoundError(self._error_text(error))

    @rpc
    def ls_case(self, cust_id, case_id):
        """Fetch all samples for a case from the LIMS.

        Returns a dict with the transformed samples under "samples" and the
        distinct "analysis_type" values of those samples under
        "analysis_types".
        """
        sample_objs = self.lims.get_samples(
            udf={"customer": cust_id, "familyID": case_id})
        sample_dicts = [transform_entry(sample) for sample in sample_objs]
        analysis_types = set(sample["analysis_type"]
                             for sample in sample_dicts)
        return {
            "analysis_types": list(analysis_types),
            "samples": sample_dicts,
        }

    @rpc
    def ls_project(self, project_id):
        """List the LIMS ids of all samples in a project."""
        samples = self.lims.get_samples(projectname=project_id)
        return [sample.id for sample in samples]

    @rpc
    def pedigree(self, cust_id, case_id):
        """Generate pedigree content for a case."""
        return serialize_pedigree(self.lims, cust_id, case_id)

    @rpc
    def target_reads(self, lims_id):
        """Determine the amount of reads to be sequenced.

        The "reads" value returned by ``analysis_info`` is expressed in
        millions of reads and is converted to an absolute count.
        """
        sample_obj = LimsSample(self.lims, id=lims_id)
        app_tag = analysis_info(sample_obj)
        # millions of reads
        return app_tag["reads"] * 1000000
def main(args):
    """Recompute the 'Reads Min' UDF of recently modified LIMS projects.

    For every project id returned by ``get_last_modified_projectids`` the
    non-aborted samples are counted, the project's sequencing platform is
    turned into a key with ``parse_sequencing_platform``, and every row of
    the "yields/min_yield" view whose lower-cased key matches is used to set
    ``Reads Min = row.value * lanes ordered / sample count``.

    Args:
        args: parsed command-line arguments. ``args.conf`` is the path of
            the YAML file handed to ``setupServer``; ``args.hours`` is
            formatted as "<hours> hours" for the project query.
    """
    lims_db = get_session()
    lims = Lims(BASEURI, USERNAME, PASSWORD)
    with open(args.conf) as cf:
        # safe_load: yaml.load() without an explicit Loader can construct
        # arbitrary objects and is rejected by current PyYAML releases.
        db_conf = yaml.safe_load(cf)
    couch = setupServer(db_conf)
    db = couch["expected_yields"]
    postgres_string = "{} hours".format(args.hours)
    project_ids = get_last_modified_projectids(lims_db, postgres_string)

    for project in [Project(lims, id=x) for x in project_ids]:
        # Samples manually flagged as aborted do not share the yield.
        samples_count = 0
        for sample in lims.get_samples(projectname=project.name):
            aborted = ("Status (manual)" in sample.udf
                       and sample.udf["Status (manual)"] == "Aborted")
            if not aborted:
                samples_count += 1

        try:
            lanes_ordered = project.udf['Sequence units ordered (lanes)']
            key = parse_sequencing_platform(
                project.udf['Sequencing platform'])
        except Exception:
            # Best effort: projects lacking (or with unparsable) ordering
            # information are skipped. Unlike a bare ``except`` this still
            # lets KeyboardInterrupt/SystemExit through.
            continue

        for row in db.view("yields/min_yield"):
            db_key = [x.lower() if x else None for x in row.key]
            if db_key != key:
                continue
            try:
                project.udf['Reads Min'] = (
                    float(row.value) * lanes_ordered / samples_count)
                project.put()
            except ZeroDivisionError:
                # No countable samples in the project: nothing to set.
                pass
def main(args):
    """Recompute the 'Reads Min' UDF of recently modified LIMS projects.

    For every project id returned by ``get_last_modified_projectids`` the
    non-aborted samples are counted, the project's sequencing platform is
    turned into a key with ``parse_sequencing_platform``, and every row of
    the "yields/min_yield" view whose lower-cased key matches is used to set
    ``Reads Min = row.value * lanes ordered / sample count``.

    Args:
        args: parsed command-line arguments. ``args.conf`` is the path of
            the YAML file handed to ``setupServer``; ``args.hours`` is
            formatted as "<hours> hours" for the project query.
    """
    lims_db = get_session()
    lims = Lims(BASEURI, USERNAME, PASSWORD)
    with open(args.conf) as cf:
        # safe_load: yaml.load() without an explicit Loader can construct
        # arbitrary objects and is rejected by current PyYAML releases.
        db_conf = yaml.safe_load(cf)
    couch = setupServer(db_conf)
    db = couch["expected_yields"]
    postgres_string = "{} hours".format(args.hours)
    project_ids = get_last_modified_projectids(lims_db, postgres_string)

    for project in [Project(lims, id=x) for x in project_ids]:
        # Samples manually flagged as aborted do not share the yield.
        samples_count = 0
        for sample in lims.get_samples(projectname=project.name):
            aborted = ("Status (manual)" in sample.udf
                       and sample.udf["Status (manual)"] == "Aborted")
            if not aborted:
                samples_count += 1

        try:
            lanes_ordered = project.udf['Sequence units ordered (lanes)']
            key = parse_sequencing_platform(
                project.udf['Sequencing platform'])
        except Exception:
            # Best effort: projects lacking (or with unparsable) ordering
            # information are skipped. Unlike a bare ``except`` this still
            # lets KeyboardInterrupt/SystemExit through.
            continue

        for row in db.view("yields/min_yield"):
            db_key = [x.lower() if x else None for x in row.key]
            if db_key != key:
                continue
            try:
                project.udf['Reads Min'] = (
                    float(row.value) * lanes_ordered / samples_count)
                project.put()
            except ZeroDivisionError:
                # No countable samples in the project: nothing to set.
                pass
def test_D(server_test1):
    """Filtering samples on UDFs and name returns the matching sample.

    GIVEN a LIMS holding a sample named 'maya' with the UDFs
    "Source": "blood" and "Reads missing (M)": 0,
    WHEN a genologics Lims object is created and queried with those fields,
    THEN the sample with id 'ACC2351A2' is what comes back.
    """
    udf_filter = {"Source": "blood", "Reads missing (M)": 0}
    connection = Lims("http://127.0.0.1:8000", 'dummy', 'dummy')

    found = connection.get_samples(udf=udf_filter, name='maya')

    expected = [Sample(connection, id='ACC2351A2')]
    assert found == expected
class MultiQC_clarity_metadata(BaseMultiqcModule):
    """MultiQC module that adds Clarity LIMS metadata to the report.

    The ``clarity`` entry of the MultiQC config is used as a schema with the
    parts "report_header_info", "general_stats" and "clarity_module". Within
    a part, the keys "Project" and "Sample" list UDFs to read from the
    project or sample; any other key is treated as a process type whose
    "outputs"/"inputs" list artifact UDFs.
    """

    def __init__(self):
        self.log = logging.getLogger('multiqc')

        # Check that this plugin hasn't been disabled
        if config.kwargs.get('disable_clarity', False) is True:
            self.log.info(
                "Skipping MultiQC_Clarity as disabled on command line")
            return
        if getattr(config, 'disable_clarity', False) is True:
            self.log.debug(
                "Skipping MultiQC_Clarity as specified in config file")
            return

        super(MultiQC_clarity_metadata, self).__init__(name='Clarity LIMS',
                                                       anchor='clarity')

        self.intro = (
            '<p>The <a href="https://github.com/MultiQC/MultiQC_Clarity" '
            'target="_blank">MultiQC_Clarity</a> plugin fetches data from a '
            'specified <a href="https://www.genologics.com/clarity-lims/" '
            'target="_blank">Basespace Clarity LIMS</a> instance.</p>')

        self.lims = Lims(BASEURI, USERNAME, PASSWORD)
        self.metadata = {}
        self.header_metadata = {}
        self.general_metadata = {}
        self.tab_metadata = {}
        self.samples = []

        self.schema = getattr(config, 'clarity', None)
        if self.schema is None:
            self.log.debug("No config found for MultiQC_Clarity")
            return

        self.get_samples()
        self.get_metadata('report_header_info')
        self.get_metadata('general_stats')
        self.get_metadata('clarity_module')
        self.update_multiqc_report()
        self.make_sections()
        report.modules_output.append(self)

    @staticmethod
    def _multiplier(factor):
        """Return a callable multiplying its argument by ``factor``.

        Built through a factory so that each header keeps its own factor; a
        lambda created directly inside the configuration loop would see the
        last factor assigned in that loop.
        """
        factor = str(factor)
        return lambda x: float(x) * float(factor)

    def _apply_header_options(self, header_cfg, options):
        """Copy per-header schema options into ``header_cfg``.

        The 'multiply_by' option becomes a 'modify' callable; every other
        option is copied verbatim.
        """
        for option, value in options.items():
            if option == 'multiply_by':
                header_cfg['modify'] = self._multiplier(value)
            else:
                header_cfg[option] = value

    @staticmethod
    def _merge_metadata(target, metadata):
        """Merge two-level ``metadata`` into ``target`` key by key.

        A plain ``dict.update`` would replace the whole inner dict of a
        sample that is already present and drop its earlier values.
        """
        for first_level, values in metadata.items():
            target.setdefault(first_level, {}).update(values)

    def get_samples(self):
        """Populate ``self.samples`` with the LIMS samples of this report.

        With ``clarity_project_name`` set, all samples of that project are
        used. Otherwise the sample names known to the report are looked up
        one by one; names with no match or several matches are skipped.
        """
        project_name = config.kwargs.get('clarity_project_name')
        if project_name:
            # get_projects() returns a list, which has no ``samples``
            # attribute; ask the LIMS for the project's samples directly.
            self.samples = self.lims.get_samples(projectname=project_name)
            return

        names = set()
        for stats in report.general_stats_data:
            names.update(stats.keys())
        for raw_data in report.saved_raw_data.values():
            try:
                names.update(raw_data.keys())
            except AttributeError:
                # Entries without a keys() method are skipped.
                pass

        if not config.kwargs.get('clarity_skip_edit_names'):
            names = self.edit_names(names)
        # Trimming read suffixes can yield the same name twice; look every
        # sample up only once.
        names = sorted(set(names))

        self.log.debug("Looking into Clarity for samples {}".format(
            ", ".join(names)))
        found = 0
        try:
            for name in names:
                matching_samples = self.lims.get_samples(name=name)
                if not matching_samples:
                    self.log.error(
                        "Could not find a sample matching {0}, skipping.".
                        format(name))
                    continue
                if len(matching_samples) > 1:
                    self.log.error(
                        "Found multiple samples matching {0}, skipping".
                        format(name))
                    continue
                found += 1
                self.samples.append(matching_samples[0])
        except Exception as e:
            self.log.warning(
                "Could not connect to Clarity LIMS: {}".format(e))
            return
        self.log.info("Found {} out of {} samples in LIMS.".format(
            found, len(names)))

    def edit_names(self, names):
        """Return ``names`` as a list with read suffixes trimmed.

        A trailing "_1"/"_2" or "_R1"/"_R2" is removed; other names are
        returned unchanged.
        """
        edited = []
        for name in names:
            if name.endswith(("_1", "_2")):
                edited.append(name[:-2])
            elif name.endswith(("_R1", "_R2")):
                edited.append(name[:-3])
            else:
                edited.append(name)
        return edited

    def flatten_metadata(self, metadata):
        """Join set/list values of a two-level dict into ", " strings.

        ``metadata`` is modified in place and returned. Sets are sorted
        first so the resulting string does not depend on set ordering.
        """
        for first_level in metadata:
            for second_level in metadata[first_level]:
                value = metadata[first_level][second_level]
                if isinstance(value, set):
                    value = sorted(value)
                if isinstance(value, list):
                    metadata[first_level][second_level] = ", ".join(value)
        return metadata

    def get_project_metadata(self, udfs):
        """Collect the given project UDFs, keyed by project name."""
        project_metadata = {}
        for sample in self.samples:
            project = sample.project
            # setdefault keeps values gathered from earlier samples of the
            # same project instead of resetting them.
            collected = project_metadata.setdefault(project.name, {})
            for udf in udfs:
                if udf in project.udf:
                    collected.setdefault(udf, set()).add(
                        str(project.udf[udf]))
        return self.flatten_metadata(project_metadata)

    def get_sample_metadata(self, udfs):
        """Collect the given sample UDFs, keyed by sample name."""
        sample_metadata = {}
        for sample in self.samples:
            collected = sample_metadata.setdefault(sample.name, {})
            for udf in udfs:
                if udf in sample.udf:
                    collected.setdefault(udf, set()).add(
                        str(sample.udf[udf]))
        return self.flatten_metadata(sample_metadata)

    def get_metadata(self, part):
        """Fetch the metadata described by one part of the schema.

        Results are stored in ``header_metadata``, ``general_metadata`` or
        ``tab_metadata`` depending on ``part``.
        """
        if part == "report_header_info":
            target = self.header_metadata
        elif part == "general_stats":
            target = self.general_metadata
        else:
            target = self.tab_metadata

        part_schema = self.schema[part]
        artifacts_fetched = False
        for key in part_schema:
            if key == 'Project':
                metadata = self.get_project_metadata(part_schema['Project'])
            elif key == 'Sample':
                metadata = self.get_sample_metadata(part_schema['Sample'])
            elif artifacts_fetched:
                # get_artifact_metadata() already covers every process type
                # of this part; do not query the LIMS again.
                continue
            else:
                metadata = self.get_artifact_metadata(part_schema)
                artifacts_fetched = True
            self._merge_metadata(target, metadata)

    def get_artifact_metadata(self, pt_to_udfs):
        """Collect artifact UDFs per sample for every process type.

        ``pt_to_udfs`` maps a process type to a dict whose optional
        "outputs" and "inputs" entries list UDF names. Output UDFs are read
        from the artifacts returned by the LIMS for the sample and process
        type; input UDFs from those inputs of the artifacts' parent
        processes that belong to the sample. The "Sample" and "Project" keys
        are ignored here.
        """
        artifact_metadata = {}
        for sample in self.samples:
            artifact_metadata[sample.name] = {}
            for process_type in pt_to_udfs:
                if process_type in ('Sample', 'Project'):
                    continue

                artifacts = self.lims.get_artifacts(
                    sample_name=sample.name, process_type=process_type)
                for udf_name in pt_to_udfs[process_type].get("outputs", []):
                    artifact_metadata[sample.name][udf_name] = [
                        str(artifact.udf[udf_name]) for artifact in artifacts
                        if udf_name in artifact.udf
                    ]

                processes = set(art.parent_process for art in artifacts)
                inputs = []
                for process in processes:
                    inputs.extend([
                        art for art in process.all_inputs()
                        if sample.name in [s.name for s in art.samples]
                    ])
                for udf_name in pt_to_udfs[process_type].get("inputs", []):
                    artifact_metadata[sample.name][udf_name] = [
                        str(artifact.udf[udf_name]) for artifact in inputs
                        if udf_name in artifact.udf
                    ]
        return self.flatten_metadata(artifact_metadata)

    def update_multiqc_report(self):
        """Push header info and general-stats columns into the report."""
        if config.report_header_info is None:
            config.report_header_info = []
        for first_level in self.header_metadata:
            config.report_header_info.append(
                dict(self.header_metadata[first_level]))

        headers = {}
        general_schema = self.schema["general_stats"]
        for first_level in general_schema:
            for header in general_schema[first_level]:
                header_cfg = headers[header] = {}
                options = general_schema[first_level][header]
                if isinstance(options, dict):
                    self._apply_header_options(header_cfg, options)
                header_cfg.setdefault(
                    'description', '{} - {}'.format(first_level, header))
                header_cfg.setdefault('namespace', 'Clarity LIMS')
                header_cfg.setdefault('scale', 'YlGn')

        report.general_stats_headers.append(headers)
        report.general_stats_data.append(self.general_metadata)

    def make_sections(self):
        """Append the table built from ``tab_metadata`` to the intro."""
        module_schema = self.schema['clarity_module']
        headers = OrderedDict()
        for first_level in self.tab_metadata:
            for header in self.tab_metadata[first_level]:
                if header in headers:
                    continue
                desc = header
                header_cfg = headers[header] = {}
                for key in module_schema:
                    if header in module_schema[key]:
                        desc = key
                    elif isinstance(module_schema[key], dict):
                        for subkey, val in module_schema[key].items():
                            if val is None:
                                # NOTE(review): a None entry stops the scan
                                # of the remaining sub-keys (break, not
                                # continue) - confirm this is intended.
                                break
                            elif header in val:
                                desc = key
                                if isinstance(val[header], dict):
                                    self._apply_header_options(
                                        header_cfg, val[header])
                header_cfg.setdefault('namespace', desc)
                header_cfg.setdefault('title', header)
                header_cfg.setdefault('description', header)
        self.intro += table.plot(self.tab_metadata, headers)
class ProjectReport:
    """Collect the information for a project delivery report and render it.

    Construction connects to the LIMS, reads ``summary_metrics.csv`` from
    the project's delivery folder and gathers the template parameters;
    ``generate_report`` then renders the selected template to a PDF in the
    delivery folder.
    """

    def __init__(self, project_name):
        self.project_name = project_name
        self.project_source = os.path.join(
            cfg.query('sample', 'delivery_source'), project_name)
        self.project_delivery = os.path.join(
            cfg.query('sample', 'delivery_dest'), project_name)
        self.lims = Lims(**cfg.get('clarity'))
        self.params = {'project_name': project_name}
        self.results = {}
        self.fill_sample_names_from_lims()
        self.samples_delivered = self.read_metrics_csv(
            os.path.join(self.project_delivery, 'summary_metrics.csv'))
        self.get_sample_param()
        self.fill_project_information_from_lims()

    def fill_project_information_from_lims(self):
        """Fill ``project_info``/``project_order`` from the LIMS project."""
        project = self.lims.get_projects(name=self.project_name)[0]
        researcher = project.researcher
        self.project_info = {
            'project_name': ['Project name:', self.project_name],
            'project_title': ['Project title:',
                              project.udf.get('Project Title', '')],
            'enquiry': ['Enquiry no:',
                        project.udf.get('Enquiry Number', '')],
            'quote': ['Quote no:', project.udf.get('Quote No.', '')],
            'researcher': ['Researcher:', '%s %s (%s)' % (
                researcher.first_name, researcher.last_name,
                researcher.email)],
        }
        self.project_order = ['project_name', 'project_title', 'enquiry',
                              'quote', 'researcher']

    def fill_sample_names_from_lims(self):
        """Store the project's sample names, raw and with ':'/' ' as '_'."""
        samples = self.lims.get_samples(projectname=self.project_name)
        self.samples = [s.name for s in samples]
        self.modified_samples = [
            re.sub(r'[: ]', '_', s.name) for s in samples]

    def _find_sample(self, sample_name):
        """Return the only LIMS sample with that name in the project.

        Logs an error and returns None when zero or several samples match.
        """
        samples = self.lims.get_samples(projectname=self.project_name,
                                        name=sample_name)
        if len(samples) == 1:
            return samples[0]
        # The message has two placeholders: the match count and the name.
        app_logger.error('%s samples found for sample name %s'
                         % (len(samples), sample_name))
        return None

    def get_library_workflow_from_sample(self, sample_name):
        """Return the sample's 'Prep Workflow' UDF, or None."""
        sample = self._find_sample(sample_name)
        if sample is None:
            return None
        return sample.udf.get('Prep Workflow')

    def get_species_from_sample(self, sample_name):
        """Return the sample's 'Species' UDF mapped through species_alias."""
        sample = self._find_sample(sample_name)
        if sample is None:
            return None
        s = sample.udf.get('Species')
        return species_alias.get(s, s)

    def parse_program_csv(self, program_csv):
        """Record '<program>_version' parameters from a programs file.

        The file is read as CSV rows of program name and version; a missing
        file is treated as empty.
        """
        all_programs = {}
        if os.path.exists(program_csv):
            with open(program_csv) as open_prog:
                for row in csv.reader(open_prog):
                    # Blank or truncated rows carry no version.
                    if len(row) >= 2:
                        all_programs[row[0]] = row[1]
        # TODO: change the hardcoded version of bcl2fastq
        all_programs['bcl2fastq'] = '2.17.1.14'
        for p in ['bcl2fastq', 'bcbio', 'bwa', 'gatk', 'samblaster']:
            if p in all_programs:
                self.params[p + '_version'] = all_programs.get(p)

    def parse_project_summary_yaml(self, summary_yaml):
        """Read bcbio and genome versions from a project summary YAML."""
        with open(summary_yaml, 'r') as open_file:
            full_yaml = yaml.safe_load(open_file)
        sample_yaml = full_yaml['samples'][0]
        path_to_bcbio = os.path.basename(
            os.path.dirname(sample_yaml['dirs']['galaxy']))
        # basename() never contains '/', so the former
        # ``path_to_bcbio.split('/')[-2]`` always raised IndexError. The
        # name of the directory holding 'galaxy' is what that expression
        # selects on the full path, so it is used directly.
        # NOTE(review): confirm whether only the version part of this
        # directory name is wanted.
        self.params['bcbio_version'] = path_to_bcbio
        if sample_yaml['genome_build'] == 'hg38':
            self.params['genome_version'] = (
                'GRCh38 (with alt, decoy and HLA sequences)')

    def read_metrics_csv(self, metrics_csv):
        """Return the tab-separated metrics rows keyed by 'Sample Id'."""
        samples_to_info = {}
        with open(metrics_csv) as open_metrics:
            reader = csv.DictReader(open_metrics, delimiter='\t',
                                    quoting=csv.QUOTE_NONE)
            for row in reader:
                samples_to_info[row['Sample Id']] = row
        return samples_to_info

    def get_sample_param(self):
        """Select the report template and compute the result figures.

        Returns None early, without filling ``results``, when the library
        workflow is not one of the known ones.
        """
        self.fill_sample_names_from_lims()
        library_workflows = set()
        species = set()
        for sample in self.samples:
            library_workflows.add(
                self.get_library_workflow_from_sample(sample))
            species.add(self.get_species_from_sample(sample))

        if len(library_workflows) == 1:
            self.library_workflow = library_workflows.pop()
        else:
            # Values may be None, so they are converted before joining.
            # NOTE(review): library_workflow stays unset on this path, so
            # the template selection below raises AttributeError.
            app_logger.error(
                'More than one workfkow used in project %s: %s' % (
                    self.project_name,
                    ', '.join(str(w) for w in library_workflows)))

        if len(species) == 1:
            self.species = species.pop()
        else:
            app_logger.error(
                'More than one species used in project %s: %s' % (
                    self.project_name,
                    ', '.join(str(s) for s in species)))

        if self.library_workflow in ['TruSeq Nano DNA Sample Prep', None]:
            self.template = 'truseq_nano_template'
        elif self.library_workflow in ['TruSeq PCR-Free DNA Sample Prep',
                                       'TruSeq PCR-Free Sample Prep']:
            self.template = 'truseq_pcrfree_template'
        else:
            app_logger.error('Unknown library workflow %s for project %s' % (
                self.library_workflow, self.project_name))
            return None

        if self.species == 'Human':
            self.template += '.html'
        else:
            self.template += '_non_human.html'

        self.params['adapter1'] = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA"
        self.params['adapter2'] = "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT"

        project_size = getFolderSize(self.project_delivery)
        for sample in set(self.modified_samples):
            sample_source = os.path.join(self.project_source, sample)
            if not os.path.exists(sample_source):
                continue
            # Both files may live in the sample folder or in its '.qc'
            # sub-folder.
            program_csv = os.path.join(sample_source, 'programs.txt')
            if not os.path.exists(program_csv):
                program_csv = os.path.join(sample_source, '.qc',
                                           'programs.txt')
            self.parse_program_csv(program_csv)
            summary_yaml = os.path.join(sample_source,
                                        'project-summary.yaml')
            if not os.path.exists(summary_yaml):
                summary_yaml = os.path.join(sample_source, '.qc',
                                            'project-summary.yaml')
            if os.path.exists(summary_yaml):
                self.parse_project_summary_yaml(summary_yaml)

        delivered = list(self.samples_delivered.values())
        self.results['project_size'] = [
            'Total folder size:',
            '%.2fTb' % (project_size / 1000000000000.0)]
        self.results['nb_sample'] = ['Number of sample:', len(self.samples)]
        self.results['nb_sample_delivered'] = [
            'Number of sample delivered:', len(self.samples_delivered)]
        yields = [float(row['Yield']) for row in delivered]
        self.results['yield'] = ['Total yield Gb:', '%.2f' % sum(yields)]
        self.results['mean_yield'] = [
            'Average yield Gb:',
            '%.1f' % (sum(yields) / max(len(yields), 1))]
        try:
            coverage = [float(row['Mean coverage']) for row in delivered]
            self.results['coverage'] = [
                'Average coverage per samples:',
                '%.2f' % (sum(coverage) / max(len(coverage), 1))]
            self.results_order = ['nb_sample', 'nb_sample_delivered',
                                  'yield', 'mean_yield', 'coverage',
                                  'project_size']
        except KeyError:
            # The metrics file has no 'Mean coverage' column.
            self.results_order = ['nb_sample', 'nb_sample_delivered',
                                  'yield', 'mean_yield', 'project_size']

    def generate_report(self):
        """Render the selected template and write it out as a PDF."""
        template_dir = os.path.abspath(
            os.path.join(os.path.dirname(__file__), 'templates'))
        env = Environment(loader=FileSystemLoader(template_dir))
        template = env.get_template(self.template)
        output = template.render(results_order=self.results_order,
                                 results=self.results,
                                 project_info=self.project_info,
                                 project_order=self.project_order,
                                 **self.params)
        pdf = get_pdf(output)
        project_file = os.path.join(
            self.project_delivery,
            'project_%s_report.pdf' % self.project_name)
        # PDF content is binary; text mode can corrupt or reject it.
        with open(project_file, 'wb') as open_pdf:
            open_pdf.write(pdf.getvalue())
class LimsAPI(object):
    """Wrapper around a LIMS connection with sample and case lookups.

    The connection can be passed to the constructor or created later with
    ``connect`` / ``init_app``.
    """

    def __init__(self, lims=None):
        super(LimsAPI, self).__init__()
        self.lims = lims

    def init_app(self, app):
        """Connect with credentials from Flask app config."""
        self.connect(**app.config['LIMS_CONFIG'])

    def connect(self, baseuri, username, password):
        """Connect to the LIMS instance."""
        self.lims = Lims(baseuri, username, password)

    def sample(self, lims_id):
        """Get a sample from the LIMS.

        Returns:
            The result of ``transform_entry`` for the sample.

        Raises:
            MissingLimsDataException: ``transform_entry`` raised KeyError.
            HTTPError: re-raised unchanged after being logged.
        """
        logger.debug('fetch sample from LIMS')
        sample_obj = Sample(self.lims, id=lims_id)
        try:
            sample_json = transform_entry(sample_obj)
        except KeyError as error:
            # ``error.message`` is Python 2 only; args[0] is the missing key
            # on both major versions.
            missing = error.args[0] if error.args else error
            message = "missing UDF: {}".format(missing)
            logger.warning(message)
            raise MissingLimsDataException(message)
        except HTTPError:
            logger.warning('unknown lims id')
            raise
        return sample_json

    def samples(self, project_id=None, case=None, sample_ids=None, limit=20,
                **kwargs):
        """Return up to ``limit`` transformed samples and their analysis types.

        Samples are selected by project name when ``project_id`` is given,
        else by the ``(customer, familyID)`` pair in ``case``, else by
        passing ``kwargs`` straight to the LIMS query. ``sample_ids`` is
        accepted but not used.
        """
        if project_id:
            sample_objs = self.lims.get_samples(projectname=project_id)
        elif case:
            sample_objs = self.lims.get_samples(
                udf={'customer': case[0], 'familyID': case[1]})
        else:
            sample_objs = self.lims.get_samples(**kwargs)

        sample_dicts = []
        for index, sample in enumerate(sample_objs):
            if index >= limit:
                break
            sample_dicts.append(transform_entry(sample))

        analysis_types = set(sample['analysis_type']
                             for sample in sample_dicts)
        return {
            'analysis_types': list(analysis_types),
            'samples': sample_dicts,
        }

    def cases(self):
        """Return a list of cases from the database.

        The result is a generator of ``(customer, familyID)`` tuples, one
        per LIMS sample that has both UDFs set.
        """
        return ((sample.udf['customer'], sample.udf['familyID'])
                for sample in self.lims.get_samples()
                if 'familyID' in sample.udf and 'customer' in sample.udf)
class MultiQC_clarity_metadata(BaseMultiqcModule):
    """MultiQC module that adds Clarity LIMS metadata to the report.

    The ``clarity`` entry of the MultiQC config is used as a schema with the
    parts "Header", "General Statistics" and "Clarity Tab". Within a part,
    the keys "Project" and "Sample" list UDFs to read from the project or
    sample; any other key is treated as a process type whose
    "outputs"/"inputs" list artifact UDFs.
    """

    def __init__(self):
        self.log = logging.getLogger('multiqc')

        # Check that this plugin hasn't been disabled
        if config.kwargs.get('disable_clarity', False) is True:
            self.log.info(
                "Skipping MultiQC_Clarity as disabled on command line")
            return
        if getattr(config, 'disable_clarity', False) is True:
            self.log.debug(
                "Skipping MultiQC_Clarity as specified in config file")
            return

        super(MultiQC_clarity_metadata, self).__init__(
            name='Clarity',
            anchor='clarity',
            href='https://github.com/Galithil/MultiQC_Clarity',
            info="fetches data from your Basespace Clarity LIMS instance.")

        self.lims = Lims(BASEURI, USERNAME, PASSWORD)
        self.metadata = {}
        self.header_metadata = {}
        self.general_metadata = {}
        self.tab_metadata = {}
        self.samples = []
        self.sections = []

        self.schema = getattr(config, 'clarity', None)
        if self.schema is None:
            self.log.warning("No config found for MultiQC_Clarity")
            return

        self.get_samples()
        self.get_metadata('Header')
        self.get_metadata('General Statistics')
        self.get_metadata('Clarity Tab')
        self.update_multiqc_report()
        self.make_sections()
        report.modules_output.append(self)

    @staticmethod
    def _merge_metadata(target, metadata):
        """Merge two-level ``metadata`` into ``target`` key by key.

        A plain ``dict.update`` would replace the whole inner dict of a
        sample that is already present and drop its earlier values.
        """
        for first_level, values in metadata.items():
            target.setdefault(first_level, {}).update(values)

    def get_samples(self):
        """Populate ``self.samples`` with the LIMS samples of this report.

        With ``clarity_project_name`` set, all samples of that project are
        used. Otherwise the sample names known to the report are looked up
        one by one; names with no match or several matches are skipped.
        """
        project_name = config.kwargs.get('clarity_project_name')
        if project_name:
            # get_projects() returns a list, which has no ``samples``
            # attribute; ask the LIMS for the project's samples directly.
            self.samples = self.lims.get_samples(projectname=project_name)
            return

        names = set()
        for stats in report.general_stats_data:
            names.update(stats.keys())
        for raw_data in report.saved_raw_data.values():
            try:
                names.update(raw_data.keys())
            except AttributeError:
                # Entries without a keys() method are skipped.
                pass

        if not config.kwargs.get('clarity_skip_edit_names'):
            names = self.edit_names(names)
        # Trimming read suffixes can yield the same name twice; look every
        # sample up only once.
        names = sorted(set(names))

        self.log.debug("Looking into Clarity for samples {}".format(
            ", ".join(names)))
        found = 0
        try:
            for name in names:
                matching_samples = self.lims.get_samples(name=name)
                if not matching_samples:
                    self.log.error(
                        "Could not find a sample matching {0}, skipping.".
                        format(name))
                    continue
                if len(matching_samples) > 1:
                    self.log.error(
                        "Found multiple samples matching {0}, skipping".
                        format(name))
                    continue
                found += 1
                self.samples.append(matching_samples[0])
        except Exception as e:
            self.log.warning(
                "Could not connect to Clarity LIMS: {}".format(e))
            return
        self.log.info("Found {} out of {} samples in LIMS.".format(
            found, len(names)))

    def edit_names(self, names):
        """Return ``names`` as a list with read suffixes trimmed.

        A trailing "_1"/"_2" or "_R1"/"_R2" is removed; other names are
        returned unchanged.
        """
        edited = []
        for name in names:
            if name.endswith(("_1", "_2")):
                edited.append(name[:-2])
            elif name.endswith(("_R1", "_R2")):
                edited.append(name[:-3])
            else:
                edited.append(name)
        return edited

    def flatten_metadata(self, metadata):
        """Join set/list values of a two-level dict into ", " strings.

        ``metadata`` is modified in place and returned. Sets are sorted
        first so the resulting string does not depend on set ordering.
        """
        for first_level in metadata:
            for second_level in metadata[first_level]:
                value = metadata[first_level][second_level]
                if isinstance(value, set):
                    value = sorted(value)
                if isinstance(value, list):
                    metadata[first_level][second_level] = ", ".join(value)
        return metadata

    def get_project_metadata(self, udfs):
        """Collect the given project UDFs, keyed by project name."""
        project_metadata = {}
        for sample in self.samples:
            project = sample.project
            # setdefault keeps values gathered from earlier samples of the
            # same project instead of resetting them.
            collected = project_metadata.setdefault(project.name, {})
            for udf in udfs:
                if udf in project.udf:
                    collected.setdefault(udf, set()).add(
                        str(project.udf[udf]))
        return self.flatten_metadata(project_metadata)

    def get_sample_metadata(self, udfs):
        """Collect the given sample UDFs, keyed by sample name."""
        sample_metadata = {}
        for sample in self.samples:
            collected = sample_metadata.setdefault(sample.name, {})
            for udf in udfs:
                if udf in sample.udf:
                    collected.setdefault(udf, set()).add(
                        str(sample.udf[udf]))
        return self.flatten_metadata(sample_metadata)

    def get_metadata(self, part):
        """Fetch the metadata described by one part of the schema.

        Results are stored in ``header_metadata``, ``general_metadata`` or
        ``tab_metadata`` depending on ``part``.
        """
        if part == "Header":
            target = self.header_metadata
        elif part == "General Statistics":
            target = self.general_metadata
        else:
            target = self.tab_metadata

        part_schema = self.schema[part]
        artifacts_fetched = False
        for key in part_schema:
            if key == 'Project':
                metadata = self.get_project_metadata(part_schema['Project'])
            elif key == 'Sample':
                metadata = self.get_sample_metadata(part_schema['Sample'])
            elif artifacts_fetched:
                # get_artifact_metadata() already covers every process type
                # of this part; do not query the LIMS again.
                continue
            else:
                metadata = self.get_artifact_metadata(part_schema)
                artifacts_fetched = True
            self._merge_metadata(target, metadata)

    def get_artifact_metadata(self, pt_to_udfs):
        """Collect artifact UDFs per sample for every process type.

        ``pt_to_udfs`` maps a process type to a dict whose optional
        "outputs" and "inputs" entries list UDF names. Output UDFs are read
        from the artifacts returned by the LIMS for the sample and process
        type; input UDFs from those inputs of the artifacts' parent
        processes that belong to the sample. The "Sample" and "Project" keys
        are ignored here.
        """
        artifact_metadata = {}
        for sample in self.samples:
            artifact_metadata[sample.name] = {}
            for process_type in pt_to_udfs:
                if process_type in ('Sample', 'Project'):
                    continue

                artifacts = self.lims.get_artifacts(
                    sample_name=sample.name, process_type=process_type)
                for udf_name in pt_to_udfs[process_type].get("outputs", []):
                    artifact_metadata[sample.name][udf_name] = [
                        str(artifact.udf[udf_name]) for artifact in artifacts
                        if udf_name in artifact.udf
                    ]

                processes = set(art.parent_process for art in artifacts)
                inputs = []
                for process in processes:
                    inputs.extend([
                        art for art in process.all_inputs()
                        if sample.name in [s.name for s in art.samples]
                    ])
                for udf_name in pt_to_udfs[process_type].get("inputs", []):
                    artifact_metadata[sample.name][udf_name] = [
                        str(artifact.udf[udf_name]) for artifact in inputs
                        if udf_name in artifact.udf
                    ]
        return self.flatten_metadata(artifact_metadata)

    def update_multiqc_report(self):
        """Push header info and general-stats columns into the report."""
        if config.report_header_info is None:
            config.report_header_info = []
        for first_level in self.header_metadata:
            config.report_header_info.append(
                dict(self.header_metadata[first_level]))

        headers = {}
        general_schema = self.schema["General Statistics"]
        for first_level in general_schema:
            for header in general_schema[first_level]:
                headers[header] = {
                    'description': first_level,
                    'namespace': 'Clarity',
                    'scale': 'YlGn'
                }

        report.general_stats_headers.append(headers)
        report.general_stats_data.append(self.general_metadata)

    def make_sections(self):
        """Add the 'Clarity Data' section built from ``tab_metadata``."""
        tab_schema = self.schema['Clarity Tab']
        headers = OrderedDict()
        for first_level in self.tab_metadata:
            for header in self.tab_metadata[first_level]:
                if header in headers:
                    continue
                # The namespace is the schema key under which the header is
                # listed, or the header itself when it is not found.
                desc = header
                for key in tab_schema:
                    if header in tab_schema[key]:
                        desc = key
                    elif isinstance(tab_schema[key], dict):
                        for subkey in tab_schema[key]:
                            if header in tab_schema[key][subkey]:
                                desc = key
                headers[header] = {
                    'namespace': desc,
                    'title': header,
                    'description': header
                }

        self.sections.append({
            'name': 'Clarity Data',
            'anchor': 'clarity_data',
            'content':
            '<p> Data obtained from Illumina Basespace Clarity LIMS.</p>' +
            table.plot(self.tab_metadata, headers)
        })
class MultiQC_clarity_metadata(BaseMultiqcModule):
    """MultiQC module that adds Clarity LIMS UDF values to the report.

    The ``clarity`` entry of the MultiQC config is used as a schema with up
    to three parts: ``report_header_info``, ``general_stats`` and
    ``clarity_module``. Each part maps ``Project``, ``Sample`` or a process
    type name to the UDFs that should be fetched for the samples found in
    the report.
    """

    def __init__(self):
        """Connect to the LIMS, collect the metadata and register the module.

        Returns early, leaving the module unregistered, when the plugin is
        disabled, when genologics or its configuration is unavailable, or
        when no ``clarity`` schema is present in the MultiQC config.
        """
        self.log = logging.getLogger('multiqc')

        # Check that this plugin hasn't been disabled
        if config.kwargs.get('disable_clarity', False) is True:
            self.log.info(
                "Skipping MultiQC_Clarity as disabled on command line")
            return
        if getattr(config, 'disable_clarity', False) is True:
            self.log.debug(
                "Skipping MultiQC_Clarity as specified in config file")
            return

        super(MultiQC_clarity_metadata, self).__init__(name='Clarity LIMS',
                                                       anchor='clarity')

        self.intro = (
            '<p>The <a href="https://github.com/MultiQC/MultiQC_Clarity" '
            'target="_blank">MultiQC_Clarity</a> plugin fetches data from a '
            'specified <a href="https://www.genologics.com/clarity-lims/" '
            'target="_blank">Basespace Clarity LIMS</a> instance.</p>')

        try:
            from genologics.lims import Lims
            from genologics import config as genologics_config
        except (Exception, SystemExit):
            # SystemExit stays in the list because the bare ``except`` this
            # replaces caught it too.
            # NOTE(review): importing genologics.config presumably can exit
            # when no config file exists, like load_config below - confirm.
            self.log.warning("Importing genologics failed: " +
                             traceback.format_exc())
            return

        try:
            BASEURI, USERNAME, PASSWORD, VERSION, MAIN_LOG = \
                genologics_config.load_config(
                    specified_config=config.kwargs.get('clarity_config'))
        except SystemExit:
            self.log.warning(
                "Genologics config file is not specified as --clarity_config or in ~/.genologicsrc. "
                "Skip running Clarity module")
            return

        self.lims = Lims(BASEURI, USERNAME, PASSWORD)
        self.metadata = {}
        self.header_metadata = {}
        self.general_metadata = {}
        self.tab_metadata = {}
        self.samples = []

        self.schema = getattr(config, 'clarity', None)
        if self.schema is None:
            self.log.debug("No config found for MultiQC_Clarity")
            return

        try:
            self.get_samples()
            if 'report_header_info' in self.schema:
                self.get_metadata('report_header_info')
            if 'general_stats' in self.schema:
                self.get_metadata('general_stats')
            if 'clarity_module' in self.schema:
                self.get_metadata('clarity_module')
            self.update_multiqc_report()
            self.make_sections()
            report.modules_output.append(self)
        except Exception:
            # Best effort: a failure here must not abort the whole report.
            self.log.error("MultiQC_Clarity failed: " +
                           traceback.format_exc())
            return

    @staticmethod
    def _apply_header_config(header_cfg, overrides):
        """Copy per-header options from the schema into ``header_cfg``.

        ``multiply_by`` is turned into a ``modify`` callable; every other
        option is copied unchanged.
        """
        for option, value in overrides.items():
            if option == 'multiply_by':
                # Bind the factor as a default argument: a plain closure
                # would see whatever factor was assigned last, so every
                # header would end up scaled by the same number.
                header_cfg['modify'] = (
                    lambda x, factor=str(value): float(x) * float(factor))
            else:
                header_cfg[option] = value

    def csv_file_from_samplesheet(self, sample_sheet):
        """Return the stripped lines following the ``[Data]`` marker line."""
        csv_lines = []
        with open(sample_sheet) as f:
            found_data = False
            for line in f:
                if found_data:
                    csv_lines.append(line.strip())
                elif line.strip().startswith('[Data]'):
                    found_data = True
        return csv_lines

    def get_raw_sample_names(self, csv_fpath, names):
        """Map report sample names to the first-column value of a CSV file.

        A row is matched on its ``description`` column when the file has
        one, otherwise on its first column. Rows whose name is not in
        ``names`` are ignored.
        """
        raw_sample_names = dict()
        with open(csv_fpath) as f:
            csv_reader = csv.DictReader(f)
            name_col = csv_reader.fieldnames[0]
            for r in csv_reader:
                if 'description' in r:
                    correct_name = r['description']
                else:
                    correct_name = r[name_col]
                if correct_name not in names:
                    continue
                raw_sample_names[correct_name] = r[name_col]
        return raw_sample_names

    def correct_sample_name(self, name):
        """Strip a trailing ``_S<digits>`` and replace dots by underscores."""
        import re
        name = re.sub(r'_S\d+$', '', name)
        return name.replace('.', '_')

    def search_by_samplesheet(self, names):
        """Find the LIMS samples listed in the sample sheet.

        Rows are first looked up by sample LIMS id; rows for which that
        finds nothing are looked up through their container and well.
        Matching samples are renamed to the report sample name and appended
        to ``self.samples``. Rows that do not correspond to a sample in
        ``names`` are skipped.
        """
        sample_sheet_fpath = config.kwargs['samplesheet']
        samples_by_container = defaultdict(dict)

        raw_names = dict((name, name) for name in names)
        bcbio_csv = config.kwargs.get('bcbio_csv')
        if bcbio_csv and isfile(bcbio_csv):
            # Merge instead of replacing, so that report samples missing
            # from the CSV keep mapping to themselves (no KeyError below).
            raw_names.update(self.get_raw_sample_names(bcbio_csv, names))
        correct_sample_names = dict(
            (self.correct_sample_name(raw_names[name]), name)
            for name in names)

        sheet_rows = csv.DictReader(
            self.csv_file_from_samplesheet(sample_sheet_fpath), delimiter=',')
        for row in sheet_rows:
            if 'SampleName' in row:
                sample_name = row['SampleName']
            elif 'Sample_Name' in row:
                sample_name = row['Sample_Name']
            else:
                sample_name = row['SampleRef']
            sample_id = row['SampleID'] if 'SampleID' in row else row[
                'Sample_ID']

            # Rows for samples that are not in the report cannot be renamed
            # to a report name; skip them before querying the LIMS.
            if not sample_name or sample_name not in correct_sample_names:
                continue

            sample_artifacts = self.lims.get_artifacts(samplelimsid=sample_id)
            if sample_artifacts:
                sample = sample_artifacts[0].samples[0]
                sample.name = correct_sample_names[sample_name]
                self.samples.append(sample)
            else:
                try:
                    container = row['SamplePlate']
                    sample_well = row['SampleWell'].replace('_', ':')
                except (KeyError, AttributeError):
                    # The sheet has no usable plate/well information.
                    continue
                samples_by_container[container][sample_well] = sample_name

        for container_id, samples in samples_by_container.items():
            artifacts = self.lims.get_artifacts(containerlimsid=container_id)
            if not artifacts:
                continue
            placements = artifacts[0].container.get_placements()
            for well, sample_name in samples.items():
                if well not in placements:
                    self.log.warning(
                        "No placement {} in container {}, skipping".format(
                            well, container_id))
                    continue
                sample = placements[well].samples[0]
                sample.name = correct_sample_names[sample_name]
                self.samples.append(sample)

    def get_samples(self):
        """Populate ``self.samples`` with the LIMS samples for this report.

        With ``clarity_project_name`` set, all samples of that project are
        used. Otherwise the sample names known to the report are looked up,
        through the sample sheet when one is given and by name as a
        fallback.
        """
        project_name = config.kwargs.get('clarity_project_name')
        if project_name:
            # get_projects() returns a list, which has no ``samples``
            # attribute; ask the LIMS for the project's samples directly.
            self.samples = list(
                self.lims.get_samples(projectname=project_name))
            self.log.info("Found {} in LIMS.".format(project_name))
            return

        names = set()
        for x in report.general_stats_data:
            names.update(x.keys())
        for d in report.saved_raw_data.values():
            try:
                names.update(d.keys())
            except AttributeError:
                # Entry has no keys(), so it carries no sample names.
                pass

        self.log.debug("Looking into Clarity for samples {}".format(
            ", ".join(names)))

        if config.kwargs.get('samplesheet'):
            self.search_by_samplesheet(names)

        if not self.samples:
            try:
                for name in names:
                    matching_samples = self.lims.get_samples(name=name)
                    if not matching_samples:
                        self.log.error(
                            "Could not find a sample matching {0}, skipping."
                            .format(name))
                        continue
                    if len(matching_samples) > 1:
                        self.log.error(
                            "Found multiple samples matching {0}, skipping"
                            .format(name))
                        continue
                    self.samples.append(matching_samples[0])
            except Exception as e:
                self.log.warning(
                    "Could not connect to Clarity LIMS: {}".format(e))
                return None

        self.log.info("Found {} out of {} samples in LIMS.".format(
            len(self.samples), len(names)))

    def edit_names(self, names):
        """Return ``names`` as a list without ``_1``/``_2``/``_R1``/``_R2``."""
        edited = []
        for name in names:
            if name.endswith(("_1", "_2")):
                edited.append(name[:-2])
            elif name.endswith(("_R1", "_R2")):
                edited.append(name[:-3])
            else:
                edited.append(name)
        return edited

    def flatten_metadata(self, metadata):
        """Join set and list values of a two-level dict into strings.

        ``metadata`` is modified in place and also returned.
        """
        for first_level in metadata:
            for second_level in metadata[first_level]:
                value = metadata[first_level][second_level]
                if isinstance(value, (set, list)):
                    metadata[first_level][second_level] = ", ".join(value)
        return metadata

    def get_project_metadata(self, udfs):
        """Collect the requested project UDFs, keyed by project name.

        Values are accumulated over all samples of the same project and
        joined into one string per UDF.
        """
        project_metadata = {}
        for sample in self.samples:
            # setdefault, not assignment: resetting the entry for every
            # sample would throw away what earlier samples contributed.
            entry = project_metadata.setdefault(sample.project.name, {})
            for udf in udfs:
                if udf in sample.project.udf:
                    entry.setdefault(udf, set()).add(
                        str(sample.project.udf[udf]))
        return self.flatten_metadata(project_metadata)

    def get_sample_metadata(self, udfs):
        """Collect the requested sample UDFs, keyed by sample name.

        ``Sample Tissue`` (or else ``Sample Type``) is stored under
        ``Sample Type`` as a link to the sample in the LIMS; when neither is
        present, ``Sample Conc.`` is turned into that link instead. Also
        sets ``report.lims_col`` and ``report.lims_added``.
        """
        sample_metadata = {}
        report.lims_col = 'sample type'
        for sample in self.samples:
            entry = sample_metadata[sample.name] = dict()
            for udf in udfs:
                if udf in sample.udf:
                    entry[udf] = set([str(sample.udf[udf])])

            sample_type = None
            if 'Sample Tissue' in entry:
                sample_type = entry.pop('Sample Tissue')
            elif 'Sample Type' in entry:
                sample_type = entry.pop('Sample Type')

            sample_link = join(self.lims.baseuri, 'clarity',
                               'search?scope=Sample&query=' + sample.id)
            link_open = '<a href="' + sample_link + '" target="_blank">'
            if sample_type:
                entry['Sample Type'] = link_open + sample_type.pop() + '</a>'
                report.lims_added = True
            elif 'Sample Conc.' in entry:
                entry['Sample Conc.'] = (
                    link_open + entry['Sample Conc.'].pop() + '</a>')
                report.lims_added = True

        has_type = [
            'Sample Type' in sample_metadata[sample.name]
            for sample in self.samples
        ]
        if not any(has_type):
            report.lims_col = 'sample conc'
        elif not all(has_type):
            report.lims_col = 'sample type or sample conc'

        return self.flatten_metadata(sample_metadata)

    def get_metadata(self, part):
        """Fetch everything configured under ``self.schema[part]``.

        Results go to ``header_metadata`` for ``report_header_info``, to
        ``general_metadata`` for ``general_stats`` and to ``tab_metadata``
        for any other part.
        """
        if part == "report_header_info":
            target = self.header_metadata
        elif part == "general_stats":
            target = self.general_metadata
        else:
            target = self.tab_metadata

        section = self.schema[part]
        artifacts_fetched = False
        for key in section:
            if key == 'Project':
                metadata = self.get_project_metadata(section['Project'])
            elif key == 'Sample':
                metadata = self.get_sample_metadata(section['Sample'])
            elif not artifacts_fetched:
                # One call already covers every process type of the part.
                metadata = self.get_artifact_metadata(section)
                artifacts_fetched = True
            else:
                continue
            # Merge per first-level key: sample and artifact metadata share
            # sample names, so a plain update() would replace one source's
            # values with the other's.
            for first_level, values in metadata.items():
                target.setdefault(first_level, {}).update(values)

    def get_artifact_metadata(self, pt_to_udfs):
        """Collect artifact UDFs per sample for every configured process type.

        ``pt_to_udfs`` maps process type names to a dict with optional
        ``outputs`` and ``inputs`` UDF name collections; the ``Sample`` and
        ``Project`` keys are ignored. ``outputs`` are read from the
        artifacts returned by the LIMS query for the sample and process
        type, ``inputs`` from the inputs of those artifacts' parent
        processes that belong to the sample.
        """
        artifact_metadata = {}
        for sample in self.samples:
            entry = artifact_metadata[sample.name] = {}
            for process_type in pt_to_udfs:
                if process_type in ('Sample', 'Project'):
                    continue

                artifacts = self.lims.get_artifacts(
                    sample_name=sample.name, process_type=process_type)
                for udf_name in pt_to_udfs[process_type].get("outputs", []):
                    entry[udf_name] = [
                        str(artifact.udf[udf_name]) for artifact in artifacts
                        if udf_name in artifact.udf
                    ]

                # Artifacts without a parent process have no inputs to read.
                processes = set(
                    art.parent_process for art in artifacts
                    if art.parent_process is not None)
                inputs = []
                for p in processes:
                    inputs.extend([
                        art for art in p.all_inputs()
                        if sample.name in [s.name for s in art.samples]
                    ])
                for udf_name in pt_to_udfs[process_type].get("inputs", []):
                    entry[udf_name] = [
                        str(artifact.udf[udf_name]) for artifact in inputs
                        if udf_name in artifact.udf
                    ]

        return self.flatten_metadata(artifact_metadata)

    def update_multiqc_report(self):
        """Push header and general statistics metadata into the report."""
        if config.report_header_info is None:
            config.report_header_info = []
        for first_level in self.header_metadata:
            config.report_header_info.append(
                dict(self.header_metadata[first_level]))

        general_schema = self.schema.get("general_stats")
        if general_schema is None:
            # Nothing configured for the general statistics table.
            return

        headers = {}
        for first_level, section in general_schema.items():
            for header in section:
                header_cfg = headers[header] = {}
                # Only dict sections can carry per-header options.
                overrides = None
                if isinstance(section, dict):
                    overrides = section[header]
                if isinstance(overrides, dict):
                    self._apply_header_config(header_cfg, overrides)
                header_cfg.setdefault(
                    'description', '{} - {}'.format(first_level, header))
                header_cfg.setdefault('namespace', 'Clarity LIMS')
                header_cfg.setdefault('scale', 'YlGn')

        report.general_stats_headers.append(headers)
        report.general_stats_data.append(self.general_metadata)

    def make_sections(self):
        """Append a table of ``tab_metadata`` to the module introduction.

        Column options come from the ``clarity_module`` part of the schema;
        namespace defaults to the schema key the column was found under,
        title and description to the column name.
        """
        headers = OrderedDict()
        module_schema = self.schema.get('clarity_module', {})
        for first_level in self.tab_metadata:
            for header in self.tab_metadata[first_level]:
                if header in headers:
                    continue
                header_cfg = headers[header] = {}
                desc = header
                for key, section in module_schema.items():
                    if header in section:
                        desc = key
                        if isinstance(section, dict) and isinstance(
                                section[header], dict):
                            self._apply_header_config(header_cfg,
                                                      section[header])
                    elif isinstance(section, dict):
                        for val in section.values():
                            if val is None:
                                # An empty sub-section must not hide the
                                # remaining ones.
                                continue
                            if header in val:
                                desc = key
                                if isinstance(val, dict) and isinstance(
                                        val[header], dict):
                                    self._apply_header_config(
                                        header_cfg, val[header])
                header_cfg.setdefault('namespace', desc)
                header_cfg.setdefault('title', header)
                header_cfg.setdefault('description', header)

        self.intro += table.plot(self.tab_metadata, headers)
Per Kraulis, Science for Life Laboratory, Stockholm, Sweden. """ from genologics.lims import Lims # Login parameters for connecting to a LIMS instance. # NOTE: Modify according to your setup. from genologics.site_cloud import BASEURI, USERNAME, PASSWORD # Create the LIMS interface instance, and check the connection and version. lims = Lims(BASEURI, USERNAME, PASSWORD) lims.check_version() # Get the list of all samples. samples = lims.get_samples() print len(samples), 'samples in total' # Get the list of samples in the project with the LIMS id KLL60. project = lims.get_project('KLL60') samples = lims.get_samples(projectlimsid=project.id) print len(samples), 'samples in', project print # Get the sample with the LIMS id JGR58A21, and print info and its UDFs. sample = lims.get_sample('JGR58A21') print sample.id, sample.name, sample.date_received, sample.uri, for key, value in sample.udf.items(): print ' ', key, '=', value # Get the sample with the name 'Joels proper sample-20'.