def read_fastqc_data(self, sample):
    """Parse a sample's ``fastqc_data.txt`` for module statuses and
    over-represented sequences.

    The file is read from
    ``self.result_root / sample.full_name / 'fastqc_data.txt'``.

    Args:
        sample: Object exposing a ``full_name`` attribute, used both to
            locate the file and in the debug log message.

    Returns:
        A ``(qc_info, over_seq)`` tuple. ``qc_info`` is an ``OrderedDict``
        mapping each module description (the text after ``>>``) to its
        status (the last tab-separated field of the header line), in file
        order. ``over_seq`` is a list of ``OverSeq`` records, one per data
        row of the "Overrepresented sequences" module, built from the
        row's tab-separated fields; it is empty when the module has no
        rows.

    Raises:
        ValueError: If a module header line contains no tab, so it cannot
            be split into description and status.
    """
    qc_info = OrderedDict()
    over_seq = []
    qc_data_pth = self.result_root / sample.full_name / 'fastqc_data.txt'
    with open(qc_data_pth) as qc_data:
        for line in qc_data:
            # Only module header lines (">>name<TAB>status") matter here;
            # the ">>END_MODULE" terminator also starts with ">>".
            if not line.startswith('>>') or line.startswith('>>END_MODULE'):
                continue
            qc_desc, qc_status = line.rstrip()[2:].rsplit('\t', 1)
            qc_info[qc_desc] = qc_status
            if qc_desc != "Overrepresented sequences":
                continue
            # Consume this module's body from the same file iterator.
            # The terminator is tested BEFORE a row is parsed, so a module
            # with no rows (header immediately followed by ">>END_MODULE")
            # yields no records and does not swallow the following module.
            # Reaching end-of-file simply ends the loop.
            for row in qc_data:
                if row.startswith('>>END_MODULE'):
                    break
                # Skip the "#Sequence ..." column header row.
                if not row.startswith("#Seq"):
                    over_seq.append(OverSeq(*row.rstrip().split('\t')))
    logger.debug(
        "Sample {}'s qc_info: {}".format(sample.full_name, qc_info)
    )
    logger.debug("Over_seq length: {}".format(len(over_seq)))
    return qc_info, over_seq
def output_report(self):
    """Write every rendered HTML page to the report directory.

    Iterates over ``self.report_html`` (name -> content) and writes each
    content string to a file of that name under ``self.report_root``,
    opened in text write mode (an existing file is truncated).
    No original data is touched; this is file output only.
    """
    for file_name, html in self.report_html.items():
        target = self.report_root / '{}'.format(file_name)
        with open(target, 'w') as out:
            out.write(html)
def parse_sample(self, group, sample_list):
    """Extract alignment statistics from a group's ``align_summary.txt``.

    The file is read from
    ``self.result_root / group / 'align_summary.txt'`` and handed to the
    module helpers ``_extract_separate`` and ``_extract_align``. The named
    groups of both results are merged into one dict, each value converted
    with ``D`` (defined elsewhere in this module).

    Args:
        group: Path component naming the group's result directory.
        sample_list: Not used by this method.

    Returns:
        dict mapping each named group to ``D(value)``. Keys from the
        second helper's result override same-named keys from the first.

    Raises:
        ValueError: If either helper returns a falsy result, i.e. the
            left/right or the pair information could not be found.
    """
    logger.debug("Reading align_summary.txt")
    summary_path = self.result_root / group / 'align_summary.txt'
    with open(summary_path) as handle:
        summary_text = handle.read()

    separate_match = _extract_separate(summary_text)
    if not separate_match:
        raise ValueError("Cannot get left/right info in align_summary.txt")

    align_match = _extract_align(summary_text)
    if not align_match:
        raise ValueError("Cannot get pair info in align_summary.txt")

    info_dict = {}
    for match in (separate_match, align_match):
        for key, value in match.groupdict().items():
            info_dict[key] = D(value)
    return info_dict
def _read_yaml(self):
    """Load and return the parsed content of ``job_info.yaml``.

    The file is read from ``self.root_path / "job_info.yaml"``.

    Returns:
        The Python object produced by parsing the YAML document.
    """
    logger.info("Reading job_info.yaml")
    with open(self.root_path / "job_info.yaml") as f:
        # NOTE(review): this used to be ``yaml.load(f)`` with no Loader.
        # That call warns on PyYAML 5.1+ and raises TypeError on
        # PyYAML 6+, and its default loader could construct arbitrary
        # Python objects. ``safe_load`` only builds standard YAML types;
        # if job_info.yaml relies on Python-specific tags, pass an
        # explicit ``Loader=`` to ``yaml.load`` instead.
        return yaml.safe_load(f)