Beispiel #1
0
 def read_fastqc_data(self, sample):
     """Parse a sample's ``fastqc_data.txt`` report.

     The report is read from
     ``self.result_root / sample.full_name / 'fastqc_data.txt'``.

     Args:
         sample: Object whose ``full_name`` names the sample's result
             sub-directory.

     Returns:
         tuple: ``(qc_info, over_seq)``. ``qc_info`` is an ``OrderedDict``
         mapping each module title to its status string, in file order.
         ``over_seq`` is a list of ``OverSeq`` records, one per data row
         found in the "Overrepresented sequences" module (empty when the
         module has no rows).

     Raises:
         ValueError: If a module header line carries no tab-separated
             status field.
     """
     qc_info = OrderedDict()
     over_seq = []
     # True only while the cursor is inside the overrepresented-sequences
     # module. Tracking the section with a flag (rather than reading ahead
     # with next()) means an empty module -- header directly followed by
     # '>>END_MODULE' -- never has its end marker mistaken for a data row,
     # and a file truncated inside the module simply ends the loop instead
     # of leaking StopIteration.
     in_over_seq = False
     qc_data_pth = self.result_root / sample.full_name / 'fastqc_data.txt'
     with open(qc_data_pth) as qc_data:
         for line in qc_data:
             if line.startswith('>>END_MODULE'):
                 in_over_seq = False
             elif line.startswith('>>'):
                 # Module header: '>>' + title + TAB + status
                 qc_desc, qc_status = line.rstrip()[2:].rsplit('\t', 1)
                 qc_info[qc_desc] = qc_status
                 in_over_seq = qc_desc == "Overrepresented sequences"
             elif in_over_seq:
                 # Skip the '#Sequence ...' column header and blank lines;
                 # every other line in the module is one data row.
                 if line.startswith("#Seq") or not line.strip():
                     continue
                 over_seq.append(OverSeq(*line.rstrip().split('\t')))
     logger.debug(
         "Sample {}'s qc_info: {}".format(sample.full_name, qc_info)
     )
     logger.debug("Over_seq length: {}".format(len(over_seq)))
     return qc_info, over_seq
Beispiel #2
0
    def output_report(self):
        """Write every rendered HTML page into the report directory.

        Each ``(name, content)`` pair of ``self.report_html`` becomes a file
        named ``name`` under ``self.report_root``; an existing file of the
        same name is overwritten. Pure file output, nothing is returned.
        """
        for filename, html in self.report_html.items():
            destination = self.report_root / '{}'.format(filename)
            with open(destination, 'w') as handle:
                handle.write(html)
Beispiel #3
0
    def parse_sample(self, group, sample_list):
        """Collect the numbers reported in a group's ``align_summary.txt``.

        The file is read from ``self.result_root / group`` and handed to the
        two module-level extractors. Every named group of both matches is
        converted with ``D`` and merged into one dict; on a key collision
        the value from the pair/alignment match wins.

        Args:
            group: Name of the result sub-directory holding the summary.
            sample_list: Not used by this method.

        Returns:
            dict: Named-group name -> ``D``-converted value.

        Raises:
            ValueError: If either extractor finds no match in the file.
        """
        logger.debug("Reading align_summary.txt")
        summary_path = self.result_root / group / 'align_summary.txt'
        with open(summary_path) as summary_file:
            summary_text = summary_file.read()

        separate_match = _extract_separate(summary_text)
        if not separate_match:
            raise ValueError("Cannot get left/right info in align_summary.txt")
        pair_match = _extract_align(summary_text)
        if not pair_match:
            raise ValueError("Cannot get pair info in align_summary.txt")

        info_dict = {}
        for match in (separate_match, pair_match):
            for field, raw_value in match.groupdict().items():
                info_dict[field] = D(raw_value)
        return info_dict
Beispiel #4
0
 def _read_yaml(self):
     """Load ``job_info.yaml`` from ``self.root_path``.

     Returns:
         The parsed YAML document (whatever top-level object the file
         holds).
     """
     logger.info("Reading job_info.yaml")
     with open(self.root_path / "job_info.yaml") as f:
         # NOTE(review): this was ``yaml.load(f)`` with no Loader. That call
         # is deprecated since PyYAML 5.1, raises TypeError on PyYAML 6, and
         # with the old default loader could construct arbitrary Python
         # objects from the file. ``safe_load`` accepts standard YAML only;
         # if job_info.yaml relies on Python-specific tags, pass an explicit
         # Loader to ``yaml.load`` instead.
         return yaml.safe_load(f)