def print_sample_statistics(args):
    """Print read-count totals for one sample, summed over all of its mates.

    For every mate of the sample named by ``args.sampleName`` this accumulates:

    * the counts returned by ``count_fastq_reads`` for the first and second
      FastQC zip paths,
    * ``num_of_aligned`` from the mate's Bismark alignment report,
    * ``sequences_left()`` from the mate's deduplication report.

    The four totals are then written to stdout.

    Args:
        args: object exposing a ``sampleName`` attribute (presumably an
            argparse namespace -- confirm against the caller).
    """
    print("Counting statistics")
    sample_name = args.sampleName
    sample = MateHandler().get_sample_by_name(sample_name)

    total_first = 0
    total_second = 0
    total_aligned = 0
    total_deduplicated = 0

    for mate in sample.mates:
        dir_info = MateDirInfo(mate)

        total_first += count_fastq_reads(dir_info.first_fastqc_zip_path)
        total_second += count_fastq_reads(dir_info.second_fastqc_zip_path)

        alignment_report = BismarkReport.fromfile(dir_info.alignment_report_path)
        total_aligned += alignment_report.num_of_aligned

        dedup_report = DeduplicationReportParser(dir_info.deduplication_report_path)
        total_deduplicated += dedup_report.sequences_left()

    print("Total first: ", total_first)
    print("Total second: ", total_second)
    # Label previously read "Total algined: " (typo).
    print("Total aligned: ", total_aligned)
    print("Total deduplicated: ", total_deduplicated)
def merge_sample_alignments(args):
    """Run the sample-level pipeline for ``args.sampleName`` via condor.

    Looks the sample up through ``MateHandler``, wraps it in a
    ``SamplePipeline`` and hands that to a ``PipelineHandler`` configured with
    fixed resources (``ncores=22``, ``memory=220000``) and
    ``clean_output_dir=True`` before calling ``run_on_condor()``.

    Args:
        args: object exposing a ``sampleName`` attribute.
    """
    requested_name = args.sampleName
    target_sample = MateHandler().get_sample_by_name(requested_name)
    runner = PipelineHandler(
        SamplePipeline(target_sample),
        ncores=22,
        memory=220000,
        clean_output_dir=True,
    )
    runner.run_on_condor()
Example #3
0
def align_sample(args):
    """Set up and submit a ``MatePipeline`` for every mate of a sample.

    Args:
        args: object exposing a ``sampleName`` attribute that identifies the
            sample whose mates should be processed.
    """
    requested_name = args.sampleName
    target_sample = MateHandler().get_sample_by_name(requested_name)

    # Temporary arrangement: each mate is submitted straight to condor
    # (marked for removal in the future).
    for current_mate in target_sample.mates:
        mate_pipeline = MatePipeline(current_mate)
        mate_pipeline.setup()
        PipelineHandler(mate_pipeline, ncores=10, memory=10900).run_on_condor()
Example #4
0
        print("*************************************", file=self.log_file_handler)
        print("Splitting:", file=self.log_file_handler)
        self.split_bam_by_chromosome()
        print("*************************************", file=self.log_file_handler)
        print("", file=self.log_file_handler)
        print("", file=self.log_file_handler)


        print("*************************************", file=self.log_file_handler)
        print("Extracting:", file=self.log_file_handler)
        self.extract_methylation()
        print("*************************************", file=self.log_file_handler)
        print("", file=self.log_file_handler)
        print("", file=self.log_file_handler)

        # destroy the logger
        self.log_file_handler.close()


if __name__ == "__main__":
    # Manual run: execute the sample pipeline for the sample called
    # "test_sample" from Config/samples_config.yaml.
    print("Running sample pipeline for test sample")
    test_sample = MateHandler("Config/samples_config.yaml").get_sample_by_name("test_sample")
    # Not used below; still constructed exactly as before in case building the
    # directory info has side effects.
    dir_handler = SampleDirInfo(test_sample)
    SamplePipeline(test_sample).pipeline()

class FastqcReportWriter(object):
    """Write one HTML file per FastQC report tag.

    The HTML comes from ``fastqc_summary.get_html_for_tag(tag)`` and the
    destination path from a ``FastqcReportFileManager`` created for
    ``output_dir``.
    """

    def __init__(self, fastqc_summary, output_dir="./Fastqc_Writer_Report"):
        """Keep the summary and create the file manager for ``output_dir``."""
        self.fastqc_summary = fastqc_summary
        self.file_manager = FastqcReportFileManager(output_dir)

    def write_report_for_tag(self, tag):
        """Write the HTML for ``tag`` to its file and print the file's name."""
        destination = self.file_manager.file_path_for_tag(tag)
        markup = self.fastqc_summary.get_html_for_tag(tag)
        with open(destination, "w") as report:
            # print() appends the trailing newline after the markup.
            print(markup, file=report)
        report_name = os.path.basename(destination)
        print("Written: " + report_name)

    def write(self):
        """Write a report for every tag returned by ``fastqc_report_tags()``."""
        for report_tag in fastqc_report_tags():
            self.write_report_for_tag(report_tag)


if __name__ == "__main__":
    # Manual run: pair up the first/second FastQC zip paths of every project
    # mate, summarise them, and write the reports into TestData/FastqcSummary.
    mate_dir_infos = [MateDirInfo(mate) for mate in MateHandler().list_project_mates()]
    zip_pairs = [
        (info.first_fastqc_zip_path, info.second_fastqc_zip_path)
        for info in mate_dir_infos
    ]
    summary = PairedFastqcSummary(zip_pairs)
    FastqcReportWriter(summary, "TestData/FastqcSummary").write()



Example #6
0
            call_fastqc(self.dir_info.second, self.dir_info.fastqc_output_dir)
        except:
            raise Exception("Couldn't deduplicate for mate " + self.name() + "! Please inspect the log file")

    def call_trim_galore(self):
        """Build and run the Trim Galore command for this mate.

        A ``TrimGalore`` object is created from ``dir_info.first``,
        ``dir_info.second`` and ``dir_info.trim_galore_output_dir``; the
        command it generates is executed with ``run_shell_command``.

        Raises:
            Exception: if building or running the command fails. The
                underlying error is attached as ``__cause__``.
        """
        try:
            trimmer = TrimGalore(self.dir_info.first,
                                 self.dir_info.second,
                                 self.dir_info.trim_galore_output_dir)
            run_shell_command(trimmer.generate_command())
        except Exception as err:
            # Catch Exception rather than using a bare except, so that
            # KeyboardInterrupt / SystemExit propagate unchanged instead of
            # being reported as a trimming failure.
            raise Exception(
                "Couldn't trim galore for mate " + self.name() + "! Please inspect the log file"
            ) from err

    def pipeline(self, clean_output_dir=True):
        """Run setup followed by every processing stage, in fixed order.

        Order: ``setup(clean_output_dir)``, ``call_fastqc``,
        ``call_trim_galore``, ``call_bismark``, ``call_deduplicate``.
        ``extract_methylation`` is currently not invoked from here.

        Args:
            clean_output_dir: forwarded unchanged to ``setup``.
        """
        self.setup(clean_output_dir)
        stages = (
            self.call_fastqc,
            self.call_trim_galore,
            self.call_bismark,
            self.call_deduplicate,
        )
        for run_stage in stages:
            run_stage()

    def clean(self):
        """Placeholder for cleanup; always raises ``NotImplementedError``."""
        raise NotImplementedError

if __name__ == "__main__":
    # Manual run: execute the full mate pipeline for one hard-coded mate.
    selected_mate = MateHandler().get_mate_by_name("S112_tag_6_GCCAAT_L003_14101")
    MatePipeline(selected_mate).pipeline()