def print_sample_statistics(args):
    """Print read-count totals summed over every mate of one sample.

    The sample is looked up by name through ``MateHandler``. For each of its
    mates, four numbers are accumulated:

    * ``count_fastq_reads`` applied to the mate's first FastQC zip path,
    * ``count_fastq_reads`` applied to the mate's second FastQC zip path,
    * ``num_of_aligned`` from the mate's Bismark alignment report,
    * ``sequences_left()`` from the mate's deduplication report.

    Args:
        args: Object exposing a ``sampleName`` attribute (presumably the
            parsed command-line namespace -- confirm against the caller).

    Returns:
        None. The four totals are written to standard output.
    """
    print("Counting statistics")
    sample_name = args.sampleName
    mate_handler = MateHandler()
    sample = mate_handler.get_sample_by_name(sample_name)

    total_first = 0
    total_second = 0
    total_aligned = 0
    total_deduplicated = 0

    for mate in sample.mates:
        # MateDirInfo supplies the per-mate report/archive paths used below.
        di = MateDirInfo(mate)

        total_first += count_fastq_reads(di.first_fastqc_zip_path)
        total_second += count_fastq_reads(di.second_fastqc_zip_path)

        alignment_report = BismarkReport.fromfile(di.alignment_report_path)
        total_aligned += alignment_report.num_of_aligned

        drp = DeduplicationReportParser(di.deduplication_report_path)
        total_deduplicated += drp.sequences_left()

    print("Total first: ", total_first)
    print("Total second: ", total_second)
    # Label previously read "Total algined: " (typo).
    print("Total aligned: ", total_aligned)
    print("Total deduplicated: ", total_deduplicated)
def merge_sample_alignments(args):
    """Run the sample-level pipeline for ``args.sampleName`` via Condor.

    The sample is resolved through ``MateHandler``, wrapped in a
    ``SamplePipeline`` and passed to a ``PipelineHandler`` created with
    ``ncores=22``, ``memory=220000`` and ``clean_output_dir=True``; the
    handler's ``run_on_condor()`` is then invoked.

    Args:
        args: Object exposing a ``sampleName`` attribute.
    """
    requested_name = args.sampleName
    target_sample = MateHandler().get_sample_by_name(requested_name)
    handler = PipelineHandler(
        SamplePipeline(target_sample),
        ncores=22,
        memory=220000,
        clean_output_dir=True,
    )
    handler.run_on_condor()
# Example #3
# 0
def align_sample(args):
    """Set up and submit a ``MatePipeline`` for every mate of one sample.

    The sample is looked up by ``args.sampleName`` through ``MateHandler``.
    For each mate a ``MatePipeline`` is created, ``setup()`` is called on it,
    and it is handed to a ``PipelineHandler`` (``ncores=10``,
    ``memory=10900``) whose ``run_on_condor()`` is invoked.

    NOTE(review): the remainder of the loop body references ``self``, which
    is not defined in this function (its only parameter is ``args``). As
    written, the first iteration would raise ``NameError`` right after the
    first ``run_on_condor()`` call. These statements look like the tail of a
    pipeline method that was pasted here -- confirm and relocate or remove.

    Args:
        args: Object exposing a ``sampleName`` attribute.
    """
    sample_name = args.sampleName
    mate_handler = MateHandler()
    sample = mate_handler.get_sample_by_name(sample_name)

    # Temporary: each mate's pipeline is submitted straight to Condor
    # (marked for removal in the future).
    for mate in sample.mates:
        pipeline = MatePipeline(mate)
        pipeline.setup()
        pipeline_handler = PipelineHandler(pipeline, ncores=10, memory=10900)
        pipeline_handler.run_on_condor()
        # NOTE(review): everything below uses `self`, which does not exist in
        # this scope -- see the docstring.
        # Log a banner, then call split_bam_by_chromosome().
        print("*************************************", file=self.log_file_handler)
        print("Splitting:", file=self.log_file_handler)
        self.split_bam_by_chromosome()
        print("*************************************", file=self.log_file_handler)
        print("", file=self.log_file_handler)
        print("", file=self.log_file_handler)


        # Log a banner, then call extract_methylation().
        print("*************************************", file=self.log_file_handler)
        print("Extracting:", file=self.log_file_handler)
        self.extract_methylation()
        print("*************************************", file=self.log_file_handler)
        print("", file=self.log_file_handler)
        print("", file=self.log_file_handler)

        # Close the log file handle; nothing may be written to it afterwards.
        self.log_file_handler.close()


if __name__ == "__main__":
    # Manual entry point: run the sample-level pipeline for the sample named
    # "test_sample", as configured in Config/samples_config.yaml.
    print("Running sample pipeline for test sample")
    sample = MateHandler("Config/samples_config.yaml").get_sample_by_name(
        "test_sample"
    )
    # The result is not used below; the construction is kept because this
    # file does not show whether SampleDirInfo's constructor has side effects.
    dir_handler = SampleDirInfo(sample)
    SamplePipeline(sample).pipeline()