def print_sample_statistics(args):
    """Print read-count totals accumulated over every mate of one sample.

    The sample is looked up by ``args.sampleName`` through ``MateHandler``.
    For each mate in ``sample.mates`` four running totals are updated:

    * first / second: ``count_fastq_reads`` applied to the mate's
      ``first_fastqc_zip_path`` / ``second_fastqc_zip_path``;
    * aligned: ``num_of_aligned`` from the ``BismarkReport`` loaded from the
      mate's ``alignment_report_path``;
    * deduplicated: ``sequences_left()`` from the
      ``DeduplicationReportParser`` built on ``deduplication_report_path``.

    The totals are written to standard output; nothing is returned.

    Args:
        args: Object exposing a ``sampleName`` attribute.
            NOTE(review): presumably an ``argparse.Namespace`` -- confirm
            against the caller.
    """
    print("Counting statistics")

    sample_name = args.sampleName
    mate_handler = MateHandler()
    sample = mate_handler.get_sample_by_name(sample_name)

    total_first = 0
    total_second = 0
    total_aligned = 0
    total_deduplicated = 0

    for mate in sample.mates:
        di = MateDirInfo(mate)

        total_first += count_fastq_reads(di.first_fastqc_zip_path)
        total_second += count_fastq_reads(di.second_fastqc_zip_path)

        alignment_report = BismarkReport.fromfile(di.alignment_report_path)
        total_aligned += alignment_report.num_of_aligned

        drp = DeduplicationReportParser(di.deduplication_report_path)
        total_deduplicated += drp.sequences_left()

    # The labels keep their trailing space; together with print's default
    # separator this reproduces the original two-space layout.
    print("Total first: ", total_first)
    print("Total second: ", total_second)
    # Label previously read "Total algined: " (typo).
    print("Total aligned: ", total_aligned)
    print("Total deduplicated: ", total_deduplicated)
def merge_sample_alignments(args):
    """Submit the sample-level pipeline for ``args.sampleName``.

    The sample is resolved through ``MateHandler``, wrapped in a
    ``SamplePipeline`` and handed to a ``PipelineHandler`` configured with
    ``ncores=22``, ``memory=220000`` and ``clean_output_dir=True``; the
    handler's ``run_on_condor()`` is then invoked. Nothing is returned.

    Args:
        args: Object exposing a ``sampleName`` attribute.
    """
    requested_name = args.sampleName
    sample = MateHandler().get_sample_by_name(requested_name)

    handler = PipelineHandler(
        SamplePipeline(sample),
        ncores=22,
        memory=220000,
        clean_output_dir=True,
    )
    handler.run_on_condor()
def align_sample(args):
    """Set up and submit one ``MatePipeline`` per mate of a sample.

    The sample is resolved by ``args.sampleName`` through ``MateHandler``.
    Each of its mates gets its own ``MatePipeline``, which is prepared with
    ``setup()`` and then submitted via a ``PipelineHandler`` (``ncores=10``,
    ``memory=10900``) by calling ``run_on_condor()``. Nothing is returned.

    Args:
        args: Object exposing a ``sampleName`` attribute.
    """
    requested_name = args.sampleName
    sample = MateHandler().get_sample_by_name(requested_name)

    # Conditionally, submit all the samples directly to condor
    # (to be removed in the future).
    for mate in sample.mates:
        mate_pipeline = MatePipeline(mate)
        mate_pipeline.setup()
        PipelineHandler(mate_pipeline, ncores=10, memory=10900).run_on_condor()
print("*************************************", file=self.log_file_handler) print("Splitting:", file=self.log_file_handler) self.split_bam_by_chromosome() print("*************************************", file=self.log_file_handler) print("", file=self.log_file_handler) print("", file=self.log_file_handler) print("*************************************", file=self.log_file_handler) print("Extracting:", file=self.log_file_handler) self.extract_methylation() print("*************************************", file=self.log_file_handler) print("", file=self.log_file_handler) print("", file=self.log_file_handler) # destroy the logger self.log_file_handler.close() if __name__ == "__main__": print("Running sample pipeline for test sample") mate_handler = MateHandler("Config/samples_config.yaml") sample = mate_handler.get_sample_by_name("test_sample") dir_handler = SampleDirInfo(sample) # print(bamtools_clean_command(dir_handler.aligned_bam_path, dir_handler.filtered_bam_path)) # dir_handler = SampleDirInfo(sample) # print(dir_handler.list_aligned_bam_files()) sp = SamplePipeline(sample) sp.pipeline()
class FastqcReportWriter(object):
    """Write one HTML file per FastQC report tag.

    The HTML for a tag comes from the summary object's
    ``get_html_for_tag``; the destination path comes from a
    ``FastqcReportFileManager`` created for the output directory.
    """

    def __init__(self, fastqc_summary, output_dir="./Fastqc_Writer_Report"):
        """Store the summary and create the file manager for *output_dir*."""
        self.fastqc_summary = fastqc_summary
        self.file_manager = FastqcReportFileManager(output_dir)

    def write_report_for_tag(self, tag):
        """Write the HTML for *tag* to its file and report the file name."""
        destination = self.file_manager.file_path_for_tag(tag)
        markup = self.fastqc_summary.get_html_for_tag(tag)

        with open(destination, "w") as out:
            # print() appends a trailing newline after the markup.
            print(markup, file=out)

        written_name = os.path.basename(destination)
        print("Written: " + written_name)

    def write(self):
        """Write a report file for every tag from ``fastqc_report_tags()``."""
        for report_tag in fastqc_report_tags():
            self.write_report_for_tag(report_tag)


if __name__ == "__main__":
    # Demo run: summarise the FastQC archives of every project mate and
    # write the per-tag reports under TestData/FastqcSummary.
    mate_handler = MateHandler()
    dir_infos = [MateDirInfo(mate) for mate in mate_handler.list_project_mates()]
    zip_pairs = [
        (info.first_fastqc_zip_path, info.second_fastqc_zip_path)
        for info in dir_infos
    ]

    summary = PairedFastqcSummary(zip_pairs)
    FastqcReportWriter(summary, "TestData/FastqcSummary").write()
call_fastqc(self.dir_info.second, self.dir_info.fastqc_output_dir) except: raise Exception("Couldn't deduplicate for mate " + self.name() + "! Please inspect the log file") def call_trim_galore(self): try: trimmer = TrimGalore(self.dir_info.first, self.dir_info.second, self.dir_info.trim_galore_output_dir) run_shell_command(trimmer.generate_command()) except: raise Exception("Couldn't trim galore for mate " + self.name() + "! Please inspect the log file") def pipeline(self, clean_output_dir=True): self.setup(clean_output_dir) self.call_fastqc() self.call_trim_galore() self.call_bismark() self.call_deduplicate() ##self.extract_methylation() def clean(self): raise NotImplementedError() if __name__ == "__main__": handler = MateHandler() test_mate = handler.get_mate_by_name("S112_tag_6_GCCAAT_L003_14101") pipeline = MatePipeline(test_mate) pipeline.pipeline()