def fastqc_run(input_files, args, end_type):
    """Build the FastQC parameter dict and hand it to ``timer_run``.

    Args:
        input_files: One file name for ``"single"``, exactly two for ``"pair"``.
        args: Object providing ``output`` (reports go to ``<output>/reports``)
            and ``thread`` (passed as ``-t``).
        end_type: Either ``"single"`` or ``"pair"``.

    Raises:
        ValueError: If ``end_type`` is neither ``"single"`` nor ``"pair"``.
    """
    # Resolve the read files first so a bad call fails before any command
    # object is created.
    if end_type == "single":
        reads = (input_files[0],)
    elif end_type == "pair":
        mate_1, mate_2 = input_files
        reads = (mate_1, mate_2)
    else:
        raise ValueError("Unknown end type")

    runner = Command("fastqc")
    options = {
        "-o": os.path.join(args.output, "reports"),
        "-t": args.thread,
        "-k": "7",
    }
    if len(reads) == 1:
        # Single-end: the file is stored as the value of an empty option key.
        options[""] = reads[0]
    else:
        # Paired-end: both files are stored as one space-joined key.
        options[reads[0] + " " + reads[1]] = ""

    timer_run(runner, options)
def test_pipeline(args):
    """Run the salmon / hisat2+salmon pipeline on every FASTQ file in ``args.input``.

    Files are classified by name only:

    * names ending in ``"_1" + args.fastq_type`` / ``"_2" + args.fastq_type``
      are paired-end mates, matched by the part of the name before that suffix;
    * every other name ending in ``args.fastq_type`` is single-end.

    Each single file and each pair is passed to ``salmon_pipeline``; afterwards
    MultiQC is run on ``args.output``, the ``reports`` directory is removed and
    ``quant2tsv.R`` is invoked with ``args.output`` and ``args.method``.

    Args:
        args: Object providing ``input``, ``output``, ``fastq_type`` and
            ``method``.

    Raises:
        ValueError: If some ``_1`` file has no ``_2`` mate or vice versa.
    """
    fastq_type = args.fastq_type
    suffix_1 = "_1" + fastq_type
    suffix_2 = "_2" + fastq_type

    # Scan the directory once so all three groups come from the same snapshot.
    all_files = [
        name for name in os.listdir(args.input) if name.endswith(fastq_type)
    ]
    stems_1 = {
        name[:-len(suffix_1)] for name in all_files if name.endswith(suffix_1)
    }
    stems_2 = {
        name[:-len(suffix_2)] for name in all_files if name.endswith(suffix_2)
    }
    # Sorted so the processing order is reproducible between runs.
    single_files = sorted(
        name for name in all_files
        if not name.endswith((suffix_1, suffix_2))
    )

    # single
    print_single_logo()
    if not single_files:
        print("#" * 20, "No single files!", "#" * 20)
        print()
    else:
        for in_file in single_files:
            salmon_pipeline([in_file], args, end_type="single")

    # pair
    print_pair_logo()
    # Compare the sample stems, not just the counts: equal counts do not
    # guarantee that every _1 file has its own _2 mate.
    if stems_1 != stems_2:
        raise ValueError("There are not complete pairs")
    elif not stems_1:
        print("#" * 20, "No pair files!", "#" * 20)
        print()
    else:
        # Rebuild both names from the shared stem so mates can never be
        # mismatched by differing sort orders of the two lists.
        for stem in sorted(stems_1):
            salmon_pipeline(
                [stem + suffix_1, stem + suffix_2], args, end_type="pair"
            )

    # multiqc and delete other reports
    multiqc_command = Command("multiqc")
    multiqc_params = {args.output: "", "-o": args.output}
    timer_run(multiqc_command, multiqc_params, deco="+")
    shutil.rmtree(os.path.join(args.output, "reports"))

    # tximport
    tximport_command = Command("Rscript quant2tsv.R")
    tximport_params = {args.output: "", args.method: ""}
    timer_run(tximport_command, tximport_params, deco="*")
def salmon_aln_run(bam_file, basename, args):
    """Quantify an existing BAM file with ``salmon quant`` via ``timer_run``.

    Args:
        bam_file: Path passed to salmon as ``-a``.
        basename: Sample name; results go to ``<args.output>/<basename>_exp``.
        args: Object providing ``salmon_ref`` (may be ``None``) and ``output``.
    """
    # NOTE(review): the fallback is the index directory, yet it is passed as
    # ``-t`` — confirm this is the reference salmon expects in this mode.
    ref_path = (
        args.salmon_ref
        if args.salmon_ref is not None
        else "/local_volume/salmon_ref_index"
    )

    out_dir = os.path.join(args.output, basename + "_exp")
    os.makedirs(out_dir, exist_ok=True)

    quant = Command("salmon quant")
    options = {}
    options["-t"] = ref_path
    options["-l"] = "A"
    options["-a"] = bam_file
    options["-o"] = out_dir
    timer_run(quant, options)
def hisat2_run(trim_files, basename, args, end_type):
    """Align trimmed reads with hisat2 and produce a sorted, indexed BAM file.

    The parameter dict describes the shell pipeline
    ``hisat2 ... | samtools sort ... - > BAM && samtools index ... BAM`` and is
    handed to ``timer_run`` together with the ``hisat2`` command.

    Args:
        trim_files: One file for ``"single"``, exactly two for ``"pair"``.
        basename: Sample name; the BAM file is named ``<basename>.bam``.
        args: Object providing ``hisat2_ref`` (may be ``None``), ``input`` and
            ``thread``.
        end_type: Either ``"single"`` or ``"pair"``.

    Returns:
        Path of the BAM file, ``<args.input>/bam_files/<basename>.bam``.

    Raises:
        ValueError: If ``end_type`` is neither ``"single"`` nor ``"pair"``.
    """
    if end_type == "single":
        read_params = {"-U": trim_files[0]}
    elif end_type == "pair":
        trim_file_1, trim_file_2 = trim_files
        read_params = {"-1": trim_file_1, "-2": trim_file_2}
    else:
        # Same contract as the other *_run helpers instead of silently
        # treating every unknown value as paired-end.
        raise ValueError("Unknown end type")

    if args.hisat2_ref is None:
        ref_path = "/local_volume/hisat2_ref_index"
    else:
        ref_path = args.hisat2_ref

    bam_dir = os.path.join(args.input, "bam_files")
    # os.makedirs / exist_ok are the real names (os.makedir does not exist).
    os.makedirs(bam_dir, exist_ok=True)
    bam_file = os.path.join(bam_dir, basename + ".bam")

    # Every key must be unique: a dict keeps only the last value of a repeated
    # key, so separate "samtools" / "-@" entries would drop the sort stage.
    # Each samtools stage is therefore folded into a single key.
    # Assumes Command renders entries as space-separated "key value" pairs,
    # which the shell-fragment keys already relied on — TODO confirm.
    hisat2_params = {"-x": ref_path}
    hisat2_params.update(read_params)
    hisat2_params.update({
        "-p": args.thread,
        "| samtools sort -@": args.thread,
        "-O": "BAM",
        "- >": bam_file,
        "&& samtools index -@": args.thread,
        bam_file: "",
    })

    hisat2_command = Command("hisat2")
    timer_run(hisat2_command, hisat2_params)
    return bam_file
def make_salmon_index(args):
    """Download the reference FASTA and build the default salmon index.

    The archive at ``args.salmon_ref_dl`` is fetched with ``wget`` into
    ``/local_volume/tmp_fa``, decompressed with ``gunzip`` and indexed with
    ``salmon index`` into ``/local_volume/salmon_ref_index``. The temporary
    directory is removed afterwards, whether or not a step failed.

    Args:
        args: Object providing ``salmon_ref_dl``, the download URL.

    Raises:
        subprocess.CalledProcessError: If ``wget`` or ``gunzip`` exits with a
            non-zero status.
    """
    tmp_dir = "/local_volume/tmp_fa"
    archive = "/local_volume/tmp_fa/salmon_ref.fa.gz"

    print("salmon ref index is not detected, now creating...")
    os.makedirs("/local_volume/tmp_fa/", exist_ok=True)
    try:
        # "--quiet" is the wget option; the former "--quite" is not recognised.
        # check=True stops here instead of indexing a missing or partial file.
        subprocess.run(
            ["wget", "--quiet", "--quota=0", "-O", archive, args.salmon_ref_dl],
            check=True,
        )
        subprocess.run(["gunzip", archive], check=True)

        salmon_index_cmd = Command("salmon index")
        salmon_index_cmd_params = {
            "-t": "/local_volume/tmp_fa/salmon_ref.fa",
            "-i": "/local_volume/salmon_ref_index",
        }
        salmon_index_cmd.parse_params_dict(salmon_index_cmd_params)
        salmon_index_cmd.run()
    finally:
        # Clean up even on failure so a retry starts from an empty directory.
        shutil.rmtree(tmp_dir)
def fastp_run(input_files, trim_files, basenames, args, end_type):
    """Build the fastp parameter dict and hand it to ``timer_run``.

    Args:
        input_files: Raw read files; one for ``"single"``, two for ``"pair"``.
        trim_files: Output files for the trimmed reads, same length as
            ``input_files``.
        basenames: Sample names, same length as ``input_files``; the first one
            names the HTML/JSON reports.
        args: Object providing ``output``, ``thread`` and ``fastp`` (extra
            parameters merged via ``update_params`` when not ``None``).
        end_type: Either ``"single"`` or ``"pair"``.

    Raises:
        ValueError: If ``end_type`` is neither ``"single"`` nor ``"pair"``.
    """
    if end_type == "single":
        fastp_params = {"-i": input_files[0], "-o": trim_files[0]}
        report_name = basenames[0]
    elif end_type == "pair":
        input_file_1, input_file_2 = input_files
        trim_file_1, trim_file_2 = trim_files
        # One fastp run yields one report for the pair, so it is named after
        # the first mate (the old code used an undefined ``basename`` here).
        report_name, _ = basenames
        fastp_params = {
            "-i": input_file_1,
            "-I": input_file_2,
            "-o": trim_file_1,
            "-O": trim_file_2,
        }
    else:
        raise ValueError("Unknown end type")

    report_dir = os.path.join(args.output, "reports")
    fastp_params.update({
        "-w": args.thread,
        "-h": os.path.join(report_dir, report_name + "_fastp.html"),
        "-j": os.path.join(report_dir, report_name + "_fastp.json"),
    })

    if args.fastp is not None:
        fastp_params = update_params(fastp_params, args.fastp)

    fastp_command = Command("fastp")
    timer_run(fastp_command, fastp_params)
def salmon_map_run(trim_files, basename, args, end_type):
    """Quantify trimmed reads with ``salmon quant`` via ``timer_run``.

    Args:
        trim_files: One file for ``"single"``, exactly two for ``"pair"``.
        basename: Sample name; results go to ``<args.output>/<basename>_exp``.
        args: Object providing ``salmon_ref`` (may be ``None``), ``output`` and
            ``thread``.
        end_type: Either ``"single"`` or ``"pair"``.

    Raises:
        ValueError: If ``end_type`` is neither ``"single"`` nor ``"pair"``.
    """
    if end_type == "single":
        read_params = {"-r": trim_files[0]}
    elif end_type == "pair":
        trim_file_1, trim_file_2 = trim_files
        read_params = {"-1": trim_file_1, "-2": trim_file_2}
    else:
        # Previously an unknown value left salmon_params unbound; raise the
        # same error the other *_run helpers use.
        raise ValueError("Unknown end type")

    if args.salmon_ref is None:
        ref_path = "/local_volume/salmon_ref_index"
    else:
        ref_path = args.salmon_ref

    salmon_output = os.path.join(args.output, basename + "_exp")
    os.makedirs(salmon_output, exist_ok=True)

    salmon_params = {"-i": ref_path, "-p": args.thread, "-l": "A"}
    salmon_params.update(read_params)
    salmon_params.update({
        "-o": salmon_output,
        # Both modes use the same flags; the pair branch used to misspell
        # this one as "--gcBexias".
        "--gcBias": "",
        "--validateMappings": "",
    })

    salmon_command = Command("salmon quant")
    timer_run(salmon_command, salmon_params)
# Manual check of Command / update_params: print a fastqc command built from
# default parameters, then merge the values given via --fastqc and print the
# merged parameter dict.
import argparse

from function import Command, update_params

parser = argparse.ArgumentParser()
parser.add_argument("--fastqc", nargs="*")
args = parser.parse_args()

# Baseline parameters; the empty key carries the input file name.
fastqc_dict = {
    "-o": "test",
    "-t": "2",
    "-k": "7",
    "": "FILENAME",
}

fastqc_command = Command("fastqc")
fastqc_command.parse_params_dict(fastqc_dict)
print(fastqc_command)

# NOTE(review): args.fastqc is None when --fastqc is omitted — confirm that
# update_params accepts None.
update_fastqc_params = args.fastqc
updated_dict = update_params(fastqc_dict, update_fastqc_params)
fastqc_command.parse_params_dict(updated_dict)
print(updated_dict)