def fastqc_run(input_files, args, end_type):
    """Run FastQC on one single-end file or on one pair of mate files.

    Args:
        input_files: A sequence holding one path when ``end_type`` is
            ``"single"``, or exactly two paths when it is ``"pair"``.
        args: Options object; ``args.output`` and ``args.thread`` are read.
        end_type: Either ``"single"`` or ``"pair"``.

    Raises:
        ValueError: If ``end_type`` is neither ``"single"`` nor ``"pair"``.
    """
    # Reject unsupported modes before touching the inputs.
    if end_type not in ("single", "pair"):
        raise ValueError("Unknown end type")

    is_single = end_type == "single"
    if is_single:
        only_file = input_files[0]
    else:
        mate_1, mate_2 = input_files

    runner = Command("fastqc")

    # Options shared by both modes: report directory, thread count, k-mer size.
    options = {
        "-o": os.path.join(args.output, "reports"),
        "-t": args.thread,
        "-k": "7",
    }

    # The input files always come last in the parameter dict.
    if is_single:
        # A lone file is stored as the value of an empty option key.
        options[""] = only_file
    else:
        # Both mates are stored as one space-separated key with no value.
        options[mate_1 + " " + mate_2] = ""

    timer_run(runner, options)
def test_pipeline(args):
    """Run the pipeline that uses salmon and hisat2 + salmon.

    Files in ``args.input`` whose names end with ``args.fastq_type`` are
    split into paired-end mates (``<stem>_1<fastq_type>`` and
    ``<stem>_2<fastq_type>``) and single-end files (everything else). Each
    single file and each mate pair is handed to ``salmon_pipeline``; then
    multiqc is run on ``args.output``, the ``reports`` directory is removed,
    and ``Rscript quant2tsv.R`` is run with ``args.output`` and
    ``args.method``.

    Args:
        args: Options object; ``input``, ``fastq_type``, ``output`` and
            ``method`` are read here.

    Raises:
        ValueError: If some ``_1`` file has no ``_2`` file with the same stem,
            or vice versa.
    """
    suffix = args.fastq_type
    mate_1_suffix = "_1" + suffix
    mate_2_suffix = "_2" + suffix

    # Take a single directory snapshot so the three groups below cannot
    # disagree with each other if the directory changes mid-scan.
    fastq_files = [
        name for name in os.listdir(args.input) if name.endswith(suffix)
    ]

    # Index mates by the sample stem that precedes "_1"/"_2". Pairing by stem
    # is required: zipping two independently sorted lists mis-pairs samples
    # such as "a" and "a_15", because "a_1" < "a_15_1" but "a_15_2" < "a_2".
    mates_1 = {
        name[:-len(mate_1_suffix)]: name
        for name in fastq_files
        if name.endswith(mate_1_suffix)
    }
    mates_2 = {
        name[:-len(mate_2_suffix)]: name
        for name in fastq_files
        if name.endswith(mate_2_suffix)
    }

    # Sorted so that the processing order is the same on every run.
    single_files = sorted(
        set(fastq_files) - set(mates_1.values()) - set(mates_2.values())
    )

    # single
    print_single_logo()
    if not single_files:
        print("#" * 20, "No single files!", "#" * 20)
        print()
    else:
        for in_file in single_files:
            salmon_pipeline([in_file], args, end_type="single")

    # pair
    print_pair_logo()
    if mates_1.keys() != mates_2.keys():
        # Covers both a differing number of mates and equal counts whose
        # sample stems do not line up.
        raise ValueError("There are not complete pairs")
    if not mates_1:
        print("#" * 20, "No pair files!", "#" * 20)
        print()
    else:
        for stem in sorted(mates_1):
            salmon_pipeline(
                [mates_1[stem], mates_2[stem]], args, end_type="pair"
            )

    # multiqc and delete other reports
    multiqc_command = Command("multiqc")
    multiqc_params = {args.output: "", "-o": args.output}
    timer_run(multiqc_command, multiqc_params, deco="+")
    shutil.rmtree(os.path.join(args.output, "reports"))

    # tximport
    tximport_command = Command("Rscript quant2tsv.R")
    tximport_params = {args.output: "", args.method: ""}
    timer_run(tximport_command, tximport_params, deco="*")
def salmon_aln_run(bam_file, basename, args):
    """Run ``salmon quant`` on an alignment file.

    Results go to ``<args.output>/<basename>_exp``, which is created if it
    does not exist yet.

    Args:
        bam_file: Alignment file passed to salmon with ``-a``.
        basename: Sample name used to build the output directory name.
        args: Options object; ``args.salmon_ref`` and ``args.output`` are
            read.
    """
    # Fall back to the default location when no reference was supplied.
    # NOTE(review): the fallback is named like an index directory but is
    # passed with ``-t``; confirm this is what salmon expects here.
    reference = (
        "/local_volume/salmon_ref_index"
        if args.salmon_ref is None
        else args.salmon_ref
    )

    quant_dir = os.path.join(args.output, basename + "_exp")
    os.makedirs(quant_dir, exist_ok=True)

    quant_command = Command("salmon quant")
    quant_params = {}
    quant_params["-t"] = reference
    quant_params["-l"] = "A"
    quant_params["-a"] = bam_file
    quant_params["-o"] = quant_dir

    timer_run(quant_command, quant_params)
def hisat2_run(trim_files, basename, args, end_type):
    """Align trimmed reads with hisat2, then sort and index the BAM output.

    The BAM file is written to ``<args.input>/bam_files/<basename>.bam``; the
    ``bam_files`` directory is created if it does not exist yet.

    Args:
        trim_files: One trimmed read file for ``end_type == "single"``, or
            exactly two for ``end_type == "pair"``.
        basename: Sample name used for the BAM file name.
        args: Options object; ``hisat2_ref``, ``input`` and ``thread`` are
            read.
        end_type: Either ``"single"`` or ``"pair"``.

    Returns:
        The path of the BAM file.

    Raises:
        ValueError: If ``end_type`` is neither ``"single"`` nor ``"pair"``.
    """
    # Validate first so that a bad end type has no filesystem side effects,
    # and fails the same way the other *_run helpers do.
    if end_type == "single":
        read_params = {"-U": trim_files[0]}
    elif end_type == "pair":
        trim_file_1, trim_file_2 = trim_files
        read_params = {"-1": trim_file_1, "-2": trim_file_2}
    else:
        raise ValueError("Unknown end type")

    if args.hisat2_ref is None:
        ref_path = "/local_volume/hisat2_ref_index"
    else:
        ref_path = args.hisat2_ref

    # os.makedirs with exist_ok is the real API; the previous os.makedir call
    # with exit_ok raised AttributeError on every invocation.
    bam_dir = os.path.join(args.input, "bam_files")
    os.makedirs(bam_dir, exist_ok=True)
    bam_file = os.path.join(bam_dir, basename + ".bam")

    hisat2_command = Command("hisat2")
    hisat2_params = {"-x": ref_path}
    hisat2_params.update(read_params)
    hisat2_params["-p"] = args.thread

    # A dict keeps only one value per key, so repeating "samtools" and "-@"
    # for the sort and index stages dropped the sort stage. Each stage now
    # gets a unique multi-token key instead.
    # NOTE(review): this assumes Command renders each entry as "key value",
    # as the existing "- >" key already implies -- TODO confirm.
    hisat2_params["| samtools sort -@"] = args.thread
    hisat2_params["-O"] = "BAM"
    hisat2_params["- >"] = bam_file
    hisat2_params["&& samtools index -@"] = args.thread
    hisat2_params[bam_file] = ""

    timer_run(hisat2_command, hisat2_params)
    return bam_file
Esempio n. 5
0
def make_salmon_index(args):
    """Download the reference FASTA and build the salmon index from it.

    The compressed FASTA is fetched from ``args.salmon_ref_dl`` into
    ``/local_volume/tmp_fa``, decompressed, and indexed into
    ``/local_volume/salmon_ref_index``. The temporary directory is removed
    afterwards, whether or not the build succeeded.

    Args:
        args: Options object; ``args.salmon_ref_dl`` is read.

    Raises:
        subprocess.CalledProcessError: If ``wget`` or ``gunzip`` exits with a
            non-zero status.
    """
    print("salmon ref index is not detected, now creating...")
    os.makedirs("/local_volume/tmp_fa/", exist_ok=True)
    try:
        # "--quiet" is the real wget flag; the previous "--quite" spelling is
        # rejected by wget. check=True stops here on a failed download
        # instead of going on to index a file that does not exist.
        subprocess.run(
            [
                "wget", "--quiet", "--quota=0", "-O",
                "/local_volume/tmp_fa/salmon_ref.fa.gz", args.salmon_ref_dl
            ],
            check=True,
        )
        subprocess.run(
            ["gunzip", "/local_volume/tmp_fa/salmon_ref.fa.gz"],
            check=True,
        )
        salmon_index_cmd = Command("salmon index")
        salmon_index_cmd_params = {
            "-t": "/local_volume/tmp_fa/salmon_ref.fa",
            "-i": "/local_volume/salmon_ref_index"
        }
        salmon_index_cmd.parse_params_dict(salmon_index_cmd_params)
        salmon_index_cmd.run()
    finally:
        # Always drop the download so a failed attempt leaves nothing behind
        # to collide with the next one.
        shutil.rmtree("/local_volume/tmp_fa")
def fastp_run(input_files, trim_files, basenames, args, end_type):
    """Trim reads with fastp and write its HTML and JSON reports.

    Reports are written to ``<args.output>/reports/<basename>_fastp.html``
    and ``..._fastp.json``. In pair mode the first mate's basename names the
    reports.

    Args:
        input_files: One raw read file (single) or two (pair).
        trim_files: Output path(s) for the trimmed reads, matching
            ``input_files``.
        basenames: One basename (single) or two (pair).
        args: Options object; ``thread``, ``output`` and ``fastp`` are read.
            When ``args.fastp`` is not None it is passed to ``update_params``
            together with the default parameters.
        end_type: Either ``"single"`` or ``"pair"``.

    Raises:
        ValueError: If ``end_type`` is neither ``"single"`` nor ``"pair"``.
    """
    if end_type == "single":
        fastp_params = {
            "-i": input_files[0],
            "-o": trim_files[0],
        }
        report_name = basenames[0]
    elif end_type == "pair":
        input_file_1, input_file_2 = input_files
        trim_file_1, trim_file_2 = trim_files
        basename_1, _basename_2 = basenames
        fastp_params = {
            "-i": input_file_1,
            "-I": input_file_2,
            "-o": trim_file_1,
            "-O": trim_file_2,
        }
        # The pair branch used to reference an undefined ``basename`` and
        # raised NameError; the first mate's basename names the reports.
        report_name = basename_1
    else:
        raise ValueError("Unknown end type")

    report_dir = os.path.join(args.output, "reports")
    fastp_params["-w"] = args.thread
    fastp_params["-h"] = os.path.join(report_dir, report_name + "_fastp.html")
    fastp_params["-j"] = os.path.join(report_dir, report_name + "_fastp.json")

    fastp_command = Command("fastp")
    if args.fastp is not None:
        fastp_params = update_params(fastp_params, args.fastp)
    timer_run(fastp_command, fastp_params)
def salmon_map_run(trim_files, basename, args, end_type):
    """Run ``salmon quant`` directly on trimmed reads.

    Results go to ``<args.output>/<basename>_exp``, which is created if it
    does not exist yet.

    Args:
        trim_files: One trimmed read file for ``end_type == "single"``, or
            exactly two for ``end_type == "pair"``.
        basename: Sample name used to build the output directory name.
        args: Options object; ``salmon_ref``, ``output`` and ``thread`` are
            read.
        end_type: Either ``"single"`` or ``"pair"``.

    Raises:
        ValueError: If ``end_type`` is neither ``"single"`` nor ``"pair"``.
    """
    # Validate before creating the output directory; an unknown end type used
    # to create the directory and then fail on an unassigned variable.
    if end_type == "single":
        read_params = {"-r": trim_files[0]}
    elif end_type == "pair":
        trim_file_1, trim_file_2 = trim_files
        read_params = {"-1": trim_file_1, "-2": trim_file_2}
    else:
        raise ValueError("Unknown end type")

    if args.salmon_ref is None:
        ref_path = "/local_volume/salmon_ref_index"
    else:
        ref_path = args.salmon_ref

    salmon_output = os.path.join(args.output, basename + "_exp")
    os.makedirs(salmon_output, exist_ok=True)
    salmon_command = Command("salmon quant")

    salmon_params = {
        "-i": ref_path,
        "-p": args.thread,
        "-l": "A",
    }
    salmon_params.update(read_params)
    salmon_params["-o"] = salmon_output
    # Both modes now use the same flag; the pair branch previously carried
    # the misspelling "--gcBexias".
    salmon_params["--gcBias"] = ""
    salmon_params["--validateMappings"] = ""

    timer_run(salmon_command, salmon_params)
Esempio n. 8
0
from function import Command, update_params
import argparse

# Manual check of Command / update_params: build a fastqc command from the
# default parameters, then apply any overrides given with --fastqc.
parser = argparse.ArgumentParser()
# nargs="*" collects zero or more tokens; args.fastqc stays None when the
# flag is not given at all.
parser.add_argument("--fastqc", nargs="*")
args = parser.parse_args()

fastqc_dict = {"-o": "test", "-t": "2", "-k": "7", "": "FILENAME"}
fastqc_command = Command("fastqc")
fastqc_command.parse_params_dict(fastqc_dict)
print(fastqc_command)

update_fastqc_params = args.fastqc
if update_fastqc_params is None:
    # No overrides requested: keep the defaults rather than handing None to
    # update_params (fastp_run guards the same call the same way).
    updated_dict = fastqc_dict
else:
    updated_dict = update_params(fastqc_dict, update_fastqc_params)
fastqc_command.parse_params_dict(updated_dict)
print(updated_dict)