Example #1
def stat_kmer_depth(r1, r2, name, kmer_length, thread, job_type, concurrent,
                    refresh, work_dir, out_dir):
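    """Build a two-task DAG that merges the input reads and runs k-mer
    frequency statistics on the merged data.

    Returns the merged r1/r2 paths, the k-mer statistics file and the
    software options recorded by the k-mer frequency task.
    """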

    dag = DAG("survey_data")
    data_task, r1, r2 = merge_data_task(
        name=name,
        r1=r1,
        r2=r2,
        job_type=job_type,
        work_dir=work_dir,
        out_dir=out_dir)

    kmerfreq_task, kmer_stat, option = create_kmerfreq_task(
        r1=r1,
        r2=r2,
        name=name,
        kmer_length=kmer_length,
        thread=thread,
        job_type=job_type,
        work_dir=work_dir,
        out_dir=out_dir)

    dag.add_task(data_task)
    dag.add_task(kmerfreq_task)
    kmerfreq_task.set_upstream(data_task)
    do_dag(dag, concurrent, refresh)

    return r1, r2, kmer_stat, option
Example #2
def run_ncovann(genomes, refgff, concurrent, refresh, job_type, work_dir,
                out_dir):
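    """Annotate each input genome against the reference GFF.

    One annotation task is created per genome; the sample name is derived
    from the genome file name. Returns 0 on completion.
    """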

    genomes = check_paths(genomes)
    refgff = check_path(refgff)
    work_dir = mkdir(work_dir)
    out_dir = mkdir(out_dir)
    dag = DAG("ncovann")

    for genome in genomes:
        name = genome.split('/')[-1]
        if '--' in name:
            name = name.split('--')[0].split('.')[0]
        else:
            name = name.split('.')[0]

        ann_task = create_ncovann_task(genome=genome,
                                       name=name,
                                       refgff=refgff,
                                       job_type=job_type,
                                       work_dir=work_dir,
                                       out_dir=out_dir)

        dag.add_task(ann_task)

    do_dag(dag, concurrent, refresh)

    return 0
Example #3
def run_gc_depth(genome, fastq_list, name, window, thread, job_type,
                 concurrent, refresh, work_dir, out_dir):
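    """Map reads to the genome with bwa mem, then compute windowed
    GC-depth statistics on the sorted BAM.

    Returns the path of the GC-depth plot (PNG).
    """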

    genome, fastq_list = check_paths([genome, fastq_list])

    sort_bam, genome = bwa_mem(fastq_list=fastq_list,
                               genome=genome,
                               name=name,
                               number=5000000,
                               data_type='',
                               thread=thread,
                               job_type=job_type,
                               concurrent=concurrent,
                               refresh=refresh,
                               work_dir=work_dir,
                               out_dir=work_dir)

    sort_bam = check_paths(sort_bam)
    dag = DAG("gc_depth")

    gc_depth_task, gc_depth_png = stat_gc_depth_task(genome=genome,
                                                     bam=sort_bam,
                                                     name=name,
                                                     window=window,
                                                     job_type=job_type,
                                                     work_dir=work_dir,
                                                     out_dir=out_dir)

    dag.add_task(gc_depth_task)
    do_dag(dag, concurrent, refresh)

    return gc_depth_png
Example #4
def choose_data(r1, r2, name, kmer_length, kmer_depth, thread, job_type,
                concurrent, refresh, work_dir, out_dir):
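    """Estimate the k-mer depth of the input reads, then subsample them
    down to the requested kmer_depth.

    Returns the software options and the chosen r1/r2 read sets.
    """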

    r1, r2, kmer_stat, option = stat_kmer_depth(
        r1=r1,
        r2=r2,
        name=name,
        kmer_length=kmer_length,
        thread=thread,
        job_type=job_type,
        concurrent=concurrent,
        refresh=refresh,
        work_dir=work_dir,
        out_dir=out_dir)

    dag = DAG("choose_data")
    data_task, r1, r2 = choose_data_task(
        r1=r1,
        r2=r2,
        name=name,
        kmer_stat=kmer_stat,
        kmer_depth=kmer_depth,
        job_type=job_type,
        work_dir=work_dir,
        out_dir=out_dir)
    dag.add_task(data_task)
    do_dag(dag, concurrent, refresh)

    return option, r1, r2
Example #5
def run_ncovsnp(reads,
                reffa,
                refgb,
                thread,
                concurrent,
                refresh,
                job_type,
                work_dir,
                out_dir,
                clean=""):

    reads = check_paths(reads)
    reffa = check_path(reffa)
    refgb = check_path(refgb)
    work_dir = mkdir(work_dir)
    out_dir = mkdir(out_dir)
    options = {"software": OrderedDict(), "database": OrderedDict()}

    dag = DAG("ncovsnp")
    option = OrderedDict()
    depths = os.path.join(work_dir, "*/*.depth.xls")
    snps = os.path.join(work_dir, "*/*.snps.gff")

    stat_map_task = stat_mapcover_snp(reads=' '.join(reads),
                                      clean=clean,
                                      depths=depths,
                                      snps=snps,
                                      job_type="local",
                                      work_dir=work_dir,
                                      out_dir=out_dir)
    genomes = []

    for read in reads:
        name = read.split('/')[-1]
        if '--' in name:
            name = name.split('--')[0].split('.')[0]
        else:
            name = name.split('.')[0]

        name_work = mkdir(os.path.join(work_dir, name))
        snp_task, snippy_task, concencus, option = create_ncovsnp_tasks(
            read=read,
            name=name,
            reffa=reffa,
            refgb=refgb,
            thread=thread,
            job_type=job_type,
            work_dir=name_work,
            out_dir=out_dir)
        genomes.append(concencus)
        dag.add_task(snp_task)
        dag.add_task(snippy_task)
        stat_map_task.set_upstream(snp_task)
        stat_map_task.set_upstream(snippy_task)
    options["software"] = option
    dag.add_task(stat_map_task)
    do_dag(dag, concurrent, refresh)

    return genomes, options
Example #6
def run_ncovqc(reads, reference, thread, job_type, concurrent, refresh,
               work_dir, out_dir):
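    """QC workflow: collect raw-read statistics, map the reads to the
    reference to obtain clean reads, then collect clean-read statistics.

    Returns the clean reads, the clean-read statistics file and the
    recorded software options.
    """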

    reference = check_path(reference)
    work_dir = mkdir(work_dir)
    out_dir = mkdir(out_dir)
    reads = check_paths(reads)
    names = []

    for i in reads:
        name = i.split('/')[-1]
        if '--' in name:
            name = name.split('--')[1].split('.bam')[0]
        else:
            name = name.split('.')[0]
        names.append(name)

    options = {"software": OrderedDict(), "database": OrderedDict()}

    dag = DAG("ncovqc")
    raw_task, raw_stat = stat_reads_task(reads=" ".join(reads),
                                         name="raw",
                                         thread=thread,
                                         job_type=job_type,
                                         work_dir=work_dir,
                                         out_dir=out_dir)

    map_tasks, clean_reads, option = map_ref_tasks(reads=reads,
                                                   names=names,
                                                   reference=reference,
                                                   thread=thread,
                                                   job_type=job_type,
                                                   work_dir=work_dir,
                                                   out_dir=out_dir)
    options["software"] = option

    clean_task, clean_stat = stat_reads_task(reads=clean_reads,
                                             name="clean",
                                             thread=thread,
                                             job_type=job_type,
                                             work_dir=work_dir,
                                             out_dir=out_dir)
    dag.add_task(raw_task)
    dag.add_task(*map_tasks)
    dag.add_task(clean_task)
    clean_task.set_upstream(*map_tasks)
    do_dag(dag, concurrent, refresh)

    return clean_reads, clean_stat, options
Example #7
def split_data(r1, r2, name, number, job_type, concurrent, refresh, work_dir,
               out_dir, platform="illumina"):
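    """Split the input reads into chunks of `number` reads with splitfp.py.

    For paired-end platforms (illumina, mgi) each returned entry holds the
    matching r1/r2 chunk paths separated by a space; for long-read
    platforms the r2 input is ignored.
    """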

    if platform in ["PromethION", "GridION" , "RSII", "Sequel"]:
        read = "%s.part_*.fast*" % name
        r2 = ""
    elif platform in ["illumina", "mgi"]:
        read = "%s.r1.part_*.fastq" % name
    else:
        raise Exception("The input sequencing platform is abnormal.")

    dag = DAG("split_data")
    task = Task(
        id="split_data",
        work_dir=work_dir,
        type=job_type,
        option="-pe smp 1",
        script="""
{script}/splitfp.py -r1 {r1} -r2 {r2} -o {name} -n {number}
#cp {name}.* {out_dir}
""".format(
            script=SCRIPTS,
            r1=r1,
            r2=r2,
            name=name,
            number=number,
            out_dir=out_dir
        )
    )

    dag.add_task(task)
    do_dag(dag, concurrent, refresh)

    temp = read_files(work_dir, read)
    reads = []

    if platform in ["illumina", "mgi"]:
        for i in temp:
            j = i.replace(".r1.part_", ".r2.part_")
            reads.append("%s %s" % (i, j))
    else:
        reads = temp

    return reads
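
For orientation, a minimal call to the splitter above might look like the sketch below; the file names and tuning values are illustrative assumptions, and the helpers the function relies on (SCRIPTS, Task, DAG, do_dag, read_files) must already be importable from the surrounding pipeline package.

# Usage sketch (hypothetical values; not from the original source).
pairs = split_data(r1="sample.r1.fastq",
                   r2="sample.r2.fastq",
                   name="sample",
                   number=2000000,      # reads per chunk
                   job_type="local",
                   concurrent=8,
                   refresh=30,
                   work_dir="work/00_data",
                   out_dir="out/00_data",
                   platform="illumina")

for pair in pairs:
    # each entry holds the matching r1/r2 chunk paths, e.g.
    # "sample.r1.part_001.fastq sample.r2.part_001.fastq"
    print(pair)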
Example #8
def bwa_mem(fastq_list, genome, name, number, data_type, thread, job_type,
            concurrent, refresh, work_dir, out_dir):
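    """Split the input reads, index the genome, run bwa mem on each chunk
    and merge the per-chunk BAMs into one sorted BAM.

    Returns the sorted BAM and the (indexed) genome path.
    """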

    genome, fastq_list = check_paths([genome, fastq_list])
    work_dir = mkdir(work_dir)
    out_dir = mkdir(out_dir)

    dag = DAG("split_ngs")
    split_work = mkdir(os.path.join(work_dir, "00_data"))
    split_out = mkdir(os.path.join(out_dir, "00_data"))

    splitfp_task, fq_path, r1_name, r2_name = split_ngs_task(
        fastq_list=fastq_list,
        name=name,
        number=number,
        data_type=data_type,
        job_type=job_type,
        work_dir=split_work,
        out_dir=split_out)
    dag.add_task(splitfp_task)
    do_dag(dag, concurrent, refresh)

    dag = DAG("bwa_mem")
    index_task, bwa_tasks, merge_task, sorted_bam, genome = run_bwa_mem(
        fq_path=fq_path,
        r1_name=r1_name,
        r2_name=r2_name,
        genome=genome,
        name=name,
        thread=thread,
        job_type=job_type,
        work_dir=work_dir,
        out_dir=out_dir)

    dag.add_task(index_task)
    dag.add_task(*bwa_tasks)
    dag.add_task(merge_task)
    index_task.set_downstream(*bwa_tasks)
    merge_task.set_upstream(*bwa_tasks)

    do_dag(dag, concurrent, refresh)

    return sorted_bam, genome
Example #9
def run_minimap(reads, genome, platform, name, split, thread, job_type,
                concurrent, refresh, work_dir, out_dir):
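    """Map long reads to the genome with minimap2 (one task per input)
    and merge the resulting BAMs.

    Returns the merged BAM and the recorded minimap2 options.
    """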

    option = OrderedDict()
    option["minimap2"] = {
        "version": get_version(SOFTWARE_VERSION["minimap2"]),
        "option": "%s" % SEQUENCER[platform]["minimap2"]
    }

    work_dict = {"minimap": "01_minimap", "merge": "02_merge"}

    for k, v in work_dict.items():
        mkdir(os.path.join(work_dir, v))

    dag = DAG("minimap")

    minimap_tasks, bams = create_minimap_tasks(reads=reads,
                                               genome=genome,
                                               platform=platform,
                                               name=name,
                                               thread=thread,
                                               job_type=job_type,
                                               work_dir=os.path.join(
                                                   work_dir,
                                                   work_dict["minimap"]),
                                               out_dir=out_dir,
                                               split=split)

    merge_task, bam = merge_bam_task(bams=bams,
                                     name=name,
                                     thread=thread,
                                     job_type=job_type,
                                     work_dir=os.path.join(
                                         work_dir, work_dict["merge"]),
                                     out_dir=out_dir)

    dag.add_task(*minimap_tasks)
    dag.add_task(merge_task)
    merge_task.set_upstream(*minimap_tasks)
    do_dag(dag, concurrent, refresh)

    return bam, option
Example #10
def split_data(r1, r2, name, number, job_type, work_dir, out_dir):
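    """Variant of split_data that infers single- versus paired-end input
    from the lengths of the r1/r2 lists instead of a platform name.
    """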

    if len(r1) != len(r2) and len(r2) <= 1:
        read = "%s.part_*.fast*" % name
        r2 = ""
    elif len(r1) == len(r2):
        read = "%s.r1.part_*.fastq" % name
    else:
        raise Exception("The input sequencing platform is abnormal.")

    dag = DAG("split_data")
    task = Task(id="split_data",
                work_dir=work_dir,
                type=job_type,
                option="-pe smp 1",
                script="""
{script}/splitfp.py -r1 {r1} -r2 {r2} -o {name} -n {number}
#cp {name}.* {out_dir}
""".format(script=SCRIPTS,
           r1=r1,
           r2=r2,
           name=name,
           number=number,
           out_dir=out_dir))

    dag.add_task(task)
    do_dag(dag, 8, 10)

    temp = read_files(work_dir, read)
    reads = []

    if len(r1) == len(r2):
        for i in temp:
            j = i.replace(".r1.part_", ".r2.part_")
            reads.append("%s %s" % (i, j))
    else:
        reads = temp

    return reads
Example #11
def run_gc_depth(genome, r1, r2, name, platform, split, window, thread,
                 job_type, concurrent, refresh, work_dir, out_dir):
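    """Variant of run_gc_depth that maps the reads with minimap instead
    of bwa mem before computing the windowed GC-depth statistics.

    Returns the path of the GC-depth plot (PNG).
    """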

    genome = check_path(genome)
    r1 = check_paths(r1)
    r2 = check_paths(r2)

    sort_bam = minimap(r1=r1,
                       r2=r2,
                       genome=genome,
                       name=name,
                       split=split,
                       platform=platform,
                       number=5000000,
                       thread=thread,
                       job_type=job_type,
                       concurrent=concurrent,
                       refresh=refresh,
                       work_dir=work_dir,
                       out_dir=out_dir)

    sort_bam = check_paths(sort_bam)
    dag = DAG("gc_depth")

    gc_depth_task, gc_depth_png = stat_gc_depth_task(genome=genome,
                                                     bam=sort_bam,
                                                     name=name,
                                                     window=window,
                                                     job_type=job_type,
                                                     work_dir=work_dir,
                                                     out_dir=out_dir)

    dag.add_task(gc_depth_task)
    do_dag(dag, concurrent, refresh)

    return gc_depth_png
Example #12
def run_survey(r1, r2, name, trim, kingdom, kmer_length, sample_depth, thread,
               asm, window, job_type, queue, concurrent, refresh, work_dir,
               out_dir):
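    """Full survey pipeline: read QC, k-mer depth estimation, genome-size
    survey, optional assembly plus GC-depth statistics, and a final report.

    Returns the QC and survey result files used by the report.
    """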

    work_dir = mkdir(work_dir)
    out_dir = mkdir(out_dir)
    r1 = check_paths(r1)
    r2 = check_paths(r2)

    dag = DAG("survey_qc")
    (merge_task, qc_task, cont_task, result_task, clean1, clean2, quality,
     content, gc, stat_qc, poll_png, poll_tsv) = ngs_qc_tasks(
        name=name,
        r1=r1,
        r2=r2,
        trim=trim,
        thread=thread,
        job_type=job_type,
        work_dir=work_dir,
        out_dir=out_dir)

    data_work = mkdir(os.path.join(work_dir, "choose_data"))
    freq_task1, histo1, kmer_stat, estimate1 = kmerfreq_task(
        r1=clean1,
        r2=clean2,
        name=name,
        kmer_length=kmer_length,
        thread=thread,
        job_type=job_type,
        work_dir=data_work,
        out_dir=data_work)

    dag.add_task(merge_task)
    dag.add_task(qc_task)
    qc_task.set_upstream(merge_task)
    dag.add_task(cont_task)
    dag.add_task(result_task)
    dag.add_task(freq_task1)
    freq_task1.set_upstream(qc_task)
    cont_task.set_upstream(qc_task)
    result_task.set_upstream(qc_task)

    do_dag(dag, concurrent, refresh)

    kmer_depth = 0
    for line in read_tsv(kmer_stat):
        if line[0] == "kmer_depth":
            kmer_depth = int(line[1])
    if not kmer_depth:
        raise ValueError("kmer_depth not found in %s" % kmer_stat)

    if sample_depth > kmer_depth:
        LOG.debug(
            'The amount of sequencing data may be insufficient. Sequencing depth is only %s X'
            % kmer_depth)
        sample_depth = kmer_depth
    proportion = sample_depth * 1.0 / kmer_depth

    dag = DAG("survey")
    (choose_task, freq_task, heter_task, jellyfish_task, gse_scope_task,
     denovo_task, stat_heter, heter_png, scope_txt, gse_txt, scope_png,
     gse_png, stat_genome, genome, ngs_list) = kmer_denovo_tasks(
        r1=clean1,
        r2=clean2,
        name=name,
        kmer_length=kmer_length,
        proportion=proportion,
        kingdom=kingdom,
        thread=thread,
        job_type=job_type,
        queue=queue,
        work_dir=work_dir,
        out_dir=out_dir)
    if asm == "true":
        dag.add_task(denovo_task)
    else:
        genome = "false"
        stat_genome = "false"
        ngs_list = "false"

    dag.add_task(choose_task)
    dag.add_task(freq_task)
    dag.add_task(heter_task)
    freq_task.set_upstream(choose_task)
    dag.add_task(jellyfish_task)
    jellyfish_task.set_upstream(choose_task)
    dag.add_task(gse_scope_task)
    heter_task.set_upstream(freq_task)
    gse_scope_task.set_upstream(jellyfish_task)
    do_dag(dag, concurrent, refresh)

    if ngs_list == "false":
        print("Genomics are not assembled")
        gc_depth_png = heter_png
    else:
        depth_work = mkdir(os.path.join(work_dir, "05_GC-depth"))
        depth_out = mkdir(os.path.join(out_dir, "05_GC-depth"))
        gc_depth_png = run_gc_depth(genome=genome,
                                    fastq_list=ngs_list,
                                    name=name,
                                    window=window,
                                    thread=thread,
                                    job_type=job_type,
                                    concurrent=concurrent,
                                    refresh=refresh,
                                    work_dir=depth_work,
                                    out_dir=depth_out)

    run_report(name, asm, kmer_length, stat_qc, quality, content, gc, poll_tsv,
               poll_png, stat_heter, heter_png, scope_txt, gse_txt, scope_png,
               gse_png, stat_genome, gc_depth_png, out_dir)

    return (stat_qc, quality, content, gc, poll_png, poll_tsv, stat_heter,
            heter_png, scope_txt, gse_txt, scope_png, gse_png, stat_genome)
Example #13
def run_filter_contamination(r1, r2, name, kmer_length, kmer_depth, taxid,
                             kingdom, thread, job_type, concurrent, refresh,
                             work_dir, out_dir, split, mode="fast", cratio=10):
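    """Subsample the reads to the target k-mer depth and, unless running
    in fast mode, remove contaminant reads by mapping the read chunks
    against a contamination reference obtained for the given taxid.

    Returns the (possibly filtered) reads and the recorded options.
    """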

    work_dir = mkdir(work_dir)
    out_dir = mkdir(out_dir)
    r1 = check_paths(r1)
    r2 = check_paths(r2)
    taxid = check_path(taxid)
    options = {
        "software": OrderedDict(),
        "database": OrderedDict()
    }

    option, r1, r2 = choose_data(
        r1=r1,
        r2=r2,
        name=name,
        kmer_length=kmer_length,
        kmer_depth=kmer_depth,
        thread=thread,
        job_type=job_type,
        concurrent=concurrent,
        refresh=refresh,
        work_dir=work_dir,
        out_dir=out_dir)
    options["software"].update(option)

    if mode!="fast":
        work_dict = {
            "data": "00_data",
            "ref": "01_ref",
            "ump": "02_ump"
        }
        for k, v in work_dict.items():
            mkdir(os.path.join(work_dir, v))

        reads = split_data(
            r1=r1,
            r2=r2,
            name=name,
            number=2000000,
            job_type=job_type,
            work_dir=os.path.join(work_dir, work_dict["data"]),
            concurrent=concurrent,
            refresh=refresh,
            out_dir=out_dir,
            platform="illumina")

        dag = DAG("unmap_data")
        ref_task, ref = obtain_contamination_task(
            taxid=taxid,
            name=name,
            kingdom=kingdom,
            job_type=job_type,
            work_dir=os.path.join(work_dir, work_dict["ref"]),
            out_dir=out_dir,
            mode=mode,
            cratio=cratio)
        dag.add_task(ref_task)

        unmap_tasks, reads, option = create_unmap_tasks(
            name=name,
            reference=ref,
            reads=reads,
            thread=thread,
            job_type=job_type,
            work_dir=os.path.join(work_dir, work_dict["ump"]),
            out_dir=out_dir,
            split=split)
        dag.add_task(*unmap_tasks)
        ref_task.set_downstream(*unmap_tasks)
        do_dag(dag, concurrent, refresh)
        options["software"].update(option)

        reads = [reads]
    else:
        reads = [r1, r2]

    return reads, options
Example #14
def run_kmer_denovo(r1, r2, name, kingdom, kmer_length, sample_depth, thread,
                    asm, window, job_type, queue, concurrent, refresh,
                    work_dir, out_dir):
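    """Survey variant that merges the raw reads, estimates the k-mer
    depth, runs the genome-size survey and, if asm is "true", assembles
    the genome and computes GC-depth statistics.
    """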

    work_dir = mkdir(work_dir)
    out_dir = mkdir(out_dir)
    r1 = check_paths(r1)
    r2 = check_paths(r2)

    if r1[0].endswith(".gz") or r2[0].endswith(".gz"):
        tools = "zcat"
    else:
        tools = "cat"

    dag_data = DAG("survey_data")

    data_work = mkdir(os.path.join(work_dir, "choose_data"))
    cat_data_task, clean1, clean2 = merge_raw_data_task(name=name,
                                                        r1=" ".join(r1),
                                                        r2=" ".join(r2),
                                                        tools=tools,
                                                        job_type=job_type,
                                                        work_dir=data_work,
                                                        out_dir=data_work)

    freq_task1, histo1, kmer_stat, estimate1 = kmerfreq_task(
        r1=clean1,
        r2=clean2,
        name=name,
        kmer_length=17,
        thread=thread,
        job_type=job_type,
        work_dir=data_work,
        out_dir=data_work)

    dag_data.add_task(cat_data_task)
    dag_data.add_task(freq_task1)
    freq_task1.set_upstream(cat_data_task)
    do_dag(dag_data, concurrent, refresh)

    kmer_depth = 0
    for line in read_tsv(kmer_stat):
        if line[0] == "kmer_depth":
            kmer_depth = int(line[1])
    if not kmer_depth:
        raise ValueError("kmer_depth not found in %s" % kmer_stat)

    if sample_depth > kmer_depth:
        LOG.debug(
            'The amount of sequencing data may be insufficient. Sequencing depth is only %s X'
            % kmer_depth)
        sample_depth = kmer_depth
    proportion = sample_depth * 1.0 / kmer_depth

    dag = DAG("survey")

    (choose_task, freq_task, heter_task, jellyfish_task, gse_scope_task,
     denovo_task, stat_heter, heter_png, scope_txt, gse_txt, scope_png,
     gse_png, stat_genome, genome, ngs_list) = kmer_denovo_tasks(
        r1=clean1,
        r2=clean2,
        name=name,
        kmer_length=kmer_length,
        proportion=proportion,
        kingdom=kingdom,
        thread=thread,
        job_type=job_type,
        queue=queue,
        work_dir=work_dir,
        out_dir=out_dir)
    if asm == "true":
        dag.add_task(denovo_task)
    else:
        genome = "false"
        stat_genome = "false"
        ngs_list = "false"

    dag.add_task(choose_task)
    dag.add_task(freq_task)
    dag.add_task(heter_task)
    freq_task.set_upstream(choose_task)
    dag.add_task(jellyfish_task)
    jellyfish_task.set_upstream(choose_task)
    dag.add_task(gse_scope_task)
    heter_task.set_upstream(freq_task)
    gse_scope_task.set_upstream(jellyfish_task)
    do_dag(dag, concurrent, refresh)

    if ngs_list == "false":
        print("Genomics are not assembled")
        gc_depth_png = heter_png
    else:
        depth_work = mkdir(os.path.join(work_dir, "05_GC-depth"))
        depth_out = mkdir(os.path.join(out_dir, "05_GC-depth"))
        gc_depth_png = run_gc_depth(genome=genome,
                                    fastq_list=ngs_list,
                                    name=name,
                                    window=window,
                                    thread=thread,
                                    job_type=job_type,
                                    concurrent=concurrent,
                                    refresh=refresh,
                                    work_dir=depth_work,
                                    out_dir=depth_out)
    return (stat_heter, heter_png, scope_txt, gse_txt, scope_png, gse_png,
            stat_genome)
Example #15
def run_kmer_denovo(r1,
                    r2,
                    taxid,
                    name,
                    mode,
                    cratio,
                    kmer_length,
                    kmer_depth,
                    kingdom,
                    asm,
                    window,
                    thread,
                    job_type,
                    queue,
                    concurrent,
                    refresh,
                    work_dir,
                    out_dir,
                    split,
                    platform="illumina"):

    work_dir = mkdir(work_dir)
    out_dir = mkdir(out_dir)
    r1 = check_paths(r1)
    r2 = check_paths(r2)

    work_dict = {
        "contamination": "01_contamination",
        "gse_scope": "02_gse_scope",
        "kmerfreq": "03_Kmerfreq",
        "denovo": "04_Soapdenovo",
        "gc_depth": "05_GC-depth"
    }

    for k, v in work_dict.items():
        mkdir(os.path.join(work_dir, v))
        if k == "contamination":
            continue
        mkdir(os.path.join(out_dir, v))

    reads, options = run_filter_contamination(r1=r1,
                                              r2=r2,
                                              name=name,
                                              kmer_length=kmer_length,
                                              kmer_depth=kmer_depth,
                                              taxid=taxid,
                                              kingdom=kingdom,
                                              thread=thread,
                                              job_type=job_type,
                                              concurrent=concurrent,
                                              refresh=refresh,
                                              work_dir=os.path.join(
                                                  work_dir,
                                                  work_dict["contamination"]),
                                              out_dir=out_dir,
                                              mode=mode,
                                              cratio=cratio,
                                              split=split)

    dag = DAG("kmer_denovo")
    jellyfish_task, gse_scope_task, scope_txt, gse_txt, scope_png, gse_png, option = gse_scope(
        reads=" ".join(reads),
        name=name,
        kmer_length=kmer_length,
        thread=thread,
        job_type=job_type,
        work_dir=os.path.join(work_dir, work_dict["gse_scope"]),
        out_dir=os.path.join(out_dir, work_dict["gse_scope"]),
        mode=mode)
    options["software"].update(option)
    dag.add_task(jellyfish_task)
    dag.add_task(gse_scope_task)

    kmerfreq_task, heter_task, stat_heter, heter_png, option = kmerfreq(
        reads=" ".join(reads),
        name=name,
        kingdom=kingdom,
        kmer_length=kmer_length,
        thread=thread,
        job_type=job_type,
        work_dir=os.path.join(work_dir, work_dict["kmerfreq"]),
        out_dir=os.path.join(out_dir, work_dict["kmerfreq"]))
    options["software"].update(option)
    dag.add_task(kmerfreq_task)
    dag.add_task(heter_task)

    denovo_task, genome, stat_genome, option = create_soapdenovo_task(
        r1=" ".join(r1),
        r2=" ".join(r2),
        name=name,
        thread=thread,
        queue=queue,
        job_type=job_type,
        work_dir=os.path.join(work_dir, work_dict["denovo"]),
        out_dir=os.path.join(out_dir, work_dict["denovo"]))
    if asm == "true":
        dag.add_task(denovo_task)
    else:
        genome = "false"
        stat_genome = "false"
    do_dag(dag, concurrent, refresh)

    if asm == "true":
        gc_depth = run_gc_depth(genome=genome,
                                r1=" ".join(r1),
                                r2=" ".join(r2),
                                name=name,
                                platform=platform,
                                split="no_split",
                                window=window,
                                thread=thread,
                                job_type=job_type,
                                concurrent=concurrent,
                                refresh=refresh,
                                work_dir=os.path.join(work_dir,
                                                      work_dict["gc_depth"]),
                                out_dir=os.path.join(out_dir,
                                                     work_dict["gc_depth"]))
    else:
        gc_depth = heter_png

    with open(os.path.join(out_dir, "kmer_denovo.json"), "w") as fh:
        json.dump(options, fh, indent=2)

    return (stat_heter, heter_png, scope_txt, gse_txt, scope_png, gse_png,
            stat_genome, gc_depth)
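
Finally, a hypothetical top-level invocation of the last variant; every path and tuning value below is a placeholder chosen to match the signature, not taken from the original source.

# Hypothetical driver; all values are placeholders.
results = run_kmer_denovo(r1=["sample.r1.fastq.gz"],
                          r2=["sample.r2.fastq.gz"],
                          taxid="taxid.list",
                          name="sample",
                          mode="fast",
                          cratio=10,
                          kmer_length=17,
                          kmer_depth=30,
                          kingdom="fungi",
                          asm="true",
                          window=10000,
                          thread=4,
                          job_type="local",
                          queue="",
                          concurrent=8,
                          refresh=30,
                          work_dir="work",
                          out_dir="out",
                          split="no_split",
                          platform="illumina")
stat_heter, heter_png = results[0], results[1]   # unpack as needed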