Esempio n. 1
0
def convert_to_kallisto(data):
    files = dd.get_input_sequence_files(data)
    if len(files) == 2:
        fq1, fq2 = files
    else:
        fq1, fq2 = files[0], None
    samplename = dd.get_sample_name(data)
    work_dir = dd.get_work_dir(data)
    kallisto_dir = os.path.join(work_dir, "kallisto", samplename, "fastq")
    out_file = os.path.join(kallisto_dir, "barcodes.batch")
    umis = config_utils.get_program("umis", dd.get_config(data))
    if file_exists(out_file):
        return out_file
    if dd.get_minimum_barcode_depth(data):
        cb_histogram = os.path.join(work_dir, "umis", samplename, "cb-histogram.txt")
        cb_cutoff = dd.get_minimum_barcode_depth(data)
        cb_options = "--cb_histogram {cb_histogram} --cb_cutoff {cb_cutoff}"
        cb_options = cb_options.format(**locals())
    else:
        cb_options = ""
    cmd = ("{umis} kallisto {cb_options} --out_dir {tx_kallisto_dir} {fq1}")
    with file_transaction(data, kallisto_dir) as tx_kallisto_dir:
        safe_makedir(tx_kallisto_dir)
        message = ("Transforming %s to Kallisto singlecell format. "
                   % fq1)
        do.run(cmd.format(**locals()), message)
    return out_file
Esempio n. 2
0
def convert_to_kallisto(data):
    files = dd.get_input_sequence_files(data)
    if len(files) == 2:
        fq1, fq2 = files
    else:
        fq1, fq2 = files[0], None
    samplename = dd.get_sample_name(data)
    work_dir = dd.get_work_dir(data)
    kallisto_dir = os.path.join(work_dir, "kallisto", samplename, "fastq")
    out_file = os.path.join(kallisto_dir, "barcodes.batch")
    umis = config_utils.get_program("umis", dd.get_config(data))
    if file_exists(out_file):
        return out_file
    if dd.get_minimum_barcode_depth(data):
        cb_histogram = os.path.join(work_dir, "umis", samplename,
                                    "cb-histogram.txt")
        cb_cutoff = dd.get_minimum_barcode_depth(data)
        cb_options = "--cb_histogram {cb_histogram} --cb_cutoff {cb_cutoff}"
        cb_options = cb_options.format(**locals())
    else:
        cb_options = ""
    cmd = ("{umis} kallisto {cb_options} --out_dir {tx_kallisto_dir} {fq1}")
    with file_transaction(data, kallisto_dir) as tx_kallisto_dir:
        safe_makedir(tx_kallisto_dir)
        message = ("Transforming %s to Kallisto singlecell format. " % fq1)
        do.run(cmd.format(**locals()), message)
    return out_file
Esempio n. 3
0
def tagcount(data):
    bam = dd.get_transcriptome_bam(data)
    umi_dir = os.path.join(dd.get_work_dir(data), "umis")
    sample_dir = os.path.join(umi_dir, dd.get_sample_name(data))
    out_file = os.path.join(sample_dir, dd.get_sample_name(data) + ".mtx")
    if file_exists(out_file):
        data = dd.set_count_file(data, out_file)
        return [[data]]
    umis = config_utils.get_program("umis", data, default="umis")
    safe_makedir(sample_dir)
    cutoff = dd.get_minimum_barcode_depth(data)
    cb_histogram = os.path.join(sample_dir, "cb-histogram.txt")
    positional = "--positional" if dd.get_positional_umi(data, False) else ""
    gtf_file = dd.get_transcriptome_gtf(data, None)

    if gtf_file:
        gene_map_file = os.path.join(
            dd.get_work_dir(data), "annotation",
            os.path.splitext(gtf_file)[0] + "-tx2gene.tsv")
        gene_map_file = gtf.tx2genefile(gtf_file, gene_map_file, tsv=True)
        gene_map_flag = " --genemap {0} ".format(gene_map_file)
    else:
        gene_map_flag = ""

    message = "Counting alignments of transcripts in %s." % bam
    cmd = ("{umis} tagcount {positional} --cb_cutoff {cutoff} --sparse "
           "{gene_map_flag}"
           "--cb_histogram {cb_histogram} {bam} {tx_out_file}")
    out_files = [out_file, out_file + ".rownames", out_file + ".colnames"]
    with file_transaction(out_files) as tx_out_files:
        tx_out_file = tx_out_files[0]
        do.run(cmd.format(**locals()), message)
    data = dd.set_count_file(data, out_file)
    return [[data]]
Esempio n. 4
0
def tagcount(data):
    bam = dd.get_transcriptome_bam(data)
    umi_dir = os.path.join(dd.get_work_dir(data), "umis")
    sample_dir = os.path.join(umi_dir, dd.get_sample_name(data))
    out_prefix = os.path.join(sample_dir, dd.get_sample_name(data))
    out_file = out_prefix + ".mtx"
    if file_exists(out_file):
        data = dd.set_count_file(data, out_file)
        return [[data]]
    umis = config_utils.get_program("umis", data, default="umis")
    safe_makedir(sample_dir)
    cutoff = dd.get_minimum_barcode_depth(data)
    cb_histogram = os.path.join(sample_dir, "cb-histogram.txt")
    positional = "--positional" if dd.get_positional_umi(data, False) else ""
    if use_installed_transcriptome(data):
        gtf_file = dd.get_gtf_file(data)
    else:
        gtf_file = dd.get_transcriptome_gtf(data, None)

    if gtf_file:
        gene_map_file = os.path.join(
            dd.get_work_dir(data), "annotation",
            os.path.basename(os.path.splitext(gtf_file)[0]) + "-tx2gene.tsv")
        gene_map_file = gtf.tx2genefile(gtf_file, gene_map_file, tsv=True)
        gene_map_flag = " --genemap {0} ".format(gene_map_file)
    else:
        gene_map_flag = ""

    message = "Counting alignments of transcripts in %s." % bam
    cmd = ("{umis} fasttagcount --cb_cutoff {cutoff} "
           "{gene_map_flag} "
           "{positional} "
           "--cb_histogram {cb_histogram}")
    out_files = [out_file, out_file + ".rownames", out_file + ".colnames"]
    umi_matrix_file = out_prefix + "-dupes.mtx"
    out_files += [
        umi_matrix_file, umi_matrix_file + ".rownames",
        umi_matrix_file + ".colnames"
    ]
    if has_umi_matrix(data):
        umi_matrix_flag = " --umi_matrix {tx_umi_matrix_full} "
    else:
        umi_matrix_flag = ""
    cmd += umi_matrix_flag
    cmd += " {bam} {tx_out_file_full}"
    with file_transaction(out_files) as tx_out_files:
        tx_out_file = tx_out_files[0]
        tx_out_file_full = tx_out_file + ".full"
        tx_umi_matrix = tx_out_files[3]
        tx_umi_matrix_full = tx_out_files[3] + ".full"
        do.run(cmd.format(**locals()), message)
        cmd = ("{umis} sparse {tx_out_file_full} {tx_out_file}")
        message = "Converting %s to sparse format." % tx_out_file_full
        do.run(cmd.format(**locals()), message)
        if has_umi_matrix(data):
            cmd = ("{umis} sparse {tx_umi_matrix_full} {tx_umi_matrix}")
            message = "Converting %s to sparse format." % tx_umi_matrix_full
        do.run(cmd.format(**locals()), message)
    data = dd.set_count_file(data, out_file)
    return [[data]]
Esempio n. 5
0
def tagcount(data):
    bam = dd.get_transcriptome_bam(data)
    umi_dir = os.path.join(dd.get_work_dir(data), "umis")
    sample_dir = os.path.join(umi_dir, dd.get_sample_name(data))
    out_prefix = os.path.join(sample_dir, dd.get_sample_name(data))
    out_file = out_prefix + ".mtx"
    if file_exists(out_file):
        data = dd.set_count_file(data, out_file)
        return [[data]]
    umis = config_utils.get_program("umis", data, default="umis")
    safe_makedir(sample_dir)
    cutoff = dd.get_minimum_barcode_depth(data)
    cb_histogram = os.path.join(sample_dir, "cb-histogram.txt")
    positional = "--positional" if dd.get_positional_umi(data, False) else ""
    if use_installed_transcriptome(data):
        gtf_file = dd.get_gtf_file(data)
    else:
        gtf_file  = dd.get_transcriptome_gtf(data, None)

    if gtf_file:
        gene_map_file = os.path.join(dd.get_work_dir(data), "annotation",
                                     os.path.splitext(gtf_file)[0] + "-tx2gene.tsv")
        gene_map_file = gtf.tx2genefile(gtf_file, gene_map_file, tsv=True)
        gene_map_flag = " --genemap {0} ".format(gene_map_file)
    else:
        gene_map_flag = ""

    message = "Counting alignments of transcripts in %s." % bam
    cmd = ("{umis} fasttagcount --cb_cutoff {cutoff} "
           "{gene_map_flag} "
           "{positional} "
           "--cb_histogram {cb_histogram}")
    out_files = [out_file, out_file + ".rownames", out_file + ".colnames"]
    umi_matrix_file = out_prefix + "-dupes.mtx"
    out_files += [umi_matrix_file, umi_matrix_file + ".rownames",
                  umi_matrix_file + ".colnames"]
    if has_umi_matrix(data):
        umi_matrix_flag = " --umi_matrix {tx_umi_matrix_full} "
    else:
        umi_matrix_flag = ""
    cmd += umi_matrix_flag
    cmd += " {bam} {tx_out_file_full}"
    with file_transaction(out_files) as tx_out_files:
        tx_out_file = tx_out_files[0]
        tx_out_file_full = tx_out_file + ".full"
        tx_umi_matrix = tx_out_files[3]
        tx_umi_matrix_full = tx_out_files[3] + ".full"
        do.run(cmd.format(**locals()), message)
        cmd = ("{umis} sparse {tx_out_file_full} {tx_out_file}")
        message = "Converting %s to sparse format." % tx_out_file_full
        do.run(cmd.format(**locals()), message)
        if has_umi_matrix(data):
            cmd = ("{umis} sparse {tx_umi_matrix_full} {tx_umi_matrix}")
            message = "Converting %s to sparse format." % tx_umi_matrix_full
        do.run(cmd.format(**locals()), message)
    data = dd.set_count_file(data, out_file)
    return [[data]]
Esempio n. 6
0
def barcode_histogram(data):
    fq1 = dd.get_input_sequence_files(data)[0]
    umi_dir = os.path.join(dd.get_work_dir(data), "umis")
    sample_dir = os.path.join(umi_dir, dd.get_sample_name(data))
    umis = config_utils.get_program("umis", data, default="umis")
    safe_makedir(sample_dir)
    out_file = os.path.join(sample_dir, "cb-histogram.txt")
    filtered_out_file = os.path.join(sample_dir, "cb-histogram-filtered.txt")
    fq1_cmd = fq1
    cmd = "{umis} cb_histogram {fq1_cmd} > {tx_out_file}"
    if not file_exists(out_file):
        with file_transaction(out_file) as tx_out_file:
            message = "Computing cellular barcode counts for %s." % fq1
            do.run(cmd.format(**locals()), message)
    cutoff = dd.get_minimum_barcode_depth(data)
    filter_barcode_histogram(filtered_out_file, out_file, cutoff)
    return [[data]]
Esempio n. 7
0
def barcode_histogram(data):
    fq1 = dd.get_input_sequence_files(data)[0]
    umi_dir = os.path.join(dd.get_work_dir(data), "umis")
    sample_dir = os.path.join(umi_dir, dd.get_sample_name(data))
    umis = config_utils.get_program("umis", data, default="umis")
    safe_makedir(sample_dir)
    out_file = os.path.join(sample_dir, "cb-histogram.txt")
    filtered_out_file = os.path.join(sample_dir, "cb-histogram-filtered.txt")
    fq1_cmd = fq1
    cmd = "{umis} cb_histogram {fq1_cmd} > {tx_out_file}"
    if not file_exists(out_file):
        with file_transaction(out_file) as tx_out_file:
            message = "Computing cellular barcode counts for %s." % fq1
            do.run(cmd.format(**locals()), message)
    cutoff = dd.get_minimum_barcode_depth(data)
    filter_barcode_histogram(filtered_out_file, out_file, cutoff)
    newdata = dd.set_histogram_counts(data, filtered_out_file)
    return [[newdata]]
Esempio n. 8
0
def tagcount(data):
    bam = dd.get_transcriptome_bam(data)
    umi_dir = os.path.join(dd.get_work_dir(data), "umis")
    sample_dir = os.path.join(umi_dir, dd.get_sample_name(data))
    out_file = os.path.join(sample_dir, dd.get_sample_name(data) + ".counts")
    if file_exists(out_file):
        data = dd.set_count_file(data, out_file)
        return [[data]]
    umis = config_utils.get_program("umis", data, default="umis")
    safe_makedir(sample_dir)
    cutoff = dd.get_minimum_barcode_depth(data)
    cb_histogram = os.path.join(sample_dir, "cb-histogram.txt")
    message = "Counting alignments of transcripts in %s." % bam
    cmd = ("{umis} tagcount --positional --cb_cutoff {cutoff} --cb_histogram "
           "{cb_histogram} {bam} {tx_out_file}")
    with file_transaction(out_file) as tx_out_file:
        do.run(cmd.format(**locals()), message)
    data = dd.set_count_file(data, out_file)
    return [[data]]
Esempio n. 9
0
def tagcount(data):
    bam = dd.get_transcriptome_bam(data)
    umi_dir = os.path.join(dd.get_work_dir(data), "umis")
    sample_dir = os.path.join(umi_dir, dd.get_sample_name(data))
    out_file = os.path.join(sample_dir, dd.get_sample_name(data) + ".counts")
    if file_exists(out_file):
        data = dd.set_count_file(data, out_file)
        return [[data]]
    umis = config_utils.get_program("umis", data, default="umis")
    safe_makedir(sample_dir)
    cutoff = dd.get_minimum_barcode_depth(data)
    cb_histogram = os.path.join(sample_dir, "cb-histogram.txt")
    message = "Counting alignments of transcripts in %s." % bam
    cmd = ("{umis} tagcount --positional --cb_cutoff {cutoff} --cb_histogram "
           "{cb_histogram} {bam} {tx_out_file}")
    with file_transaction(out_file) as tx_out_file:
        do.run(cmd.format(**locals()), message)
    data = dd.set_count_file(data, out_file)
    return [[data]]
Esempio n. 10
0
def tagcount(data):
    bam = dd.get_transcriptome_bam(data)
    umi_dir = os.path.join(dd.get_work_dir(data), "umis")
    sample_dir = os.path.join(umi_dir, dd.get_sample_name(data))
    out_file = os.path.join(sample_dir, dd.get_sample_name(data) + ".mtx")
    if file_exists(out_file):
        data = dd.set_count_file(data, out_file)
        return [[data]]
    umis = config_utils.get_program("umis", data, default="umis")
    safe_makedir(sample_dir)
    cutoff = dd.get_minimum_barcode_depth(data)
    cb_histogram = os.path.join(sample_dir, "cb-histogram.txt")
    positional = "--positional" if dd.get_positional_umi(data, False) else ""
    message = "Counting alignments of transcripts in %s." % bam
    cmd = ("{umis} tagcount {positional} --cb_cutoff {cutoff} --sparse "
           "--cb_histogram {cb_histogram} {bam} {tx_out_file}")
    out_files = [out_file, out_file + ".rownames", out_file + ".colnames"]
    with file_transaction(out_files) as tx_out_files:
        tx_out_file = tx_out_files[0]
        do.run(cmd.format(**locals()), message)
    data = dd.set_count_file(data, out_file)
    return [[data]]