예제 #1
0
def quantitate(data):
    """CWL target for quantitation.

    Generates transcript counts and then runs each configured quantifier in
    turn (sailfish, kallisto, salmon), recording output locations under
    ``data["quant"]``. Each quantifier that runs assigns the ``tsv`` and
    ``hdf5`` entries, so when several are enabled the last one to run
    determines their final values.

    XXX Needs to be split and parallelized by expression caller, with merging
    of multiple calls.

    Args:
        data: Sample record. It is passed through ``to_single_data`` twice
            before use (presumably to unwrap nested single-item lists --
            confirm against ``to_single_data``).

    Returns:
        The updated sample record wrapped as ``[[data]]``.
    """
    data = to_single_data(to_single_data(data))
    data = generate_transcript_counts(data)[0][0]
    data["quant"] = {}

    if "sailfish" in dd.get_expression_caller(data):
        data = to_single_data(sailfish.run_sailfish(data)[0])
        data["quant"]["tsv"] = data["sailfish"]
        data["quant"]["hdf5"] = os.path.join(os.path.dirname(data["sailfish"]),
                                             "abundance.h5")

    # kallisto also runs when pizzly is requested as a fusion caller.
    if ("kallisto" in dd.get_expression_caller(data)
            or "pizzly" in dd.get_fusion_caller(data, [])):
        data = to_single_data(kallisto.run_kallisto_rnaseq(data)[0])
        data["quant"]["tsv"] = os.path.join(data["kallisto_quant"],
                                            "abundance.tsv")
        data["quant"]["hdf5"] = os.path.join(data["kallisto_quant"],
                                             "abundance.h5")

    # The kallisto output directory may be absent (or None) when kallisto did
    # not run; in that case there is no fusion file to report, so record None
    # rather than failing on the lookup.
    kallisto_dir = data.get("kallisto_quant")
    fusion_file = None
    if kallisto_dir is not None:
        fusion_file = os.path.join(kallisto_dir, "fusion.txt")
    if fusion_file is not None and os.path.exists(fusion_file):
        data["quant"]["fusion"] = fusion_file
    else:
        data["quant"]["fusion"] = None

    if "salmon" in dd.get_expression_caller(data):
        # Pick the salmon entry point. Alignment-based quantification is only
        # used with STAR; other aligners fall back to decoys (hg38) or to
        # transcriptome-only read quantification.
        if not dd.get_quantify_genome_alignments(data):
            salmon_runner = salmon.run_salmon_reads
        elif dd.get_aligner(data).lower() == "star":
            salmon_runner = salmon.run_salmon_bam
        elif dd.get_genome_build(data) == "hg38":
            logger.warning(
                "Whole genome alignment-based Salmon quantification is "
                "only supported for the STAR aligner. Since this is hg38 we will fall "
                "back to the decoy method")
            salmon_runner = salmon.run_salmon_decoy
        else:
            logger.warning(
                "Whole genome alignment-based Salmon quantification is "
                "only supported for the STAR aligner. Falling back to the "
                "transcriptome-only method.")
            salmon_runner = salmon.run_salmon_reads
        data = to_single_data(salmon_runner(data)[0])
        data["quant"]["tsv"] = data["salmon"]
        data["quant"]["hdf5"] = os.path.join(os.path.dirname(data["salmon"]),
                                             "abundance.h5")
    return [[data]]
예제 #2
0
def quantitate_expression_parallel(samples, run_parallel):
    """Quantitate expression for all samples through ``run_parallel``.

    All programs run here should be multithreaded to take advantage of the
    threaded run_parallel environment.

    Configuration (expression callers, fusion settings, aligner and genome
    build) is read from the first sample only, ``samples[0][0]``.

    Args:
        samples: Nested list of sample records; ``samples[0][0]`` must exist.
        run_parallel: Callable invoked as ``run_parallel(task_name, items)``.

    Returns:
        The samples as returned by the final ``detect_fusions`` step.
    """
    first_sample = samples[0][0]
    index_targets = determine_indexes_to_make(samples)
    samples = run_parallel("generate_transcript_counts", samples)

    if "cufflinks" in dd.get_expression_caller(first_sample):
        samples = run_parallel("run_cufflinks", samples)

    if "stringtie" in dd.get_expression_caller(first_sample):
        samples = run_parallel("run_stringtie_expression", samples)

    # kallisto is needed for expression, fusion mode, or the pizzly caller.
    wants_kallisto = (
        "kallisto" in dd.get_expression_caller(first_sample)
        or dd.get_fusion_mode(first_sample)
        or "pizzly" in dd.get_fusion_caller(first_sample, [])
    )
    if wants_kallisto:
        # Index-building results are not used here; only the call matters.
        run_parallel("run_kallisto_index", [index_targets])
        samples = run_parallel("run_kallisto_rnaseq", samples)

    if "sailfish" in dd.get_expression_caller(first_sample):
        run_parallel("run_sailfish_index", [index_targets])
        samples = run_parallel("run_sailfish", samples)

    # Salmon always runs; only the entry point varies.
    run_parallel("run_salmon_index", [index_targets])
    if not dd.get_quantify_genome_alignments(first_sample):
        salmon_task = "run_salmon_reads"
    elif dd.get_aligner(first_sample).lower() == "star":
        salmon_task = "run_salmon_bam"
    elif dd.get_genome_build(first_sample) == "hg38":
        logger.warning(
            "Whole genome alignment-based Salmon quantification is "
            "only supported for the STAR aligner. Since this is hg38 we will fall "
            "back to the decoy method")
        salmon_task = "run_salmon_decoy"
    else:
        logger.warning(
            "Whole genome alignment-based Salmon quantification is "
            "only supported for the STAR aligner. Falling back to the "
            "transcriptome-only method.")
        salmon_task = "run_salmon_reads"
    samples = run_parallel(salmon_task, samples)

    return run_parallel("detect_fusions", samples)