Example #1
0
def join(args, outs, chunk_defs, chunk_outs):
    """Assemble and run the crconverter command that writes the .cloupe file.

    When ``do_not_make_cloupe(args)`` is true the conversion is skipped and
    ``outs.output_for_cloupe`` is cleared.  Otherwise the contig info is
    dumped as JSON into the stage directory, the crconverter argument list
    is built from ``args`` and the command is executed with its output
    logged.  If crconverter exits with an error the output is cleared and
    the failure is reported through ``martian.throw``.

    ``chunk_defs`` and ``chunk_outs`` are accepted but not used.
    """
    if do_not_make_cloupe(args):
        outs.output_for_cloupe = None
        return

    reference = ReferenceManager(args.reference_path)

    # crconverter reads the contig info from a file, so write it out first.
    contig_info_fn = martian.make_path("contig_info.json")
    with open(contig_info_fn, 'w') as outfile:
        json.dump(get_contig_info(args.reference_path), outfile)

    gem_group_index_json = get_gem_group_index_json(args, outs)

    # Positional arguments followed by the flags that are always passed.
    call = ["crconverter", args.sample_id, args.pipestance_type]
    call += ["--matrix", args.feature_barcode_matrix]
    call += ["--analysis", args.analysis]
    call += ["--output", outs.output_for_cloupe]
    call += ["--description", '"' + args.sample_desc + '"']
    call += ["--peaks", args.peaks]
    call += ["--fragmentsindex", args.fragments_index]
    call += ["--geneannotations", reference.genes]
    call += ["--contiginfo", contig_info_fn]

    # Flags that are only passed when their value is available.
    optional_flags = (
        ("--metrics", args.metrics_json),
        ("--aggregation", args.aggregation_csv),
        ("--gemgroups", gem_group_index_json),
    )
    for flag, value in optional_flags:
        if value is not None:
            call.extend([flag, value])

    transcript_gene_types = get_annotation_gene_types(args)
    if transcript_gene_types is not None:
        joined_types = ",".join(transcript_gene_types)
        call.extend(["--geneannotationtypes", joined_types])

    # the sample desc may be unicode, so hand check_output a utf-8
    # encoded copy of every argument
    encoded_call = [piece.encode('utf-8') for piece in call]

    # log the un-encoded 'call' because log_info inherently attempts to
    # encode the message itself (TODO: should log_info figure out the
    # encoding of the input string)
    command_line = " ".join(call)
    martian.log_info("Running crconverter: %s" % command_line)
    try:
        results = tk_subproc.check_output(encoded_call)
        martian.log_info("crconverter output: %s" % results)
    except subprocess.CalledProcessError as e:
        outs.output_for_cloupe = None
        martian.throw("Could not generate .cloupe file: \n%s" % e.output)
Example #2
0
def main(args, outs):
    """Run crconverter to produce the .cloupe file for this pipestance.

    When ``do_not_make_cloupe(args)`` is true the conversion is skipped and
    ``outs.output_for_cloupe`` is cleared.  Otherwise the crconverter
    argument list is built from ``args`` (the metrics, aggregation and gem
    group flags are only added when their value is truthy) and executed.
    If crconverter exits with an error the output is cleared and the
    failure is reported through ``martian.throw``.
    """
    if do_not_make_cloupe(args):
        outs.output_for_cloupe = None
        return

    gem_group_index_json = get_gem_group_index_json(args, outs)

    call = [
        "crconverter", args.sample_id, args.pipestance_type, "--matrix",
        args.filtered_gene_bc_matrices_h5, "--analysis",
        get_analysis_h5_path(args), "--output", outs.output_for_cloupe,
        "--description", args.sample_desc
    ]

    if args.metrics_json:
        call.extend(["--metrics", args.metrics_json])
    if args.aggregation_csv:
        call.extend(["--aggregation", args.aggregation_csv])
    if gem_group_index_json:
        call.extend(["--gemgroups", gem_group_index_json])

    # the sample desc may be unicode, so send the whole
    # set of args str utf-8 to check_output
    unicode_call = [arg.encode('utf-8') for arg in call]

    # but keep the arg 'call' here because log_info inherently
    # attempts to encode the message... (TODO: should log_info
    # figure out the encoding of the input string)
    martian.log_info("Running crconverter: %s" % " ".join(call))
    try:
        results = tk_subproc.check_output(unicode_call)
        martian.log_info("crconverter output: %s" % results)
    # 'except X as e' is valid on Python 2.6+ and Python 3, unlike the
    # comma form which only parses on Python 2.
    except subprocess.CalledProcessError as e:
        outs.output_for_cloupe = None
        martian.throw("Could not generate .cloupe file: \n%s" % e.output)
Example #3
0
def record_package_versions():
    """Print the version reported by each configured package command.

    Iterates over ``cr_constants.PACKAGE_VERSION_CMDS``; each entry supplies
    a ``'name'`` and a ``'cmd'``.  The command is run through the shell and
    its output is printed as ``<name>: <output>``.
    """
    for package in cr_constants.PACKAGE_VERSION_CMDS:
        name = package['name']
        cmd = package['cmd']

        version = tk_subproc.check_output(cmd, shell=True)
        # Parenthesised single-argument form: prints the same text under
        # the Python 2 print statement and also parses on Python 3.
        print('%s: %s' % (name, version))
Example #4
0
def join(args, outs, chunk_defs, chunk_outs):
    """Run dlconverter to produce the .dloupe file.

    Writes the contig info as JSON to ``outs.contig_info_json``, builds the
    dlconverter argument list from ``args`` (adding ``--gene-annotations``
    only when the gene annotation file exists on disk) and executes it.
    If dlconverter exits with an error ``outs.output_for_dloupe`` is cleared
    and the failure is reported through ``martian.throw``.

    ``chunk_defs`` and ``chunk_outs`` are accepted but not used.
    """
    contig_info = get_contig_info(args)
    with open(outs.contig_info_json, 'w') as outfile:
        json.dump(contig_info, outfile)

    call = [
        "dlconverter", args.sample_id, "--output", outs.output_for_dloupe,
        "--description", args.sample_desc, "--node-profile-h5",
        args.normalized_node_profiles, "--contig-info-json",
        outs.contig_info_json, "--merged-bed", args.node_cnv_calls,
        "--tree-data", args.tree_data, "--tracks", args.tracks,
        "--per-cell-summary", args.per_cell_summary_metrics
    ]

    gene_annotation_path = tk_ref.get_loupe_genes(args.reference_path)
    if os.path.exists(gene_annotation_path):
        call.extend(["--gene-annotations", gene_annotation_path])

    # the sample desc may be unicode, so send the whole
    # set of args str utf-8 to check_output
    unicode_call = [arg.encode('utf-8') for arg in call]

    martian.log_info("Running dlconverter: %s" % " ".join(call))
    try:
        results = tk_subproc.check_output(unicode_call)
        martian.log_info("dlconverter output: %s" % results)
    # 'except X as e' is valid on Python 2.6+ and Python 3, unlike the
    # comma form which only parses on Python 2.
    except subprocess.CalledProcessError as e:
        outs.output_for_dloupe = None
        martian.throw("Could not generate .dloupe file: \n%s" % e.output)
Example #5
0
def main(args, outs):
    """Run crconverter against an existing pipestance directory.

    Validates ``args.pipestance_type`` (must be ``count`` or ``aggr``),
    checks that ``args.pipestance_path`` exists, that an analysis HDF5 file
    is present in one of the known locations, and that the pipestance does
    not contain the pre-1.2 ``CELLRANGER_PD`` / ``CELLRANGER_CS``
    directories.  Each failed check is reported through ``martian.exit``.
    crconverter is then executed; if it exits with an error
    ``outs.output_for_cloupe`` is cleared and the failure is reported
    through ``martian.throw``.
    """
    # Single source of truth for the accepted types and the pipeline name
    # passed to crconverter for each of them.
    pipeline_names = {
        "count": "SC_RNA_COUNTER_CS",
        "aggr": "SC_RNA_AGGREGATOR_CS",
    }
    if args.pipestance_type not in pipeline_names:
        martian.exit("The type argument must be one of: count, aggr")
    pname = pipeline_names[args.pipestance_type]

    pipestance_exists = os.path.exists(args.pipestance_path)
    if not pipestance_exists:
        martian.exit("Invalid pipestance path: %s" % args.pipestance_path)

    # check to see if an analysis file exists.  If it doesn't, then
    # this is likely a barnyard sample, and we cannot generate a
    # .loupe file (CELLRANGER-773);
    analysis_h5_path = os.path.join(args.pipestance_path, "outs/analysis/analysis.h5")

    # 1.2.0 location only
    internal_count_h5_path = os.path.join(
        args.pipestance_path,
        "SC_RNA_COUNTER_CS/SC_RNA_COUNTER/SC_RNA_ANALYZER/SUMMARIZE_ANALYSIS/fork0/files/analysis/analysis.h5"
    )

    internal_aggr_h5_path = os.path.join(
        args.pipestance_path,
        "SC_RNA_AGGREGATOR_CS/SC_RNA_AGGREGATOR/SC_RNA_ANALYZER/SUMMARIZE_ANALYSIS/fork0/files/analysis/analysis.h5"
    )

    if not os.path.exists(analysis_h5_path) \
            and not os.path.exists(internal_count_h5_path) \
            and not os.path.exists(internal_aggr_h5_path):
        martian.exit("Could not find single-species analysis HDF5 file. " +
                     "Loupe Cell Browser files are not generated for multi-species experiments.")

    # has to be 1.2 or higher
    cellranger_pd_before_1_2_path = os.path.join(args.pipestance_path, "CELLRANGER_PD")
    cellranger_cs_before_1_2_path = os.path.join(args.pipestance_path, "CELLRANGER_CS")
    if os.path.exists(cellranger_pd_before_1_2_path) or os.path.exists(cellranger_cs_before_1_2_path):
        martian.exit("mkloupe is only supported for Cell Ranger 1.2 and later.")

    call = ["crconverter",
            args.sample_id,
            pname,
            "--pipestance", args.pipestance_path,
            "--output", outs.output_for_cloupe]

    martian.log_info("Running crconverter: %s" % " ".join(call))
    try:
        results = tk_subproc.check_output(call)
        martian.log_info("crconverter output: %s" % results)
    # 'except X as e' is valid on Python 2.6+ and Python 3, unlike the
    # comma form which only parses on Python 2.
    except subprocess.CalledProcessError as e:
        outs.output_for_cloupe = None
        martian.throw("Could not generate .cloupe file: \n%s" % e.output)
Example #6
0
def get_version():
    """Return the version string, stripped of surrounding whitespace.

    The contents of the ``.version`` file are used when that file exists;
    otherwise the result of ``git describe --tags --always --dirty`` run
    from the ``bin`` directory is used.
    """
    # NOTE: this makes assumptions about the directory structure
    this_dir = os.path.dirname(os.path.abspath(__file__))
    script_dir = os.path.join(this_dir, '..', '..', '..', 'bin')
    version_fn = os.path.join(script_dir, '..', '.version')

    if not os.path.exists(version_fn):
        git_cmd = ['git', 'describe', '--tags', '--always', '--dirty']
        return tk_subproc.check_output(git_cmd, cwd=script_dir).strip()

    with open(version_fn, 'r') as version_file:
        return version_file.read().strip()
Example #7
0
def main(args, outs):
    """
    Run the vlconverter executable with inputs that should be available in the outs
    folder at the end of the pipeline run.  This will generate "output_for_vloupe.vloupe"
    in the stage folder.

    Memory usage not expected to be excessive with this (thus no custom split/join
    as of yet); it will need to load a few full files (bam.bai, fasta.fai) into memory.

    If any of ``args.concat_ref_bam``, ``args.consensus_bam`` or
    ``args.contig_bam_bai`` is None or not an existing file, a message is
    logged and the function returns without running vlconverter.  If
    vlconverter exits with an error, ``outs.output_for_vloupe`` is cleared
    and the failure is reported through ``martian.throw``.
    """
    if args.concat_ref_bam is None or not os.path.isfile(args.concat_ref_bam) or \
       args.consensus_bam is None or not os.path.isfile(args.consensus_bam) or \
       args.contig_bam_bai is None or not os.path.isfile(args.contig_bam_bai):
        martian.log_info(
            'One or more bam files missing - cannot make vloupe file')
        return

    call = [
        "vlconverter", args.sample_id, args.pipestance_type, "--output",
        outs.output_for_vloupe, "--reference-bam", args.concat_ref_bam,
        "--reference-bam-index", args.concat_ref_bam_bai, "--reference-fasta",
        args.concat_ref_fasta, "--reference-fasta-index",
        args.concat_ref_fasta_fai, "--reference-annotations",
        args.concat_ref_annotations_json, "--clonotypes", args.clonotypes_csv,
        "--consensus-bam", args.consensus_bam, "--consensus-bam-index",
        args.consensus_bam_bai, "--consensus-annotations",
        args.consensus_annotations_json, "--consensus-fasta",
        args.consensus_fasta, "--consensus-fasta-index",
        args.consensus_fasta_fai, "--contig-bam-relative-path",
        args.contig_bam_relative_path, "--contig-bam-index",
        args.contig_bam_bai, "--contig-annotations",
        args.contig_annotations_json, "--contig-bed",
        args.contig_annotations_bed, "--contig-fasta", args.contig_fasta,
        "--contig-fasta-index", args.contig_fasta_fai, "--description",
        args.sample_desc
    ]

    # the sample desc may be unicode, so send the whole
    # set of args str utf-8 to check_output
    unicode_call = [arg.encode('utf-8') for arg in call]

    # but keep the arg 'call' here because log_info inherently
    # attempts to encode the message... (TODO: should log_info
    # figure out the encoding of the input string)
    martian.log_info("Running vlconverter: %s" % " ".join(call))
    try:
        results = tk_subproc.check_output(unicode_call)
        martian.log_info("vlconverter output: %s" % results)
    # 'except X as e' is valid on Python 2.6+ and Python 3, unlike the
    # comma form which only parses on Python 2.
    except subprocess.CalledProcessError as e:
        outs.output_for_vloupe = None
        martian.throw("Could not generate .vloupe file: \n%s" % e.output)
def get_unmapped_read_count_from_indexed_bam(bam_file_name):
    """
    Get number of unmapped reads from an indexed BAM file.

    Args:
        bam_file_name (str): Name of indexed BAM file.

    Returns:
        int: number of unmapped reads in the BAM

    Note:
        BAM must be indexed for lookup using samtools.

        The count is read from the last whitespace-separated field of the
        last line of the ``samtools idxstats`` output.

    """

    # Pass the command as an argument list instead of a shell string so
    # that file names containing spaces or shell metacharacters are handed
    # to samtools verbatim rather than being interpreted by the shell.
    index_output = tk_subproc.check_output(
        ['samtools', 'idxstats', bam_file_name])
    last_line = index_output.strip().split('\n')[-1]
    return int(last_line.split()[-1])