コード例 #1
0
ファイル: __init__.py プロジェクト: GWW/cellranger_211_mirror
def concatenate_sort_and_index_bams(out_bam, bams):
    tmp_bam = out_bam.replace('.bam', '_tmp.bam')
    # Drop the UMI tag since it's useless
    vdj_utils.concatenate_and_fix_bams(tmp_bam, bams, drop_tags=[PROCESSED_UMI_TAG])
    # Wrap filenames in str() to prevent pysam crash on unicode input
    tk_bam.sort_and_index(str(tmp_bam), str(out_bam.replace('.bam', '')))
    cr_utils.remove(tmp_bam, allow_nonexisting=True)
コード例 #2
0
ファイル: __init__.py プロジェクト: GWW/cellranger_211_mirror
def rm_files(filenames):
    for filename in filenames:
        cr_utils.remove(filename, allow_nonexisting=True)
コード例 #3
0
def join(args, outs, chunk_defs, chunk_outs):
    contigs = []
    contig_fastqs = []
    contig_bams = []

    if len(chunk_outs) == 0:
        # No input reads
        # Create empty BAM file
        with open(outs.contig_bam, 'w') as f:
            pass
        outs.contig_bam_bai = None
        # Create empty contig FASTA
        with open(outs.contig_fasta, 'w') as f:
            pass
        outs.contig_fasta_fai = None
        # Create empty contig FASTQ
        with open(outs.contig_fastq, 'w') as f:
            pass
        outs.metrics_summary_json = None
        outs.summary_tsv = None
        outs.umi_summary_tsv = None
        return

    summary_tsvs = []
    umi_summary_tsvs = []

    for chunk_out in chunk_outs:
        if not os.path.isfile(chunk_out.contig_fasta):
            continue
        contigs.append(chunk_out.contig_fasta)

        contig_fastqs.append(chunk_out.contig_fastq)
        contig_bams.append(chunk_out.contig_bam)

        summary_tsvs.append(chunk_out.summary_tsv)
        umi_summary_tsvs.append(chunk_out.umi_summary_tsv)

    cr_utils.concatenate_files(outs.contig_fasta, contigs)

    if os.path.getsize(outs.contig_fasta) > 0:
        tk_subproc.check_call('samtools faidx %s' % outs.contig_fasta,
                              shell=True)
        outs.contig_fasta_fai = outs.contig_fasta + '.fai'

    cr_utils.concatenate_files(outs.contig_fastq, contig_fastqs)

    if len(summary_tsvs) > 0:
        cr_utils.concatenate_headered_files(outs.summary_tsv, summary_tsvs)
    if len(umi_summary_tsvs) > 0:
        cr_utils.concatenate_headered_files(outs.umi_summary_tsv,
                                            umi_summary_tsvs)

    if contig_bams:
        # Merge every N BAMs. Trying to merge them all at once
        #  risks hitting the filehandle limit.
        n_merged = 0

        while len(contig_bams) > 1:
            to_merge = contig_bams[0:MERGE_BAMS_N]

            tmp_bam = martian.make_path('merged-%04d.bam' % n_merged)
            n_merged += 1

            print "Merging %d BAMs into %s ..." % (len(to_merge), tmp_bam)
            tk_bam.merge(tmp_bam, to_merge, threads=args.__threads)

            # Delete any temporary bams that have been merged
            for in_bam in to_merge:
                if os.path.basename(in_bam).startswith('merged-'):
                    cr_utils.remove(in_bam)

            # Pop the input bams and push the merged bam
            contig_bams = contig_bams[len(to_merge):] + [tmp_bam]

        if os.path.basename(contig_bams[0]).startswith('merged-'):
            # We merged at least two chunks together.
            # Rename it to the output bam.
            cr_utils.move(contig_bams[0], outs.contig_bam)
        else:
            # There was only a single chunk, so copy it from the input
            cr_utils.copy(contig_bams[0], outs.contig_bam)

        tk_bam.index(outs.contig_bam)

        # Make sure the Martian out matches the actual index filename
        outs.contig_bam_bai = outs.contig_bam + '.bai'

    # Merge the assembler summary jsons
    merged_summary = cr_utils.merge_jsons_single_level(
        [out.metrics_summary_json for out in chunk_outs])

    with open(outs.metrics_summary_json, 'w') as f:
        json.dump(tk_safe_json.json_sanitize(merged_summary),
                  f,
                  indent=4,
                  sort_keys=True)