コード例 #1
0
def create_unaligned_bam(args, outs):
    """Write this chunk's FASTQ reads to a BAM file as unaligned records.

    The BAM header is assembled from the STAR reference's
    ``chrNameLength.txt`` (one @SQ line per entry) and from
    ``args.read_groups`` (one @RG line per entry). Every read from
    ``args.read_chunk`` and, when given, ``args.read2_chunk`` is written with
    flag 4 and an RG tag set to ``args.read_group``. Read 1 and read 2
    records are interleaved in the output.

    Args:
        args: Stage arguments; this function reads ``reference_path``,
            ``read_groups``, ``read_group``, ``read_chunk`` and
            ``read2_chunk``.
        outs: Stage outputs; the BAM is written to ``outs.genome_output``.

    Side effects:
        Leaves a SAM header file named ``tmphdr`` in the current working
        directory.
    """
    star_ref_path = cr_utils.get_reference_star_path(args.reference_path)

    header_lines = ['@HD\tVN:1.4\n']

    # SQ header lines
    with open(os.path.join(star_ref_path, 'chrNameLength.txt')) as f:
        for line in f:
            chr_name, chr_len = line.strip().split('\t')
            header_lines.append(
                '@SQ\tSN:{}\tLN:{}\n'.format(chr_name, chr_len))

    # RG header lines; literal backslash-t sequences in the generated header
    # are turned into real tab characters.
    for packed_rg in args.read_groups:
        header_lines.append(
            re.sub(r'\\t', '\t', tk_bam.make_rg_header(packed_rg)) + '\n')

    # Get read group ID for this chunk of reads
    read_group = args.read_group

    # pysam doesn't support reading SAM from a StringIO object
    with open('tmphdr', 'w') as f:
        f.write(''.join(header_lines))
    samfile = pysam.AlignmentFile('tmphdr', 'r', check_sq=False)

    outbam = None
    fastq_file1 = None
    fastq_file2 = None
    try:
        outbam = pysam.AlignmentFile(
            outs.genome_output, 'wb', template=samfile)

        fastq_file1 = cr_io.open_maybe_gzip(args.read_chunk)
        if args.read2_chunk:
            fastq_file2 = cr_io.open_maybe_gzip(args.read2_chunk)
        read1s = tk_fasta.read_generator_fastq(fastq_file1)
        if fastq_file2 is not None:
            read2s = tk_fasta.read_generator_fastq(fastq_file2)
        else:
            read2s = []

        # A single record object is reused for every read; 4 is the SAM
        # "segment unmapped" flag bit.
        record = pysam.AlignedSegment()
        record.flag = 4

        def write_unaligned(read):
            # Fill the shared record from one (name, seq, qual) FASTQ entry
            # and append it to the output BAM.
            name, seq, qual = read
            # Keep only the part of the FASTQ name before the first space.
            record.query_name = name.split(' ')[0]
            record.query_sequence = seq
            record.query_qualities = tk_fasta.get_qvs(qual)
            record.set_tag('RG', read_group, 'Z')
            outbam.write(record)

        # izip_longest pads the shorter stream with None, so a missing read 2
        # (single-end input or a shorter R2 file) is simply skipped.
        for read1, read2 in itertools.izip_longest(read1s, read2s):
            write_unaligned(read1)
            if read2:
                write_unaligned(read2)
    finally:
        # Release every handle even when reading or writing fails, so a
        # failure does not leak descriptors or leave the BAM unflushed.
        samfile.close()
        if fastq_file1 is not None:
            fastq_file1.close()
        if fastq_file2 is not None:
            fastq_file2.close()
        if outbam is not None:
            outbam.close()
コード例 #2
0
ファイル: __init__.py プロジェクト: GWW/cellranger_211_mirror
def main(args, outs):
    """Run STAR on this chunk's reads and write to ``outs.genome_output``.

    Args:
        args: Stage arguments; reads ``reference_path``, ``read_chunk``,
            ``read2_chunk``, ``max_hits_per_read``, ``threads`` and
            ``read_group``.
        outs: Stage outputs; ``genome_output`` is passed to STAR as the
            output location.
    """
    aligner = cr_reference.STAR(
        cr_utils.get_reference_star_path(args.reference_path))

    # Keyword options for the aligner, gathered separately from the
    # positional input/output paths.
    align_options = dict(
        max_report_alignments_per_read=args.max_hits_per_read,
        threads=args.threads,
        read_group_tags=tk_bam.make_star_rg_header(args.read_group),
    )

    aligner.align(args.read_chunk, args.read2_chunk, outs.genome_output,
                  **align_options)
コード例 #3
0
def main(args, outs):
    """Process one chunk of alignments and save its reporter.

    Loads the STAR index, gene index, barcode whitelist and barcode
    distribution, builds a ``Reporter`` from them, runs
    ``process_alignments`` on the chunk inputs, then stores the alignment
    count in ``outs.num_alignments`` and saves the reporter to
    ``outs.chunked_reporter``.

    Args:
        args: Stage arguments (reference path, barcode inputs, chunk inputs,
            chemistry definition, thresholds).
        outs: Stage outputs; ``num_alignments`` is assigned, while ``output``
            and ``chunked_reporter`` are used as destinations.
    """
    ref_path = args.reference_path

    star_index = cr_transcriptome.build_star_index(
        cr_utils.get_reference_star_path(ref_path))
    # The chromosome list is taken from the first element of the index's
    # first entry.
    chroms = star_index[0][0]

    genes_index_path = cr_utils.get_reference_genes_index(ref_path)
    gene_index = cr_reference.GeneIndex.load_pickle(genes_index_path)

    barcode_whitelist = cr_utils.load_barcode_whitelist(args.barcode_whitelist)
    barcode_dist = cr_utils.load_barcode_dist(
        args.barcode_counts, barcode_whitelist, args.gem_group)

    umi_length = cr_chem.get_umi_length(args.chemistry_def)
    reporter = cr_report.Reporter(
        reference_path=ref_path,
        high_conf_mapq=cr_constants.STAR_DEFAULT_HIGH_CONF_MAPQ,
        gene_index=gene_index,
        chroms=chroms,
        barcode_whitelist=barcode_whitelist,
        barcode_dist=barcode_dist,
        gem_groups=args.gem_groups,
        umi_length=umi_length,
        umi_min_qual_threshold=args.umi_min_qual_threshold,
    )

    # Barcode bookkeeping is initialised before, and finalised after, the
    # alignment pass.
    reporter.attach_bcs_init()
    num_alignments = process_alignments(
        args.chunk_genome_input,
        args.chunk_trimmed_input,
        outs.output,
        args.bam_comments,
        reporter,
        gene_index,
        star_index,
        args,
    )
    outs.num_alignments = num_alignments
    reporter.attach_bcs_finalize()

    reporter.save(outs.chunked_reporter)