コード例 #1
0
ファイル: __init__.py プロジェクト: yu1033704806/cellranger
def annotate_consensus_contig(reference_path,
                              min_score_ratios,
                              min_word_sizes,
                              contig_name,
                              clonotype_name,
                              seq,
                              quals,
                              read_count=None,
                              umi_count=None,
                              info_dict=None,
                              primers=None,
                              use_features=None):
    """Build an AnnotatedContig for a consensus sequence and annotate it.

    The contig is always created with ``filtered=True`` and
    ``high_confidence=True``. It is then annotated with the feature aligners
    built from ``reference_path``, optionally with primer annotations, and
    finally with its unannotated intervals and CDR3.

    Args:
        reference_path: Forwarded to ``vdj_annot.setup_feature_aligners``.
        min_score_ratios: Forwarded to ``vdj_annot.setup_feature_aligners``.
        min_word_sizes: Forwarded to ``vdj_annot.setup_feature_aligners``.
        contig_name: Name given to the new contig.
        clonotype_name: Stored on the contig as its ``clonotype``.
        seq: Sequence of the contig.
        quals: Qualities for ``seq``, passed through to the contig.
        read_count: Optional read count passed through to the contig.
        umi_count: Optional UMI count passed through to the contig.
        info_dict: Optional extra info passed through to the contig.
        primers: If truthy, primers are aligned and the result stored in
            ``primer_annotations``; otherwise primer annotation is skipped.
        use_features: Forwarded to ``vdj_annot.setup_feature_aligners``.

    Returns:
        The populated ``vdj_annot.AnnotatedContig``.
    """
    consensus = vdj_annot.AnnotatedContig(
        contig_name,
        seq,
        quals=quals,
        clonotype=clonotype_name,
        read_count=read_count,
        umi_count=umi_count,
        info_dict=info_dict,
        filtered=True,
        high_confidence=True,
    )

    # Feature annotation: build the aligners, then run them on the contig.
    types_, aligners, filters = vdj_annot.setup_feature_aligners(
        reference_path,
        min_score_ratios,
        min_word_sizes,
        use_features=use_features,
    )
    consensus.annotations = consensus.annotate_features(types_, aligners, filters)

    # Primer annotation is optional and only done when primers were supplied.
    if primers:
        primer_setup = vdj_annot.setup_primer_aligner(
            primers, VDJ_ANNOTATION_MIN_SCORE_RATIO)
        aligner, aln_filter = primer_setup
        consensus.primer_annotations = consensus.annotate_features_by_group(
            aligner, alignment_filter=aln_filter)

    consensus.unannotated_intervals = consensus.get_unannotated_intervals()
    consensus.annotate_cdr3()
    return consensus
コード例 #2
0
def main(args, outs):
    """Annotate the contigs belonging to this chunk and pickle the results.

    If ``args.vdj_reference_path`` is None, ``outs.chunked_annotations`` is
    set to None and nothing else happens. Otherwise every contig in
    ``args.contigs`` whose barcode is in ``args.barcodes`` is wrapped in a
    ``vdj_annot.AnnotatedContig``, annotated (features, optional primers,
    CDR3) and the resulting list is pickled to ``outs.chunked_annotations``.

    Args:
        args: Stage inputs. Attributes read here: ``vdj_reference_path``,
            ``barcodes``, ``cell_barcodes``, ``min_score_ratios``,
            ``min_word_sizes``, ``primers``, ``contig_summary``,
            ``filter_summary``, ``contigs_fastq`` and ``contigs``.
        outs: Stage outputs. ``chunked_annotations`` is the path the pickle
            is written to (or is set to None when there is no reference).

    Raises:
        AssertionError: If a FASTQ record's sequence or header differs from
            the FASTA record at the same position.
    """
    if args.vdj_reference_path is None:
        outs.chunked_annotations = None
        return

    chunk_contigs = []
    barcodes_in_chunk = set(args.barcodes)

    # Set of barcodes that were called as cells
    if args.cell_barcodes:
        cell_barcodes_set = set(vdj_utils.load_cell_barcodes_json(args.cell_barcodes))
    else:
        cell_barcodes_set = set()

    # Setup feature reference sequences
    feature_types, feature_aligners, feature_filters = vdj_annot.setup_feature_aligners(
        args.vdj_reference_path,
        args.min_score_ratios,
        args.min_word_sizes)

    # Setup primer reference sequences (only used when args.primers is truthy)
    primer_aligner = primer_filter = None
    if args.primers:
        primer_aligner, primer_filter = vdj_annot.setup_primer_aligner(
            args.primers, vdj_constants.VDJ_ANNOTATION_MIN_SCORE_RATIO)

    # Per-contig read / UMI counts, keyed by contig name. Left empty when the
    # summary is not provided or the file does not exist.
    read_counts = {}
    umi_counts = {}
    if args.contig_summary and os.path.isfile(args.contig_summary):
        contig_summary = pd.read_csv(args.contig_summary, header=0, index_col=None, sep='\t')
        for _, row in contig_summary.iterrows():
            read_counts[row.contig_name] = int(row.num_reads)
            umi_counts[row.contig_name] = int(row.num_umis)

    # An empty filter summary file is treated the same as no file at all.
    filter_summary = None
    if args.filter_summary:
        try:
            with open(args.filter_summary) as filter_file:
                filter_summary = vdj_utils.load_contig_summary_table(filter_file)
        except EmptyDataError:
            filter_summary = None

    # The FASTQ is optional, so it cannot share a single `with` statement with
    # the FASTA; try/finally guarantees it is closed on every exit path.
    fastq_file = None
    if args.contigs_fastq is not None:
        fastq_file = open(args.contigs_fastq)
    try:
        fq_iter = None
        if fastq_file is not None:
            fq_iter = tk_fasta.read_generator_fastq(fastq_file, paired_end=False)

        with open(args.contigs) as contigs_file:
            for header, contig_sequence in cr_utils.get_fasta_iter(contigs_file):
                if fq_iter is None:
                    contig_quals = None
                else:
                    # The FASTQ iterator must advance for every FASTA record,
                    # including those skipped below, to keep the files in step.
                    header_fq, contig_sequence_fq, contig_quals = next(fq_iter)
                    assert contig_sequence_fq == contig_sequence, \
                        "FASTQ/FASTA sequence mismatch for contig %r" % (header,)
                    assert header_fq == header, \
                        "FASTQ header %r does not match FASTA header %r" % (header_fq, header)

                barcode = vdj_utils.get_barcode_from_contig_name(header)

                # Only annotate contigs whose barcode is assigned to this chunk
                if barcode not in barcodes_in_chunk:
                    continue

                contig_name = header.split(' ')[0]

                # Without a filter summary every contig is marked filtered=True.
                if filter_summary is not None:
                    filtered = vdj_utils.is_contig_filtered(filter_summary, contig_name)
                else:
                    filtered = True

                contig = vdj_annot.AnnotatedContig(contig_name,
                                                   contig_sequence,
                                                   quals=contig_quals,
                                                   barcode=barcode,
                                                   is_cell=barcode in cell_barcodes_set,
                                                   filtered=filtered,
                                                   read_count=read_counts.get(contig_name),
                                                   umi_count=umi_counts.get(contig_name),
                                                   )

                contig.annotations = contig.annotate_features(feature_types,
                                                              feature_aligners,
                                                              feature_filters)

                if args.primers:
                    contig.primer_annotations = contig.annotate_features_by_group(
                        primer_aligner, alignment_filter=primer_filter)

                contig.annotate_cdr3()

                chunk_contigs.append(contig)
    finally:
        if fastq_file is not None:
            fastq_file.close()

    # Close the output explicitly so the pickle is fully flushed to disk
    # before the stage returns.
    with open(outs.chunked_annotations, 'wb') as out_file:
        cPickle.dump(chunk_contigs, out_file, protocol=cPickle.HIGHEST_PROTOCOL)