コード例 #1
0
ファイル: __init__.py プロジェクト: GWW/cellranger_211_mirror
def call_cell_barcodes(umi_info_path, gem_group):
    """Call cell barcodes for a single gem group based on UMI support.

    Args:
        umi_info_path (str): path to the UMI info h5 file.
        gem_group (int): gem group whose UMIs are used for calling.

    Returns:
        tuple: (bc_support, cell_bcs, rpu_threshold, umi_threshold, confidence)
            bc_support: per-barcode support returned by
                vdj_stats.call_vdj_cells (historically documented here as a
                dict of {barcode: umi_count}).
            cell_bcs: list of barcodes whose support is >= umi_threshold.
            rpu_threshold: read pairs per UMI threshold that was used.
            umi_threshold: UMI threshold that was used.
            confidence: last value returned by vdj_stats.call_vdj_cells,
                passed through untouched.
    """
    # One entry per UMI row: index of the barcode that the UMI belongs to.
    barcode_index_per_umi = vdj_umi_info.get_column(umi_info_path,
                                                    'barcode_idx')
    all_barcodes = vdj_umi_info.get_column(umi_info_path, 'barcodes')

    # The second field of the split barcode is used as its gem group.
    gem_group_per_barcode = np.array(
        [int(cr_utils.split_barcode_seq(barcode)[1])
         for barcode in all_barcodes])

    # Translate the per-barcode membership mask into a per-UMI mask so that
    # only UMIs from the requested gem group are passed on.
    barcode_selected = gem_group_per_barcode == gem_group
    umi_selected = barcode_selected[barcode_index_per_umi]

    read_pairs_per_umi = vdj_umi_info.get_column(umi_info_path, 'reads')

    call_result = vdj_stats.call_vdj_cells(
        umi_barcode_idx=barcode_index_per_umi[umi_selected],
        umi_read_pairs=read_pairs_per_umi[umi_selected],
        barcodes=all_barcodes,
        rpu_mix_init_sd=RPU_MIX_INIT_SD,
        umi_mix_init_sd=UMI_MIX_INIT_SD,
        verbosity=1,
    )
    rpu_threshold, umi_threshold, bc_support, confidence = call_result

    # A barcode is called as a cell when its support reaches the threshold.
    cell_bcs = []
    for barcode, umi_count in bc_support.iteritems():
        if umi_count >= umi_threshold:
            cell_bcs.append(barcode)

    return bc_support, cell_bcs, rpu_threshold, umi_threshold, confidence
コード例 #2
0
ファイル: __init__.py プロジェクト: yu1033704806/cellranger
def call_cell_barcodes(umi_info_path, gem_group):
    """Call cell barcodes for a single gem group based on UMI support.

    Rows of the UMI info that share a barcode and a UMI index are merged
    (their read counts are summed) before the cell-calling routine is run.

    Args:
        umi_info_path (str): path to the UMI info h5 file.
        gem_group (int): gem group whose UMIs are used for calling.

    Returns:
        tuple: (bc_support, cell_bcs, rpu_threshold, umi_threshold, confidence)
            bc_support: per-barcode support returned by
                vdj_stats.call_vdj_cells (historically documented here as a
                dict of {barcode: umi_count}).
            cell_bcs: list of barcodes whose support is >= umi_threshold.
            rpu_threshold: read pairs per UMI threshold that was used.
            umi_threshold: UMI threshold that was used.
            confidence: last value returned by vdj_stats.call_vdj_cells,
                passed through untouched.
    """
    all_barcodes = vdj_umi_info.get_column(umi_info_path, 'barcodes')

    # The second field of the split barcode is used as its gem group.
    gem_group_per_barcode = np.array(
        [int(cr_utils.split_barcode_seq(barcode)[1])
         for barcode in all_barcodes])
    barcode_selected = gem_group_per_barcode == gem_group

    info = vdj_umi_info.read_umi_info(umi_info_path)
    rows = itertools.izip(info['barcode_idx'], info['umi_idx'], info['reads'])

    selected_barcode_idx = []
    selected_read_pairs = []

    # groupby only merges consecutive rows, so this relies on rows of the
    # same barcode being adjacent in the file -- TODO confirm with the writer.
    for barcode_idx, barcode_rows in itertools.groupby(
            rows, key=lambda row: row[0]):
        if barcode_selected[barcode_idx]:
            # Sum the reads of every distinct UMI seen for this barcode.
            reads_by_umi = {}
            for _, umi_idx, n_reads in barcode_rows:
                reads_by_umi[umi_idx] = reads_by_umi.get(umi_idx, 0) + n_reads

            # Emit one (barcode index, read pairs) entry per distinct UMI.
            selected_barcode_idx.extend([barcode_idx] * len(reads_by_umi))
            selected_read_pairs.extend(reads_by_umi.itervalues())

    barcode_idx_array = np.array(
        selected_barcode_idx, dtype=vdj_umi_info.get_dtype('barcode_idx'))
    read_pairs_array = np.array(
        selected_read_pairs, dtype=vdj_umi_info.get_dtype('reads'))

    call_result = vdj_stats.call_vdj_cells(
        umi_barcode_idx=barcode_idx_array,
        umi_read_pairs=read_pairs_array,
        barcodes=all_barcodes,
        rpu_mix_init_sd=RPU_MIX_INIT_SD,
        umi_mix_init_sd=UMI_MIX_INIT_SD,
        verbosity=1,
    )
    rpu_threshold, umi_threshold, bc_support, confidence = call_result

    # A barcode is called as a cell when its support reaches the threshold.
    cell_bcs = []
    for barcode, umi_count in bc_support.iteritems():
        if umi_count >= umi_threshold:
            cell_bcs.append(barcode)

    return bc_support, cell_bcs, rpu_threshold, umi_threshold, confidence
コード例 #3
0
def split(args):
    """Chunk the UMI info HDF5 file by gem group.

    Walks the 'barcode_idx' column and starts a new chunk at every row where
    the gem group of the barcode differs from the gem group of the previous
    barcode.

    Args:
        args: stage arguments; this function reads args.umi_info (path to the
            UMI info file) and, only when the file has no rows,
            args.gem_groups.

    Returns:
        dict: {'chunks': [...]} where each chunk is a dict with the keys
            'gem_group', 'start_row', 'end_row' and '__mem_gb'. The
            'end_row' of one chunk is the 'start_row' of the next one, and
            at least one chunk is always returned.
    """

    def request_mem_gb(first_row, last_row):
        # Ask for twice the (rounded up) estimate for this row range, but
        # never less than the global minimum.
        estimate = vdj_umi_info.get_mem_gb(args.umi_info,
                                           start_row=first_row,
                                           end_row=last_row)
        return max(cr_constants.MIN_MEM_GB, 2 * int(np.ceil(estimate)))

    total_rows = vdj_umi_info.get_num_rows(args.umi_info)
    if total_rows > 1e9:
        print('Warning: There are >1e9 entries in the umi_info - this could potentially cause an out-of-memory error.')

    # The whole column is loaded at once; per the original author's note this
    # will cause an OOM if there are >1.5e9 UMIs.
    barcode_indices = vdj_umi_info.get_column(args.umi_info, 'barcode_idx')
    barcodes = vdj_umi_info.get_column(args.umi_info, 'barcodes')

    chunks = []
    chunk_start = 0
    last_gem_group = None
    last_barcode_idx = None

    for row, barcode_idx in enumerate(barcode_indices):
        # Rows of the barcode we just looked at cannot change the gem group.
        if barcode_idx == last_barcode_idx:
            continue

        _, gem_group = cr_utils.split_barcode_seq(barcodes[barcode_idx])

        gem_group_changed = (last_gem_group is not None
                             and gem_group != last_gem_group)
        if gem_group_changed:
            # The previous gem group ends right before this row.
            mem_gb = request_mem_gb(chunk_start, row)
            chunks.append({
                'gem_group': last_gem_group,
                'start_row': chunk_start,
                'end_row': row,
                '__mem_gb': mem_gb,
            })
            chunk_start = row

        last_gem_group = gem_group
        last_barcode_idx = barcode_idx

    # The last chunk runs to the end of the file.
    final_row = vdj_umi_info.get_num_rows(args.umi_info)
    final_mem_gb = request_mem_gb(chunk_start, final_row)

    # An empty umi info never sets a gem group; supply a dummy one so that a
    # chunk is still produced.
    if last_gem_group is None:
        last_gem_group = args.gem_groups[0]

    chunks.append({
        'gem_group': last_gem_group,
        'start_row': chunk_start,
        'end_row': final_row,
        '__mem_gb': final_mem_gb,
    })

    return {'chunks': chunks}