Example #1
0
def _call_is_hom(variations, is_missing=None):
    """Apply ``_call_is_hom_in_memory`` block-wise to the GT_FIELD array.

    The array stored under ``GT_FIELD`` in ``variations`` is processed with
    ``va.map_blocks`` and axis 2 is dropped from the result.

    Args:
        variations: Mapping-like object indexed with ``GT_FIELD``.
        is_missing: Optional index/mask. When given, the selected positions
            of the result are set to ``False``.

    Returns:
        The array produced by ``va.map_blocks``, with the ``is_missing``
        positions (if any) forced to ``False``.
    """
    hom_calls = va.map_blocks(_call_is_hom_in_memory,
                              variations[GT_FIELD],
                              drop_axis=2)

    # Nothing to mask out: return the mapped result untouched.
    if is_missing is None:
        return hom_calls

    hom_calls[is_missing] = False
    return hom_calls
Example #2
0
def count_alleles(gts, max_alleles, count_missing=True):
    """Count alleles block-wise by mapping ``_count_alleles_in_memory``.

    Args:
        gts: Array passed to ``va.reduce_chunk_dimensions`` and
            ``va.map_blocks`` (presumably the genotype array -- confirm
            against callers).
        max_alleles: Forwarded positionally to ``_count_alleles_in_memory``.
        count_missing: Forwarded as the ``count_missing`` keyword to
            ``_count_alleles_in_memory``.

    Returns:
        The result of ``va.map_blocks`` with axis 2 dropped.

    Raises:
        EmptyVariationsError: If ``va.reduce_chunk_dimensions(gts)`` raises
            ``IndexError``. The original ``IndexError`` is attached as the
            explicit cause.
    """
    try:
        chunks = va.reduce_chunk_dimensions(gts)
    except IndexError as error:
        # Chain explicitly so the traceback shows the IndexError as the
        # direct cause instead of an incidental error during handling.
        raise EmptyVariationsError() from error

    def _count_alleles(block):
        return _count_alleles_in_memory(block, max_alleles,
                                        count_missing=count_missing)

    return va.map_blocks(_count_alleles, gts, chunks=chunks, drop_axis=(2,))
Example #3
0
def calc_mac(variations, max_alleles,
             min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT):
    """Map ``_calc_mac`` over the GT_FIELD array and mask the result.

    Args:
        variations: Mapping-like object indexed with ``GT_FIELD``; also
            handed to ``_mask_stats_with_few_samples``.
        max_alleles: Forwarded to ``_calc_mac`` as ``max_alleles``.
        min_num_genotypes: Forwarded to ``_mask_stats_with_few_samples``.

    Returns:
        Whatever ``_mask_stats_with_few_samples`` returns for the mapped
        values.
    """

    def _private_calc_mac(block):
        return _calc_mac(block, max_alleles=max_alleles)

    # No explicit output chunks are supplied (chunks=None); axes 1 and 2 are
    # dropped and the result is declared as float64.
    macs = va.map_blocks(
        _private_calc_mac,
        variations[GT_FIELD],
        chunks=None,
        drop_axis=(1, 2),
        dtype=numpy.float64,
    )
    return _mask_stats_with_few_samples(macs, variations, min_num_genotypes)
Example #4
0
def _gt_to_missing(variations, field, min_value):
    """Set to MISSING_INT the GT_FIELD entries where ``field`` < ``min_value``.

    Args:
        variations: Mapping-like object holding ``GT_FIELD`` and ``field``.
            Its ``GT_FIELD`` entry is reassigned with the modified array.
        field: Key of the array compared against ``min_value``.
        min_value: Threshold; positions whose ``field`` value is strictly
            lower are selected.

    Returns:
        A dict ``{FLT_VARS: variations}``.
    """
    gts = variations[GT_FIELD]
    below_threshold = variations[field] < min_value

    def _stack_in_memory(block):
        return stack_in_memory(block, axis=2)

    # Arrays of different dimensions cannot be used to index each other, so
    # the comparison result is expanded with a new axis 2 via stack.
    # NOTE(review): dtype is declared as 'i4' although the result is used as
    # a mask below -- confirm this is intended.
    mask = va.map_blocks(_stack_in_memory, below_threshold,
                         dtype='i4', new_axis=2)

    gts[mask] = MISSING_INT
    variations[GT_FIELD] = gts

    return {FLT_VARS: variations}