def _call_is_hom(variations, is_missing=None):
    """Map ``_call_is_hom_in_memory`` over the genotypes of ``variations``.

    Args:
        variations: Mapping-like object; its ``GT_FIELD`` entry is the
            genotype array handed to ``va.map_blocks``.
        is_missing: Optional index into the result (presumably a boolean
            mask of missing calls -- confirm against callers). When given,
            the selected entries of the result are set to ``False``.

    Returns:
        The array produced by ``va.map_blocks`` (axis 2 of the genotypes
        dropped), with the ``is_missing`` entries forced to ``False``.
    """
    hom_calls = va.map_blocks(_call_is_hom_in_memory, variations[GT_FIELD],
                              drop_axis=2)
    if is_missing is None:
        return hom_calls

    # Entries selected by is_missing are never reported as True.
    hom_calls[is_missing] = False
    return hom_calls
def count_alleles(gts, max_alleles, count_missing=True):
    """Count alleles by mapping ``_count_alleles_in_memory`` over ``gts``.

    Args:
        gts: Genotype array; axis 2 is dropped from the result.
        max_alleles: Forwarded unchanged to ``_count_alleles_in_memory``.
        count_missing: Forwarded unchanged to ``_count_alleles_in_memory``
            as the ``count_missing`` keyword.

    Returns:
        The array produced by ``va.map_blocks``.

    Raises:
        EmptyVariationsError: If ``va.reduce_chunk_dimensions(gts)`` raises
            ``IndexError`` (presumably because ``gts`` holds no variations
            -- confirm). The original ``IndexError`` is attached as the
            cause.
    """
    def _count_alleles(gts_block):
        return _count_alleles_in_memory(gts_block, max_alleles,
                                        count_missing=count_missing)

    try:
        chunks = va.reduce_chunk_dimensions(gts)
    except IndexError as exc:
        # Chain explicitly so the traceback shows where the lookup failed.
        raise EmptyVariationsError() from exc

    return va.map_blocks(_count_alleles, gts, chunks=chunks, drop_axis=(2,))
def calc_mac(variations, max_alleles,
             min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT):
    """Apply ``_calc_mac`` block-wise to the genotypes and mask the result.

    Args:
        variations: Mapping-like object; its ``GT_FIELD`` entry is the
            genotype array handed to ``va.map_blocks``.
        max_alleles: Forwarded to ``_calc_mac`` as the ``max_alleles``
            keyword.
        min_num_genotypes: Forwarded to ``_mask_stats_with_few_samples``.

    Returns:
        Whatever ``_mask_stats_with_few_samples`` returns for the computed
        values, ``variations`` and ``min_num_genotypes``.
    """
    def _private_calc_mac(gts_block):
        return _calc_mac(gts_block, max_alleles=max_alleles)

    # No explicit output chunks are passed (chunks=None). Axis 0 is kept
    # while axes 1 and 2 are dropped; passing ``(gts.chunks[0])`` was
    # considered here at some point but is not used.
    macs = va.map_blocks(
        _private_calc_mac,
        variations[GT_FIELD],
        chunks=None,
        drop_axis=(1, 2),
        dtype=numpy.float64,
    )
    return _mask_stats_with_few_samples(macs, variations, min_num_genotypes)
def _gt_to_missing(variations, field, min_value):
    """Set genotypes to ``MISSING_INT`` where ``variations[field] < min_value``.

    The array stored under ``GT_FIELD`` is modified through item assignment
    and then stored back into ``variations``.

    Args:
        variations: Mapping-like object holding ``GT_FIELD`` and ``field``.
        field: Key of the per-call values compared against ``min_value``.
        min_value: Threshold; calls whose value is strictly below it are
            set to missing.

    Returns:
        dict: ``{FLT_VARS: variations}`` with the same ``variations`` object
        that was passed in.
    """
    stack_axis = 2

    def _stack_in_memory(array):
        return stack_in_memory(array, axis=stack_axis)

    genotypes = variations[GT_FIELD]
    below_threshold = variations[field] < min_value

    # Arrays of different dimensions cannot be used to index one another,
    # so the comparison result is stacked along a new axis to give it the
    # same number of dimensions as the genotypes.
    # NOTE(review): dtype='i4' is declared for an array that is used as a
    # mask below -- confirm this is intended.
    mask = va.map_blocks(_stack_in_memory, below_threshold, dtype='i4',
                         new_axis=stack_axis)

    # An earlier alternative for this assignment was
    # va.assign_with_masking_value(genotypes, MISSING_INT, mask).
    genotypes[mask] = MISSING_INT

    variations[GT_FIELD] = genotypes
    return {FLT_VARS: variations}