def test_count_alleles_subpops(self):
    """Compare count_alleles_subpops() with counts taken on explicit sample subsets."""
    storage = chunked.storage_registry['default']
    g = GenotypeChunkedArray(storage.array(diploid_genotype_data, chunklen=2))
    subpops = {'foo': [0, 2], 'bar': [1]}
    ac_subpops = g.count_alleles_subpops(subpops)

    # every subpopulation entry must equal the allele counts obtained by
    # first taking just that subpopulation's sample columns
    for name, sample_indices in subpops.items():
        expected = g.take(sample_indices, axis=1).count_alleles()
        aeq(expected, ac_subpops[name])

    # compressing the result with a mask holding three True values
    # is expected to leave three entries
    mask = np.array([True, False, True, False, True])
    compressed = ac_subpops.compress(mask)
    eq(3, len(compressed))
def filters_for_haplotyping(
        genotypes: allel.GenotypeChunkedArray,
        variants: allel.VariantChunkedTable,
        chrom: str) -> (allel.GenotypeArray, allel.VariantTable):
    """
    Reduce 'genotypes' and 'variants' to the variants used for haplotyping.

    A variant is kept when it is selected by 'variants_filter_by_chrom' for
    the given chromosome AND its allele counts are reported as segregating.
    The number of variants passing each step is logged at debug level.

    Parameters:
        genotypes (allel.GenotypeChunkedArray): Genotype calls to filter.
        variants (allel.VariantChunkedTable): Variant records to filter.
        chrom (str): Chromosome to restrict the haplotype process to.

    Returns:
        Tuple (allel.GenotypeArray, allel.VariantTable):
            - allel.GenotypeArray: genotypes of the kept variants,
              for all samples
            - allel.VariantTable: records of the kept variants
    """
    # Step 1: variants on the requested chromosome
    # (presumably a boolean mask with one entry per variant -- it is combined
    # with '&' and counted with np.count_nonzero below)
    in_chrom = variants_filter_by_chrom(variants, chrom)
    n_in_chrom = np.count_nonzero(in_chrom)
    logger.debug(
        "There are {count_variants_in_chr} variants in chromosome {chrom}".
        format(count_variants_in_chr=n_in_chrom, chrom=chrom))

    # Step 2: segregating SNPs, judged from the allele counts
    segregating = genotypes.count_alleles().is_segregating()
    n_segregating = np.count_nonzero(segregating)
    logger.debug("There are {count_log_sec} segregating SNPs".format(
        count_log_sec=n_segregating))

    # Step 3: a variant must pass both filters
    keep = in_chrom & segregating
    n_keep = np.count_nonzero(keep)
    logger.debug("Number of variants to keep {count_variants_to_keep}".format(
        count_variants_to_keep=n_keep))

    # Subsets: perform the subset and load the results into memory
    # uncompressed. Every sample column is retained; only variants are dropped.
    all_samples = range(genotypes.n_samples)
    genotypes_uc = genotypes.subset(keep, all_samples)[:]
    variants_in_memory = variants[:]
    variants_uc = variants_in_memory.compress(keep)

    return genotypes_uc, variants_uc
def test_constructor(self):
    """Check that the constructor rejects invalid input and accepts typed genotype data."""
    # missing data arg
    with assert_raises(TypeError):
        # noinspection PyArgumentList
        GenotypeChunkedArray()

    # inputs that must each be rejected with a TypeError
    rejected_inputs = (
        # data has wrong dtype
        'foo bar',
        # data has wrong dtype
        np.array([4., 5., 3.7]),
        # data has wrong dimensions
        np.array([1, 2, 3]),
        # data has wrong dimensions (use HaplotypeChunkedArray instead)
        np.array([[1, 2], [3, 4]]),
    )
    for bad_data in rejected_inputs:
        with assert_raises(TypeError):
            GenotypeChunkedArray(bad_data)

    # diploid data (typed), then polyploid data (typed): contents and the
    # int8 dtype must both be preserved
    for expected in (diploid_genotype_data, triploid_genotype_data):
        g = self.setup_instance(np.array(expected, dtype='i1'))
        aeq(expected, g)
        eq(np.int8, g.dtype)
def setup_instance(self, data, **kwargs):
    """Build a GenotypeChunkedArray over `data` held in the 'default' registered storage.

    The storage array is created with ``chunklen=2``; any extra keyword
    arguments are forwarded to the storage's ``array()`` call.
    """
    storage = chunked.storage_registry['default']
    stored = storage.array(data, chunklen=2, **kwargs)
    return GenotypeChunkedArray(stored)
def setup_instance(self, data, dtype=None):
    """Build a GenotypeChunkedArray over `data` held in ``chunked.hdf5tmp_lzf_storage``.

    `dtype` is passed straight through to the storage's ``array()`` call.
    """
    storage = chunked.hdf5tmp_lzf_storage
    stored = storage.array(data, dtype=dtype)
    return GenotypeChunkedArray(stored)
def setup_instance(self, data, **kwargs):
    """Build a GenotypeChunkedArray over `data` held in ``chunked.zarrtmp_storage``.

    Any keyword arguments are forwarded to the storage's ``array()`` call.
    """
    storage = chunked.zarrtmp_storage
    stored = storage.array(data, **kwargs)
    return GenotypeChunkedArray(stored)