def test_count_alleles_subpops(self):
        """Compare per-subpopulation allele counts with counts on sample subsets."""

        # build the array under test on top of the default chunked storage
        storage = chunked.storage_registry['default']
        g = GenotypeChunkedArray(
            storage.array(diploid_genotype_data, chunklen=2)
        )
        subpops = {'foo': [0, 2], 'bar': [1]}
        ac_subpops = g.count_alleles_subpops(subpops)

        # each subpopulation's result must equal the allele counts obtained
        # by first taking only that subpopulation's samples (axis 1)
        for name, sample_indices in subpops.items():
            expected = g.take(sample_indices, axis=1).count_alleles()
            aeq(expected, ac_subpops[name])

        # a boolean mask with three True entries should leave three items
        mask = np.array([True, False, True, False, True])
        selected = ac_subpops.compress(mask)
        eq(3, len(selected))
Beispiel #2
0
def filters_for_haplotyping(
        genotypes: allel.GenotypeChunkedArray,
        variants: allel.VariantChunkedTable,
        chrom: str) -> (allel.GenotypeArray, allel.VariantTable):
    """ Select the variants to use for haplotyping on a single chromosome.

        A variant is kept when it is flagged by 'variants_filter_by_chrom' for
        'chrom' AND its allele counts are segregating. All samples are kept.

        Parameters:
            genotypes (allel.GenotypeChunkedArray): Genotype calls to filter.
            variants (allel.VariantChunkedTable): Variant records; the same
                boolean mask is applied to both 'genotypes' and 'variants',
                so they are presumably aligned row for row.
            chrom (str): Chromosome whose variants should be retained.
        Returns:
            Tuple (allel.GenotypeArray, allel.VariantTable):
                - allel.GenotypeArray: genotypes restricted to the kept variants
                - allel.VariantTable: variants restricted to the kept variants
    """
    # Mask of variants selected for the requested chromosome
    on_chrom_mask = variants_filter_by_chrom(variants, chrom)
    n_on_chrom = np.count_nonzero(on_chrom_mask)
    logger.debug(
        "There are {count_variants_in_chr} variants in chromosome {chrom}".
        format(count_variants_in_chr=n_on_chrom, chrom=chrom))

    # Mask of variants whose allele counts are segregating
    segregating_mask = genotypes.count_alleles().is_segregating()
    n_segregating = np.count_nonzero(segregating_mask)
    logger.debug("There are {count_log_sec} segregating SNPs".format(
        count_log_sec=n_segregating))

    # A variant must pass both filters to be kept
    keep_mask = on_chrom_mask & segregating_mask
    n_kept = np.count_nonzero(keep_mask)
    logger.debug("Number of variants to keep {count_variants_to_keep}".format(
        count_variants_to_keep=n_kept))

    # Subset the genotypes over every sample, then slice with [:] so the
    # result is loaded into memory uncompressed
    every_sample = range(genotypes.n_samples)
    genotypes_uc = genotypes.subset(keep_mask, every_sample)[:]

    # The variants table is sliced with [:] first and compressed afterwards
    variants_uc = variants[:].compress(keep_mask)
    return genotypes_uc, variants_uc
    def test_constructor(self):
        """Exercise GenotypeChunkedArray construction with bad and good input."""

        # omitting the data argument altogether must fail
        with assert_raises(TypeError):
            # noinspection PyArgumentList
            GenotypeChunkedArray()

        # each of these inputs is expected to be refused with a TypeError
        rejected_inputs = (
            # not an array at all
            'foo bar',
            # floating-point values
            np.array([4., 5., 3.7]),
            # one-dimensional
            np.array([1, 2, 3]),
            # two-dimensional -- use HaplotypeChunkedArray instead
            np.array([[1, 2], [3, 4]]),
        )
        for bad_input in rejected_inputs:
            with assert_raises(TypeError):
                GenotypeChunkedArray(bad_input)

        # typed (int8) diploid data first, then polyploid data: contents and
        # dtype must be carried through by setup_instance
        for expected in (diploid_genotype_data, triploid_genotype_data):
            g = self.setup_instance(np.array(expected, dtype='i1'))
            aeq(expected, g)
            eq(np.int8, g.dtype)
 def setup_instance(self, data, **kwargs):
     """Build a GenotypeChunkedArray over `data` held in the default storage.

     The array is created through ``chunked.storage_registry['default']``
     with ``chunklen=2``; extra keyword arguments are forwarded to that call.
     """
     default_storage = chunked.storage_registry['default']
     stored = default_storage.array(data, chunklen=2, **kwargs)
     return GenotypeChunkedArray(stored)
 def setup_instance(self, data, dtype=None):
     """Build a GenotypeChunkedArray over `data` held in hdf5tmp_lzf storage.

     The array is created through ``chunked.hdf5tmp_lzf_storage.array``;
     `dtype` is passed through to that call unchanged.
     """
     return GenotypeChunkedArray(
         chunked.hdf5tmp_lzf_storage.array(data, dtype=dtype)
     )
 def setup_instance(self, data, **kwargs):
     """Build a GenotypeChunkedArray over `data` held in zarrtmp storage.

     The array is created through ``chunked.zarrtmp_storage.array``; any
     keyword arguments are forwarded to that call.
     """
     zarr_storage = chunked.zarrtmp_storage
     stored = zarr_storage.array(data, **kwargs)
     return GenotypeChunkedArray(stored)