def phase_parents_by_transmission(g, window_size):
    """Phase parent genotypes from a trio or cross, given progeny genotypes
    already phased by Mendelian transmission.

    Parameters
    ----------
    g : GenotypeArray
        Genotype array, with parents as first two columns and progeny as
        remaining columns, where progeny genotypes are already phased.
    window_size : int
        Number of previous heterozygous sites to include when phasing each
        parent. A number somewhere between 10 and 100 may be appropriate,
        depending on levels of heterozygosity and quality of data.

    Returns
    -------
    g : GenotypeArray
        Genotype array with parents phased where possible. This is the same
        object that was passed in; no new GenotypeArray is created.

    Raises
    ------
    ValueError
        If `g.is_phased` is None, i.e., the progeny have not been phased yet.

    Notes
    -----
    The type, dtype, ploidy and sample-count checks are delegated to the
    `check_*` helpers, which are defined elsewhere.

    """

    # setup
    check_type(g, GenotypeArray)
    check_dtype(g.values, 'i1')
    check_ploidy(g.ploidy, 2)
    if g.is_phased is None:
        raise ValueError(
            'genotype array must first have progeny phased by transmission'
        )
    check_min_samples(g.n_samples, 3)

    # run the phasing
    # N.B., no copy is made in this function, so pass both buffers through
    # memoryview_safe before handing them to the compiled kernel, in the same
    # way phase_by_transmission does for its genotype values
    g._values = memoryview_safe(g.values)
    is_phased = memoryview_safe(g.is_phased).view('u1')
    _opt_phase_parents_by_transmission(g.values, is_phased, window_size)

    # store the mask back in case memoryview_safe handed over a new array
    g.is_phased = is_phased.view(bool)

    # outputs
    return g
def phase_by_transmission(g, window_size, copy=True):
    """Phase progeny and parents of a trio or cross by Mendelian
    transmission, wherever phase can be resolved.

    Parameters
    ----------
    g : array_like, int, shape (n_variants, n_samples, 2)
        Genotype calls. The two parents occupy the first two sample columns
        and every further column is a progeny.
    window_size : int
        How many preceding heterozygous sites are taken into account when
        phasing each parent. Values of roughly 10 to 100 may be suitable,
        depending on heterozygosity levels and data quality.
    copy : bool, optional
        Pass False to attempt phasing in-place. This is only possible when
        the input already has int8 dtype; any other input is copied
        regardless of this flag.

    Returns
    -------
    g : GenotypeArray
        Genotype array with progeny and parents phased where possible.

    """

    # coerce the input to an int8 GenotypeArray
    calls = np.asarray(g, dtype='i1')
    g = GenotypeArray(calls, copy=copy)
    g._values = memoryview_safe(g.values)

    # validate
    check_ploidy(g.ploidy, 2)
    check_min_samples(g.n_samples, 3)

    # first pass: progeny; the kernel's return value becomes the phase mask
    phased = _opt_phase_progeny_by_transmission(g.values)
    g.is_phased = np.asarray(phased).view(bool)

    # second pass: parents, handing over the mask from the first pass
    _opt_phase_parents_by_transmission(g.values, phased, window_size)

    return g
def phase_progeny_by_transmission(g):
    """Phase the progeny of a trio or cross by Mendelian transmission.

    Parameters
    ----------
    g : array_like, int, shape (n_variants, n_samples, 2)
        Genotype calls. The two parents occupy the first two sample columns
        and every further column is a progeny.

    Returns
    -------
    g : GenotypeArray
        An int8 copy of the input with progeny phased where possible. Which
        calls were phased is recorded in its `is_phased` attribute.

    Examples
    --------
    >>> import allel
    >>> g = allel.GenotypeArray([
    ...     [[0, 0], [0, 0], [0, 0]],
    ...     [[1, 1], [1, 1], [1, 1]],
    ...     [[0, 0], [1, 1], [0, 1]],
    ...     [[1, 1], [0, 0], [0, 1]],
    ...     [[0, 0], [0, 1], [0, 0]],
    ...     [[0, 0], [0, 1], [0, 1]],
    ...     [[0, 1], [0, 0], [0, 1]],
    ...     [[0, 1], [0, 1], [0, 1]],
    ...     [[0, 1], [1, 2], [0, 1]],
    ...     [[1, 2], [0, 1], [1, 2]],
    ...     [[0, 1], [2, 3], [0, 2]],
    ...     [[2, 3], [0, 1], [1, 3]],
    ...     [[0, 0], [0, 0], [-1, -1]],
    ...     [[0, 0], [0, 0], [1, 1]],
    ... ], dtype='i1')
    >>> g = allel.phase_progeny_by_transmission(g)
    >>> print(g.to_str(row_threshold=None))
    0/0 0/0 0|0
    1/1 1/1 1|1
    0/0 1/1 0|1
    1/1 0/0 1|0
    0/0 0/1 0|0
    0/0 0/1 0|1
    0/1 0/0 1|0
    0/1 0/1 0/1
    0/1 1/2 0|1
    1/2 0/1 2|1
    0/1 2/3 0|2
    2/3 0/1 3|1
    0/0 0/0 ./.
    0/0 0/0 1/1
    >>> g.is_phased
    array([[False, False,  True],
           [False, False,  True],
           [False, False,  True],
           [False, False,  True],
           [False, False,  True],
           [False, False,  True],
           [False, False,  True],
           [False, False, False],
           [False, False,  True],
           [False, False,  True],
           [False, False,  True],
           [False, False,  True],
           [False, False, False],
           [False, False, False]])

    """

    # always work on a fresh int8 copy of the input
    g = GenotypeArray(g, dtype='i1', copy=True)

    # validate
    check_ploidy(g.ploidy, 2)
    check_min_samples(g.n_samples, 3)

    # the array was copied above, so there is no need to pass it through
    # memoryview_safe before calling the kernel
    phased = _opt_phase_progeny_by_transmission(g.values)
    g.is_phased = np.asarray(phased).view(bool)

    return g