def test_labelledallele_delabeler():
    """
    LabelledAlleles.delabel() should resolve inheritance spans to alleles.

    Two individuals are given constant chromatids (1/2 and 3/4). The
    labelled chromatid takes its first half from the first individual's
    chromatid 0 and its second half from the second individual's
    chromatid 1, so delabelling is expected to give 1s followed by 4s.
    """
    ngenos = 10  # Number of genotypes per chromosome
    if ngenos % 2 == 1:
        raise ValueError('Even number of genotypes needed')
    half = ngenos // 2

    pop = Population()
    template = ChromosomeTemplate()
    for _ in range(ngenos):
        template.add_genotype()
    pop.add_chromosome(template)

    # (individual label, allele filling chromatid 0, allele filling chromatid 1)
    founder_specs = ((1, 1, 2), (2, 3, 4))
    founders = []
    for label, allele_0, allele_1 in founder_specs:
        ind = Individual(pop, label)
        ind._init_genotypes(blankchroms=False)
        ind.genotypes[0][0] = Alleles([allele_0] * ngenos)
        ind.genotypes[0][1] = Alleles([allele_1] * ngenos)
        founders.append(ind)
    first, second = founders

    spans = [
        InheritanceSpan(first, 0, 0, 0, half),
        InheritanceSpan(second, 0, 1, half, ngenos),
    ]
    labelled = LabelledAlleles(spans=spans, chromobj=template)

    expected = Alleles([1] * half + [4] * half)
    observed = labelled.delabel()
    assert all(observed == expected)
def test_recombine():
    """
    recombine() should return a chromosome of the same type and length
    as its inputs, and reject None arguments with a ValueError.
    """
    zeros = Alleles(np.zeros(10))
    ones = Alleles(np.ones(10))
    # Ten values: 1, 11, 21, ..., 91
    genetic_map = np.arange(1, 100, 10)

    child = recombine(zeros, ones, genetic_map)

    assert len(child) == len(genetic_map)
    assert type(child) == type(zeros) == type(ones)

    assert_raises(ValueError, recombine, None, None, None)
def test_alleles():
    """
    Exercises the basic Alleles behaviours: marker count, missingness,
    element-wise equality, copy_span and empty_like.
    """
    first = Alleles(['1', '2', '3', ''])
    second = Alleles(['1', '3', '2', ''])
    assert first.nmark() == second.nmark() == 4

    # Missingness: the empty string is the missing code for string alleles
    assert first.missingcode == ''
    missing_mask = np.array([False, False, False, True])
    assert (first.missing == missing_mask).all()
    assert (first.missing == second.missing).all()

    # Element-wise comparison of the two chromosomes
    matches = (first == second)
    assert (matches == np.array([True, False, False, True])).all()

    # copy_span: positions 5, 6 and 7 of the target take the source values
    target = Alleles(np.zeros(10))
    source = Alleles(np.ones(10))
    target.copy_span(source, 5, 8)
    after_copy = np.array([0.] * 5 + [1.] * 3 + [0.] * 2)
    assert all(target == after_copy)

    # empty_like: compared against an all-zero chromosome of the same dtype
    original = Alleles(np.zeros(10))
    blank = original.empty_like()
    all_zero = Alleles(np.zeros(10), dtype=original.dtype)
    assert all(blank == all_zero)
def test_major_allele():
    """
    Population.major_allele() should report the most common allele
    at a locus across all founders.
    """
    pop = Population()

    # (number of founders, allele on first chromatid, allele on second)
    founder_groups = (
        (1000, 1, 1),  # 2000 A alleles
        (500, 1, 2),   # 500 A alleles, 500 B
    )
    for count, allele_a, allele_b in founder_groups:
        for _ in range(count):
            founder = pop.founder_individual()
            founder.genotypes = [(Alleles([allele_a]), Alleles([allele_b]))]

    assert pop.major_allele((0, 0)) == 1
def test_allele_list():
    """
    Population.allele_list() should return every allele present at a
    locus, one entry per chromatid per individual.
    """
    pop = Population()
    loc = (0, 0)

    # (number of founders, allele on first chromatid, allele on second)
    founder_groups = (
        (1000, 1, 1),  # 2000 A alleles
        (500, 1, 2),   # 500 A alleles, 500 B
    )
    for count, allele_a, allele_b in founder_groups:
        for _ in range(count):
            founder = pop.founder_individual()
            founder.genotypes = [(Alleles([allele_a]), Alleles([allele_b]))]

    expected = [1] * 2500 + [2] * 500
    assert sorted(pop.allele_list(loc)) == expected
def genotypes_from_sequential_alleles(chromosomes, data, missing_code='0'):
    '''
    Converts a flat series of alternating alleles into genotypes.

    Alleles at even positions go to the first strand and alleles at odd
    positions go to the second strand. For example, the series
    '1 2 1 2 1 2' becomes

    chrom1 = [1, 1, 1]
    chrom2 = [2, 2, 2]

    The strands are then cut into consecutive pieces, one per chromosome,
    each piece being as long as that chromosome's ``nmark()``. The result
    is a list in the form:

    ::
        [(chroma, chromb), (chroma, chromb)...]

    The input is copied into a numpy array before any missing values are
    recoded, so ``data`` itself is not modified. Missing values are recoded
    to '' for string data and to 0 otherwise.

    :param chromosomes: templates describing each chromosome, in order
    :param data: The alleles to be turned into genotypes
    :param missing_code: value representing a missing allele
    :type chromosomes: list of ChromosomeTemplate
    :type missing_code: string

    :returns: A list of 2-tuples of Alleles objects
    :raises ValueError: if the type of missing_code is not compatible
        with the dtype of the data
    '''
    alleles = np.array(data)

    if not np.issubdtype(type(missing_code), alleles.dtype):
        raise ValueError(
            'Invalid type for missing code: {}. Expected: {}'.format(
                type(missing_code), alleles.dtype))

    # Recode missing values to the marker used for this kind of data
    recoded_missing = '' if np.issubdtype(alleles.dtype, str) else 0
    alleles[alleles == missing_code] = recoded_missing

    # De-interleave the two strands
    strand_a = alleles[0::2]
    strand_b = alleles[1::2]

    genotypes = []
    offset = 0
    for chrom in chromosomes:
        end = offset + chrom.nmark()
        window = slice(offset, end)
        pair = (Alleles(strand_a[window], template=chrom),
                Alleles(strand_b[window], template=chrom))
        genotypes.append(pair)
        offset = end

    return genotypes
def test_chromwide_ibs():
    """
    chromwide_ibs() should give the per-marker IBS state between two
    diploid genotypes and reject out-of-range missing values.
    """
    # One (allele, allele) pair per marker for each of the two genotypes
    genotype_1 = [(2, 2), (1, 2), (1, 2), (1, 1), (0, 0)]
    genotype_2 = [(2, 2), (1, 2), (2, 2), (2, 2), (1, 1)]

    # Transpose marker-wise pairs into two chromatids per genotype
    a, b = map(Alleles, zip(*genotype_1))
    c, d = map(Alleles, zip(*genotype_2))

    # The final marker is (0, 0) in the first genotype and is expected to
    # be reported as 64 (presumably the default missing value -- confirm)
    expected = np.array([2, 2, 1, 0, 64])
    observed = chromwide_ibs(a, b, c, d)
    assert (observed == expected).all()

    # Disabled sparse variant of the same check:
    # spa, spb = [SparseAlleles(x) for x in zip(*g1)]
    # spc, spd = [SparseAlleles(x) for x in zip(*g2)]
    # assert (chromwide_ibs(spa, spb, spc, spd) == expected).all()

    # Test assertions
    for bad_missingval in (600, -1):
        assert_raises(ValueError, chromwide_ibs, a, b, c, d,
                      missingval=bad_missingval)
def todense(self):
    """
    Builds a dense (Alleles) copy of these genotypes.

    The container's contents are expanded with ``tolist()`` and wrapped in
    an Alleles object that shares this object's template.

    :returns: dense version
    :rtype: Alleles
    """
    values = self.container.tolist()
    return Alleles(values, template=self.template)
def empty_chromosome(self, dtype=np.uint8, sparse=False, refcode=None):
    """
    Creates a blank chromosome tied to this template.

    The dense form is an all-zero Alleles object with one entry per
    marker; the sparse form is a SparseAlleles object of the same size.

    :param dtype: numpy dtype of the dense chromosome (ignored if sparse)
    :param sparse: Should a SparseAlleles object be returned
    :type sparse: bool
    :param refcode: if sparse, what should the refcode be?
    :type refcode: int8_t

    :returns: empty alleles container
    """
    if not sparse:
        blank = np.zeros(self.nmark(), dtype=dtype)
        return Alleles(blank, template=self)

    return SparseAlleles(size=self.nmark(), template=self, refcode=refcode)
def linkageequilibrium_chromosome(self, sparse=False):
    """
    Returns a randomly generated chromosome in linkage equilibrium

    Each marker is drawn independently: a uniform random number is compared
    with that marker's frequency, giving allele 2 when the draw is below
    the frequency and allele 1 otherwise. The sparse form stores the same
    draws shifted down by one (0/1) with a refcode of 0.

    :param sparse: Should the output be sparse
    :type sparse: bool

    :returns: random chromosome
    :rtype: Alleles or SparseAlleles
    :raises ValueError: if any marker frequency is negative
    """
    unspecified = (self.frequencies < 0).any()
    if unspecified:
        raise ValueError('Not all frequencies are specified')

    draws = np.random.random(self.nmark())
    below_freq = np.array(draws < self.frequencies, dtype=np.int8)
    alleles = below_freq + 1

    if not sparse:
        return Alleles(alleles, template=self)

    return SparseAlleles(alleles - 1, refcode=0, template=self)
def linkageequilibrium_chromosomes(self, nchrom):
    """
    Returns many randomly generated chromosomes in linkage equilibrium

    Every marker of every chromosome is drawn independently: a uniform
    random number is compared with the marker's frequency, giving allele 2
    when the draw is below the frequency and allele 1 otherwise.

    :param nchrom: number of chromosomes to generate
    :type nchrom: int

    :returns: random chromosomes, one Alleles object per chromosome
    :rtype: list of Alleles
    :raises ValueError: if any marker frequency is negative
    """
    # Same guard as linkageequilibrium_chromosome: with a negative
    # frequency no draw can fall below it, so the marker would silently
    # come out as allele 1 on every chromosome.
    if np.any(np.asarray(self.frequencies) < 0):
        raise ValueError('Not all frequencies are specified')

    draws = np.random.random((nchrom, self.nmark()))
    chroms = ((draws < self.frequencies) + 1).astype(np.int8)

    # NOTE(review): unlike linkageequilibrium_chromosome, these Alleles are
    # created without template=self -- confirm whether that is intended.
    return [Alleles(r) for r in chroms]
def test_alleles():
    """
    Checks marker count, missingness, equality, copy_span and empty_like
    on Alleles objects.
    """
    left = Alleles(['1', '2', '3', ''])
    right = Alleles(['1', '3', '2', ''])
    assert left.nmark() == right.nmark() == 4

    # Test missingness: only the last (empty string) marker is missing
    assert left.missingcode == ''
    only_last_missing = np.array([False, False, False, True])
    assert (left.missing == only_last_missing).all()
    assert (left.missing == right.missing).all()

    # Test equality: the middle two markers differ between the chromosomes
    same = (left == right)
    assert (same == np.array([True, False, False, True])).all()

    # Test copy span: indices 5 through 7 are taken from the all-ones source
    dest = Alleles(np.zeros(10))
    src = Alleles(np.ones(10))
    dest.copy_span(src, 5, 8)
    copied = np.array([0., 0., 0., 0., 0., 1., 1., 1., 0., 0.])
    assert all(dest == copied)

    # Test empty_like: result should equal zeros of the source's dtype
    base = Alleles(np.zeros(10))
    emptied = base.empty_like()
    zeros_like_base = Alleles(np.zeros(10), dtype=base.dtype)
    assert all(emptied == zeros_like_base)
# Line-profiles pydigree.common.spans on a synthetic chromosome and prints
# the per-line timing report.
from pydigree.common import spans
from pydigree.genotypes import Alleles

# line_profiler is optional: when it is not installed, report that and
# exit with status 0 instead of failing.
try:
    import line_profiler
except ImportError:
    print("No line profiler, skipping test.")
    import sys
    sys.exit(0)

# 10000 markers, all initially 0
test_data = Alleles([0] * 10000)
func = spans

# Starting at index 1 and stepping by 200, set a run of 100 markers to 1,
# so the data alternates between blocks of 1s and blocks of 0s.
for start in range(1, 10000, 200):
    test_data[start:(start + 100)] = 1

# Run the function once under the profiler and print the statistics
profile = line_profiler.LineProfiler(func)
profile.runcall(func, test_data)
profile.print_stats()