def test_grouper():
    """Check grouper's chunk length, chunk contents and None-padded tail."""
    # Even-length input: 200 items, requested in chunks of two.
    even_chunks = list(grouper([1, 2] * 100, 2))
    for chunk in even_chunks:
        assert len(chunk) == 2
    for chunk in even_chunks:
        assert chunk == (1, 2)

    # Odd-length input: the last chunk is expected to be filled out with None.
    odd_chunks = list(grouper([1, 2] * 100 + [1], 2))
    assert odd_chunks[-1] == (1, None)
def test_grouper():
    """Verify that grouper yields (1, 2) pairs and pads a short tail with None."""
    size = 2
    expected_pair = (1, 2)

    # Input whose length is an exact multiple of the chunk size.
    full = list(grouper([1, 2] * 100, size))
    assert all(len(chunk) == size for chunk in full)
    assert all(chunk == expected_pair for chunk in full)

    # One extra element leaves an incomplete final chunk.
    ragged = list(grouper([1, 2] * 100 + [1], size))
    last_chunk = ragged[-1]
    assert last_chunk == (1, None)
def test_ld():
    """Check Population.ld(method='D') against a textbook example with D = 0.

    The population is built from 10000 haplotypes in which the frequency of
    the 'A'-'B' haplotype (25/10000 = 0.0025) equals the product of the
    frequencies of 'A' (500/10000) and 'B' (500/10000), so the expected
    value of D is 0.
    """
    # Data from Hartl & Clark, Table 2.2 (pg 85)
    haplotypes = chain(
        [Alleles(['A', 'B'])] * 25,
        [Alleles(['A', 'b'])] * 475,
        [Alleles(['a', 'B'])] * 475,
        [Alleles(['a', 'b'])] * 9025
    )

    pop = Population()
    # Consecutive haplotypes are paired up; each pair becomes the only entry
    # in one founder's genotype list.
    for chroms in grouper(haplotypes, 2):
        ind = pop.founder_individual()
        ind.genotypes = [chroms]

    known_D = 0
    # NOTE(review): the two tuples are presumably (chromosome, marker)
    # indices -- confirm against Population.ld.
    observed_D = pop.ld((0, 0), (0, 1), method='D')

    # D is presumably derived from floating-point frequencies, where
    # 0.05 * 0.05 is not exactly 0.0025, so compare with an absolute
    # tolerance rather than exact equality.
    assert abs(observed_D - known_D) < 1e-9
def read_beagle_genotypefile(filename, pop, missingcode='0'):
    '''
    Reads BEAGLE formatted genotype files

    The file is read in two passes. The first pass handles the leading
    identifier ('I') line, which creates one Individual per label, and any
    phenotype lines, which are stored in each individual's ``phenotypes``
    under the record's label. It stops at the first line that is neither.
    The second pass rewinds the file, collects every line starting with
    'M', and assigns the resulting genotypes to the individuals.

    Phenotype values on 'A' lines are stored as booleans (True when the
    value is '2'). Values on other phenotype lines are converted to float
    when possible and otherwise kept as the original string.

    Arguments
    :param filename: Filename of BEAGLE genotype file
    :param pop: the population handed to each Individual that is created
    :param missingcode: The value that indicates a missing genotype
    :type missingcode: string
    :rtype: void
    :raises ValueError: if the file has no identifier ('I') line before its
        phenotype or genotype lines
    '''
    with smartopen(filename) as f:
        # Stays None until an identifier line has been seen.
        inds = None

        for line in f:
            rec = BeagleGenotypeRecord(line)

            if rec.identifier == 'I':
                # Every other entry is used: presumably each individual
                # occupies two adjacent columns -- confirm against the format.
                inds = [Individual(pop, label) for label in rec.data[::2]]
            elif rec.is_phenotype_record:
                if inds is None:
                    raise ValueError(
                        'Phenotype line found before identifier (I) line '
                        'in BEAGLE file: {}'.format(filename))

                for ind, raw_status in zip(inds, rec.data[::2]):
                    if rec.identifier == 'A':
                        pheno_status = raw_status == '2'
                    else:
                        try:
                            pheno_status = float(raw_status)
                        except ValueError:
                            # Non-numeric values are kept as strings.
                            pheno_status = raw_status
                    ind.phenotypes[rec.label] = pheno_status
            else:
                # We've reached the genotypes, and we're skipping out
                break

        if inds is None:
            # Without this check the code below would fail with an opaque
            # UnboundLocalError on a file that lacks an identifier line.
            raise ValueError(
                'No identifier (I) line found in BEAGLE file: '
                '{}'.format(filename))

        # Second pass: re-read from the top, keeping only the 'M' lines.
        f.seek(0)
        gtrows = [list(grouper(BeagleGenotypeRecord(x).data, 2))
                  for x in f if x.startswith('M')]

        # Transpose from one row per 'M' line to one sequence per individual.
        genotypes = zip(*gtrows)
        for ind, sequentialalleles in zip(inds, genotypes):
            ind.genotypes = gt_from_seq(ind.chromosomes,
                                        sequentialalleles,
                                        missing_code=missingcode)
def read_beagle_genotypefile(filename, pop, missingcode='0'):
    '''
    Reads BEAGLE formatted genotype files

    The file is read in two passes. The first pass handles the leading
    identifier ('I') line, which creates one Individual per label, and any
    phenotype lines, which are stored in each individual's ``phenotypes``
    under the record's label. It stops at the first line that is neither.
    The second pass rewinds the file, collects every line starting with
    'M', and assigns the resulting genotypes to the individuals.

    Phenotype values on 'A' lines are stored as booleans (True when the
    value is '2'). Values on other phenotype lines are converted to float
    when possible and otherwise kept as the original string.

    Arguments
    :param filename: Filename of BEAGLE genotype file
    :param pop: the population handed to each Individual that is created
    :param missingcode: The value that indicates a missing genotype
    :type missingcode: string
    :rtype: void
    :raises ValueError: if the file has no identifier ('I') line before its
        phenotype or genotype lines
    '''
    with smartopen(filename) as f:
        # Stays None until an identifier line has been seen.
        inds = None

        for line in f:
            rec = BeagleGenotypeRecord(line)

            if rec.identifier == 'I':
                # Every other entry is used: presumably each individual
                # occupies two adjacent columns -- confirm against the format.
                inds = [Individual(pop, label) for label in rec.data[::2]]
            elif rec.is_phenotype_record:
                if inds is None:
                    raise ValueError(
                        'Phenotype line found before identifier (I) line '
                        'in BEAGLE file: {}'.format(filename))

                for ind, raw_status in zip(inds, rec.data[::2]):
                    if rec.identifier == 'A':
                        pheno_status = raw_status == '2'
                    else:
                        try:
                            pheno_status = float(raw_status)
                        except ValueError:
                            # Non-numeric values are kept as strings.
                            pheno_status = raw_status
                    ind.phenotypes[rec.label] = pheno_status
            else:
                # We've reached the genotypes, and we're skipping out
                break

        if inds is None:
            # Without this check the code below would fail with an opaque
            # UnboundLocalError on a file that lacks an identifier line.
            raise ValueError(
                'No identifier (I) line found in BEAGLE file: '
                '{}'.format(filename))

        # Second pass: re-read from the top, keeping only the 'M' lines.
        f.seek(0)
        gtrows = [list(grouper(BeagleGenotypeRecord(x).data, 2))
                  for x in f if x.startswith('M')]

        # Transpose from one row per 'M' line to one sequence per individual.
        genotypes = zip(*gtrows)
        for ind, sequentialalleles in zip(inds, genotypes):
            ind.genotypes = gt_from_seq(ind.chromosomes,
                                        sequentialalleles,
                                        missing_code=missingcode)