def test_labelledallele_delabeler():
    '''
    Delabel a chromatid stitched together from two founders and compare
    the result against an allele sequence written out by hand
    '''
    ngenos = 10  # Number of genotypes per chromosome
    if ngenos % 2 != 0:
        raise ValueError('Even number of genotypes needed')
    midpoint = ngenos // 2

    # One population holding a single chromosome with ngenos markers
    pop = Population()
    template = ChromosomeTemplate()
    for _ in range(ngenos):
        template.add_genotype()
    pop.add_chromosome(template)

    # Each founder chromatid carries one constant allele value, so the
    # source of every position in the delabeled result is unambiguous
    first = Individual(pop, 1)
    first._init_genotypes(blankchroms=False)
    first.genotypes[0][0] = Alleles([1] * ngenos)
    first.genotypes[0][1] = Alleles([2] * ngenos)

    second = Individual(pop, 2)
    second._init_genotypes(blankchroms=False)
    second.genotypes[0][0] = Alleles([3] * ngenos)
    second.genotypes[0][1] = Alleles([4] * ngenos)

    # First half labelled from `first`, second half labelled from `second`
    spans = [
        InheritanceSpan(first, 0, 0, 0, midpoint),
        InheritanceSpan(second, 0, 1, midpoint, ngenos),
    ]
    labelled = LabelledAlleles(spans=spans, chromobj=template)

    # The test expects the 1-alleles for the first half and the 4-alleles
    # for the second half
    expected = Alleles([1] * midpoint + [4] * midpoint)
    observed = labelled.delabel()

    assert all(observed == expected)
def test_labelledallele_delabeler():
    '''
    Check that LabelledAlleles.delabel() turns a pair of inheritance
    spans into the allele values stored on the labelled founders
    '''
    ngenos = 10  # Number of genotypes per chromosome
    if ngenos % 2:
        raise ValueError('Even number of genotypes needed')

    population = Population()
    chrom = ChromosomeTemplate()
    for _ in range(ngenos):
        chrom.add_genotype()
    population.add_chromosome(chrom)

    def make_founder(label, allele_one, allele_two):
        # Build an individual whose two chromatids on chromosome 0 each
        # hold a single repeated allele value
        founder = Individual(population, label)
        founder._init_genotypes(blankchroms=False)
        founder.genotypes[0][0] = Alleles([allele_one] * ngenos)
        founder.genotypes[0][1] = Alleles([allele_two] * ngenos)
        return founder

    a = make_founder(1, 1, 2)
    b = make_founder(2, 3, 4)

    half = ngenos // 2
    left_span = InheritanceSpan(a, 0, 0, 0, half)
    right_span = InheritanceSpan(b, 0, 1, half, ngenos)
    chromatid = LabelledAlleles(spans=[left_span, right_span], chromobj=chrom)

    # Expected outcome: a's 1-alleles followed by b's 4-alleles
    hand_built = [1] * half
    hand_built += [4] * half
    expected_value = Alleles(hand_built)

    actual_value = chromatid.delabel()
    matches = actual_value == expected_value
    assert all(matches)
def test_labelledalleles():
    '''
    A founder chromosome should compare equal to a LabelledAlleles made
    of one span running from marker 0 to the number of markers
    '''
    ngenos = 50

    pop = Population()
    template = ChromosomeTemplate()
    for _ in range(ngenos):
        template.add_genotype()
    pop.add_chromosome(template)

    founder = Individual(pop, 1)

    actual = LabelledAlleles.founder_chromosome(founder, 0, 0,
                                                chromobj=template)

    # A single span over the founder covering the whole chromosome
    whole_chromosome = InheritanceSpan(founder, 0, 0, 0, ngenos)
    expected = LabelledAlleles(spans=[whole_chromosome], chromobj=template)

    assert actual == expected
def read_vcf(filename, require_pass=False, freq_info=None, info_filters=None):
    '''
    Reads a VCF file and returns a Population object with the
    individuals represented in the file

    :param filename: path of the VCF file to read
    :param require_pass: if True, records whose `filter_passed` attribute
        is falsy are skipped
    :param freq_info: key to look up in each record's `info` for the
        marker frequency. When the key is present its value (the first
        entry, if comma-separated) is converted to float; otherwise the
        frequency is 0
    :param info_filters: iterable of callables, each called with a
        VCFRecord. A record is kept only if every callable returns a
        truthy value
    :returns: a Population containing one individual per sample column
        and one chromosome per run of consecutive records sharing a
        `chrom` value
    :raises ValueError: if an entry of `info_filters` is not callable, or
        if the file contains no header line (a line starting with a
        single '#')
    '''
    # Materialize the filters: they are walked once here for validation and
    # again for every record, which would exhaust a one-shot iterator
    info_filters = list(info_filters) if info_filters else []

    for check in info_filters:
        if not callable(check):
            raise ValueError('Filter not callable')

    with open(filename) as f:
        pop = Population()

        # Skip the '##' metadata lines; the first line with a single '#'
        # is the header, whose columns from index 9 on are sample IDs
        inds = None
        for line in f:
            if line.startswith('##'):
                continue
            elif line.startswith('#'):
                ind_ids = line.strip().split()[9:]
                inds = [Individual(pop, ind_id) for ind_id in ind_ids]
                for ind in inds:
                    pop.register_individual(ind)
                break

        if inds is None:
            # Without the header there are no samples to attach genotypes
            # to; fail with a clear message instead of an unbound local
            raise ValueError('No header line found in VCF file')

        # Everything after the header is a variant record
        chromobj = None
        last_chrom = None
        genotypes = []
        for line in f:
            record = VCFRecord(line)

            if not all(check(record) for check in info_filters):
                continue

            if require_pass and not record.filter_passed:
                continue

            # A change in chromosome label closes the current template
            # and starts a new one
            if chromobj is None or record.chrom != last_chrom:
                if chromobj is not None:
                    chromobj.finalize()
                    pop.add_chromosome(chromobj)
                chromobj = ChromosomeTemplate(label=record.chrom)

            if freq_info is not None and freq_info in record.info:
                freq = record.info[freq_info]
                if ',' in freq:
                    # Several values listed: use the first one
                    freq = freq.split(',')[0]
                freq = float(freq)
            else:
                freq = 0

            genotypes.append(record.genotypes())

            chromobj.add_genotype(bp=record.pos,
                                  label=record.label,
                                  frequency=freq)

            last_chrom = record.chrom

        # Close the last chromosome. chromobj is still None when no record
        # was kept (header-only file or everything filtered out)
        if chromobj is not None:
            chromobj.finalize()
            pop.add_chromosome(chromobj)

    for ind in inds:
        # Initialize new genotypes
        ind._init_genotypes(sparse=True)

    # Now actually sift through markers and assign them to individuals.
    # Rows were appended in the same order markers were added to the
    # chromosomes, so the n-th row belongs to the n-th (chromosome, marker)
    # pair in population order
    final_indices = [(chromidx, markidx)
                     for chromidx, chrom in enumerate(pop.chromosomes)
                     for markidx in range(chrom.nmark())]

    for raw, (chromidx, markidx) in zip(range(len(genotypes)), final_indices):
        assign_genorow(genotypes[raw], inds, chromidx, markidx)

        # Kill the row so we don't end up with the whole dataset in
        # memory twice
        genotypes[raw] = None

    return pop