def read_beagle_markerfile(filename, label=None):
    """
    Reads marker locations from a BEAGLE formatted file

    Every line of the file is parsed as one BeagleMarkerRecord and added, in
    file order, to a single ChromosomeTemplate.

    :param filename: The file to be read
    :param label: An optional label to give the chromosome, since the BEAGLE
        format does not require it
    :type filename: string
    :rtype: ChromosomeTemplate
    :raises FileFormatError: if a marker has a negative position, or if a
        marker's position is not strictly greater than the previous marker's
    """
    with smartopen(filename) as f:
        chrom = ChromosomeTemplate(label=label)
        # Start below any valid position so the first marker always passes
        # the ordering check.
        last_pos = -1
        for line in f:
            rec = BeagleMarkerRecord(line)
            if rec.pos < 0:
                raise FileFormatError(
                    'Bad position for genotype: {}'.format(rec.pos))
            if rec.pos <= last_pos:
                # Duplicate positions are rejected as well as decreasing ones
                raise FileFormatError('Markers in file out of order')
            chrom.add_genotype(None,
                               map_position=None,
                               label=rec.label,
                               bp=rec.pos,
                               reference=rec.reference,
                               alternates=rec.alternates)
            last_pos = rec.pos
    return chrom
def test_labelledallele_delabeler():
    """
    Builds a chromatid out of two inheritance spans (one per individual) and
    checks that delabel() yields 1s for the first half of the markers and 4s
    for the second half.
    """
    ngenos = 10  # Number of genotypes per chromosome
    if ngenos % 2 != 0:
        raise ValueError('Even number of genotypes needed')
    half = ngenos // 2

    pop = Population()
    template = ChromosomeTemplate()
    for _ in range(ngenos):
        template.add_genotype()
    pop.add_chromosome(template)

    # First individual carries alleles 1 and 2 on its two chromatids
    first = Individual(pop, 1)
    first._init_genotypes(blankchroms=False)
    first.genotypes[0][0] = Alleles([1] * ngenos)
    first.genotypes[0][1] = Alleles([2] * ngenos)

    # Second individual carries alleles 3 and 4
    second = Individual(pop, 2)
    second._init_genotypes(blankchroms=False)
    second.genotypes[0][0] = Alleles([3] * ngenos)
    second.genotypes[0][1] = Alleles([4] * ngenos)

    front_span = InheritanceSpan(first, 0, 0, 0, half)
    back_span = InheritanceSpan(second, 0, 1, half, ngenos)
    chromatid = LabelledAlleles(spans=[front_span, back_span],
                                chromobj=template)

    expected = Alleles([1] * half + [4] * half)
    observed = chromatid.delabel()
    assert all(observed == expected)
def read_map(filename, only=None):
    """
    Reads a genetic map file into a list of ChromosomeTemplate objects

    The first line of the file is treated as a header and is only used to
    decide the layout: a five-column header means each data line holds
    chromosome, marker, position, male position and female position;
    otherwise each data line holds chromosome, marker and position. Only the
    (sex-averaged) position column is used. Blank lines are skipped.

    A new ChromosomeTemplate is started every time the chromosome column
    changes from the previous accepted line.

    :param filename: Path of the map file to be read
    :param only: Optional container of marker labels; when given (and
        non-empty), markers not in it are skipped
    :returns: the chromosomes in the order they were encountered, each
        finalized
    :rtype: list of ChromosomeTemplate
    :raises ValueError: if a data line has the wrong number of columns or a
        position that cannot be parsed as a float
    """
    chromosomes = []
    with open(filename) as f:
        header = f.readline().split()
        sex_specific_map = len(header) == 5

        current = None
        lastchrom = None
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. trailing newlines at end of file)
                continue

            if sex_specific_map:
                chrom, marker, pos, _male_pos, _female_pos = fields
            else:
                chrom, marker, pos = fields

            if only and marker not in only:
                continue

            pos = float(pos)
            if chrom != lastchrom:
                current = ChromosomeTemplate(label=chrom)
                chromosomes.append(current)
                lastchrom = chrom
            current.add_genotype(map_position=pos, label=marker)

    # Finalize each chromosome exactly once, after all markers are loaded
    for chromosome in chromosomes:
        chromosome.finalize()
    return chromosomes
def read_map(mapfile):
    """
    Reads a PLINK map file into a list of ChromosomeTemplate objects

    Each non-blank line must hold four whitespace-separated fields:
    chromosome, marker label, map position (parsed as a float) and base-pair
    position (parsed as an int). A new ChromosomeTemplate is started whenever
    the chromosome field differs from the previous line's.

    Arguments:
    mapfile: The file to be read

    Returns: a list of finalized ChromosomeTemplate objects, in file order
        (an empty list if the file contains no markers)

    Raises:
    FileFormatError: if a position is negative, or if positions decrease
        within a chromosome
    """
    chroms = []
    chromosome = None
    last_chr, last_pos = None, 0

    with open(mapfile) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Skip blank lines instead of failing on the unpack below
                continue

            chrom, label, cm, pos = fields
            cm, pos = float(cm), int(pos)
            if pos < 0:
                raise FileFormatError("Invalid position: {}".format(pos))

            if chrom != last_chr:
                # We've moved on to a new chromosome, or we've just started.
                # If there is a chromosome in progress, close it up.
                if chromosome is not None:
                    chromosome.finalize()
                    chroms.append(chromosome)
                # Make the next chromosome
                chromosome = ChromosomeTemplate(label=chrom)
            elif pos < last_pos:
                raise FileFormatError("Map file not sorted")

            chromosome.add_genotype(None, cm, label=label, bp=pos)
            last_chr, last_pos = chrom, pos

    # Close up the last chromosome; there is none if the file had no markers
    if chromosome is not None:
        chromosome.finalize()
        chroms.append(chromosome)
    return chroms
def read_beagle_markerfile(filename, label=None):
    """
    Reads marker locations from a BEAGLE formatted file

    Each line is parsed into a BeagleMarkerRecord and appended to one
    ChromosomeTemplate, which is returned once the file is exhausted.

    :param filename: The file to be read
    :param label: An optional label to give the chromosome, since the BEAGLE
        format does not require it
    :type filename: string
    :rtype: ChromosomeTemplate
    :raises FileFormatError: on a negative marker position, or when a
        marker's position does not exceed that of the marker before it
    """
    with smartopen(filename) as f:
        chrom = ChromosomeTemplate(label=label)
        last_pos = -1  # lower than any legal position
        for line in f:
            rec = BeagleMarkerRecord(line)

            if rec.pos < 0:
                raise FileFormatError('Bad position for genotype: {}'.format(
                    rec.pos))
            elif rec.pos <= last_pos:
                # Positions must be strictly increasing
                raise FileFormatError('Markers in file out of order')

            chrom.add_genotype(None, map_position=None, label=rec.label,
                               bp=rec.pos, reference=rec.reference,
                               alternates=rec.alternates)
            last_pos = rec.pos
    return chrom
def read_beagle_markerfile(filename, label=None):
    '''
    Reads marker locations from a beagle format file

    Arguments
    -----
    filename: The file to be read
    label: An optional label to give the chromosome, since the BEAGLE
        format does not require it

    Returns: a ChromosomeTemplate object

    Raises: FileFormatError if a marker position is negative, or if a
        marker's position is not strictly greater than the previous one
    '''
    with open(filename) as f:
        chrom = ChromosomeTemplate(label=label)
        # -1 is below every valid position, so the first marker is accepted
        last_pos = -1
        for line in f:
            rec = BeagleMarkerRecord(line)
            if rec.pos < 0:
                raise FileFormatError(
                    'Bad position for genotype: {}'.format(rec.pos))
            elif rec.pos <= last_pos:
                raise FileFormatError('Markers in file out of order')
            chrom.add_genotype(None,
                               cm=None,
                               label=rec.label,
                               bp=rec.pos,
                               reference=rec.reference,
                               alternates=rec.alternates)
            last_pos = rec.pos
    return chrom
def test_labelledallele_delabeler():
    """
    delabel() on a chromatid made of two spans should return the alleles of
    the first span's source followed by those of the second span's source.
    """
    ngenos = 10  # Number of genotypes per chromosome
    if ngenos % 2:
        raise ValueError('Even number of genotypes needed')
    midpoint = ngenos // 2

    p = Population()
    c = ChromosomeTemplate()
    for _ in range(ngenos):
        c.add_genotype()
    p.add_chromosome(c)

    # (individual id, allele on chromatid 0, allele on chromatid 1)
    founders = []
    for ind_id, hap0, hap1 in ((1, 1, 2), (2, 3, 4)):
        ind = Individual(p, ind_id)
        ind._init_genotypes(blankchroms=False)
        ind.genotypes[0][0] = Alleles([hap0] * ngenos)
        ind.genotypes[0][1] = Alleles([hap1] * ngenos)
        founders.append(ind)
    a, b = founders

    chromatid = LabelledAlleles(
        spans=[
            InheritanceSpan(a, 0, 0, 0, midpoint),
            InheritanceSpan(b, 0, 1, midpoint, ngenos),
        ],
        chromobj=c)

    expected_value = Alleles([1] * midpoint + [4] * midpoint)
    actual_value = chromatid.delabel()
    assert all(actual_value == expected_value)
def test_labelledalleles():
    """
    LabelledAlleles.founder_chromosome() should equal a LabelledAlleles built
    by hand from a single span running from marker 0 to the marker count.
    """
    ngenos = 50

    pop = Population()
    template = ChromosomeTemplate()
    for _ in range(ngenos):
        template.add_genotype()
    pop.add_chromosome(template)

    founder = Individual(pop, 1)

    actual = LabelledAlleles.founder_chromosome(founder, 0, 0,
                                                chromobj=template)
    whole_chromosome = InheritanceSpan(founder, 0, 0, 0, ngenos)
    expected = LabelledAlleles(spans=[whole_chromosome], chromobj=template)

    assert actual == expected
def test_chromosometemplate():
    """Test the marker finder (ChromosomeTemplate.closest_marker)."""
    template = ChromosomeTemplate()
    # 99 markers: marker number n sits at map position n and bp n * 1e6
    for n in range(1, 100):
        template.add_genotype(map_position=n, bp=(n * 1e6))

    # (query position, expected marker index)
    cases = ((0, 0), (5000001, 4), (5999999, 5))
    for query, expected_index in cases:
        assert template.closest_marker(query) == expected_index

    # A position past every marker resolves to the last marker
    assert template.closest_marker(1e10) == template.nmark() - 1
def add_dummy_polygene_chromosomes(self, population, nloc, mean=0, sd=1,
                                   freqs=None, polylabel='Polygene'):
    """
    Creates many independently segregating chromosomes that additively
    influence the trait.

    One single-marker chromosome is created per locus, added to
    ``population``, and registered with ``self.add_effect``.

    :param population: The population to add the chromosomes to
    :param nloc: The number of dummy chromosomes to create
    :param mean: Mean locus additive effect
    :param sd: Standard deviation of locus additive effect. When 0, every
        locus gets exactly ``mean`` as its effect; otherwise effects are
        drawn from a normal distribution
    :param freqs: frequencies of each polygene (defaults to 0.5 for all)
    :param polylabel: Prefix of the label given to each chromosome; the
        locus number is appended
    :type nloc: integer
    :type mean: float
    :type sd: float
    :type freqs: sequence of floats
    :type polylabel: string
    :rtype: void
    """
    if freqs is None:
        # The np.float alias was removed in NumPy 1.24; use the builtin float
        freqs = np.full(nloc, 0.5, dtype=float)

    if sd == 0:
        effects = [mean] * nloc
    else:
        effects = np.random.normal(mean, sd, nloc)

    for i, effect in enumerate(effects):
        # Create the chromosome
        lab = '{}{}'.format(polylabel, i)
        c = ChromosomeTemplate(label=lab)
        c.add_genotype(freqs[i], 0)
        population.add_chromosome(c)

        # Add the effect
        # NOTE(review): the locus is addressed as (i, 0), which presumably
        # assumes the population had no chromosomes before this call -
        # confirm against callers.
        locus = i, 0
        self.add_effect(locus, a=effect, k=0)
def test_chromosometemplate():
    """Checks closest_marker() against a template of 99 evenly spaced markers."""
    # Test the marker finder
    chromosome = ChromosomeTemplate()
    marker_number = 1
    while marker_number < 100:
        physical_position = marker_number * 1e6
        chromosome.add_genotype(map_position=marker_number,
                                bp=physical_position)
        marker_number += 1

    nearest = chromosome.closest_marker
    assert nearest(0) == 0
    assert nearest(5000001) == 4
    assert nearest(5999999) == 5
    assert nearest(1e10) == chromosome.nmark() - 1
def read_map(mapfile):
    """
    Reads a PLINK map file into a ChromosomeSet of ChromosomeTemplate objects

    Each non-blank line must hold four whitespace-separated fields:
    chromosome, marker label, map position (parsed as a float) and base-pair
    position (parsed as an int). A new ChromosomeTemplate is started whenever
    the chromosome field differs from the previous line's.

    :param mapfile: Path of the file to be read
    :type mapfile: string
    :returns: the finalized chromosomes, in file order (an empty
        ChromosomeSet if the file contains no markers)
    :rtype: ChromosomeSet
    :raises FileFormatError: if a position is negative, or if positions
        decrease within a chromosome
    """
    chroms = ChromosomeSet()
    chromosome = None
    last_chr, last_pos = None, 0

    with smartopen(mapfile) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Skip blank lines instead of failing on the unpack below
                continue

            chrom, label, cm, pos = fields
            cm, pos = float(cm), int(pos)
            if pos < 0:
                raise FileFormatError('Invalid position: {}'.format(pos))

            if chrom != last_chr:
                # We've moved on to a new chromosome, or we've just started.
                # If there is a chromosome in progress, close it up.
                if chromosome is not None:
                    chromosome.finalize()
                    chroms.add_chromosome(chromosome)
                # Make the next chromosome
                chromosome = ChromosomeTemplate(label=chrom)
            elif pos < last_pos:
                raise FileFormatError('Map file not sorted')

            chromosome.add_genotype(None, cm, label=label, bp=pos)
            last_chr, last_pos = chrom, pos

    # Close up the last chromosome; there is none if the file had no markers
    if chromosome is not None:
        chromosome.finalize()
        chroms.add_chromosome(chromosome)
    return chroms
def blank_chromosome(size=2):
    """
    Returns a new, unlabelled ChromosomeTemplate on which add_genotype() has
    been called ``size`` times with no arguments.
    """
    template = ChromosomeTemplate()
    for _ in range(size):
        template.add_genotype()
    return template
def read_vcf(filename, require_pass=False, freq_info=None):
    """
    Reads a VCF file and returns a Population object with the individuals
    represented in the file

    Genotypes generated by this function will be sparse

    :param filename: Path of the VCF file to be read
    :param require_pass: only allow variants with PASS under FILTER
    :type require_pass: bool
    :param freq_info: INFO field to get allele frequency from. When None,
        every variant is given a frequency of 0
    :type freq_info: string
    :returns: Individuals in the VCF
    :rtype: Population
    """
    with smartopen(filename) as f:
        genotypes = []
        pop, inds = _vcf_parseheader(f)

        last_chrom = None
        chromobj = None
        for line in f:
            record = VCFRecord(line)

            if require_pass and not record.filter_passed:
                continue

            if record.chrom != last_chrom:
                # Moving to a new chromosome: hand the completed one (if
                # any) over to the population before starting the next
                if last_chrom is not None:
                    pop.add_chromosome(chromobj)
                chromobj = ChromosomeTemplate(label=record.chrom)

            if freq_info is not None:
                freq = _vcf_get_infofreq(record.info, freq_info)
            else:
                freq = 0

            genotypes.append(record.genotypes())
            chromobj.add_genotype(bp=record.pos,
                                  label=record.label,
                                  frequency=freq)
            last_chrom = record.chrom

        # The last chromosome is still pending. If no record was accepted
        # there is no chromosome at all, so don't add None.
        if chromobj is not None:
            pop.add_chromosome(chromobj)

    pop.chromosomes.finalize()

    for ind in inds:
        # Initialize new genotypes
        ind._init_genotypes(sparse=True)

    # Now actually sift through markers and assign them to individuals.
    # Rows were collected in file order; pair each with the
    # (chromosome index, marker index) slot it occupies in the population.
    final_indices = [(chromidx, markidx)
                     for chromidx, chromobj in enumerate(pop.chromosomes)
                     for markidx in range(chromobj.nmark())]

    for raw, (chromidx, markidx) in zip(range(len(genotypes)), final_indices):
        assign_genorow(genotypes[raw], inds, chromidx, markidx)
        # Kill the row so we don't end up with the whole dataset in memory
        # twice
        genotypes[raw] = None

    return pop
def read_vcf(filename, require_pass=False, freq_info=None, info_filters=None):
    '''
    Reads a VCF file and returns a Population object with the individuals
    represented in the file

    Arguments
    -----
    filename: The file to be read
    require_pass: If True, skip records whose filter_passed flag is false
    freq_info: Key in the record's INFO data to take the allele frequency
        from. If the value is comma separated, the first entry is used.
        Records without the key (or when freq_info is None) get frequency 0
    info_filters: Optional iterable of callables. Each is called with the
        VCFRecord; a record is kept only if every call returns a true value

    Returns: a Population object

    Raises: ValueError if an entry of info_filters is not callable, or if
        the file has no '#' header line naming the individuals
    '''
    # Materialize once so a one-shot iterable survives validation and can be
    # reused for every record
    info_filters = list(info_filters) if info_filters else []
    for info_filter in info_filters:
        if not callable(info_filter):
            raise ValueError('Filter not callable')

    with open(filename) as f:
        pop = Population()
        genotypes = []

        # Consume the header: '##' lines are metadata, and the single '#'
        # line lists the individual ids from the tenth column on
        inds = None
        for line in f:
            if line.startswith('##'):
                continue
            elif line.startswith('#'):
                ind_ids = line.strip().split()[9:]
                inds = [Individual(pop, ind_id) for ind_id in ind_ids]
                for ind in inds:
                    pop.register_individual(ind)
                break
        if inds is None:
            raise ValueError('No header line found in VCF file')

        last_chrom = None
        chromobj = None
        for line in f:
            record = VCFRecord(line)

            if not all(info_filter(record) for info_filter in info_filters):
                continue
            if require_pass and not record.filter_passed:
                continue

            if record.chrom != last_chrom:
                if last_chrom is not None:
                    chromobj.finalize()
                    pop.add_chromosome(chromobj)
                chromobj = ChromosomeTemplate(label=record.chrom)

            if freq_info is not None and freq_info in record.info:
                freq = record.info[freq_info]
                if ',' in freq:
                    freq = freq.split(',')[0]
                freq = float(freq)
            else:
                freq = 0

            genotypes.append(record.genotypes())
            chromobj.add_genotype(bp=record.pos,
                                  label=record.label,
                                  frequency=freq)
            last_chrom = record.chrom

        # Close up the last chromosome; there is none if no record was kept
        if chromobj is not None:
            chromobj.finalize()
            pop.add_chromosome(chromobj)

    for ind in inds:
        # Initialize new genotypes
        ind._init_genotypes(sparse=True)

    # Now actually sift through markers and assign them to individuals.
    # Rows were collected in file order; pair each with the
    # (chromosome index, marker index) slot it occupies in the population.
    final_indices = [(chromidx, markidx)
                     for chromidx, chromobj in enumerate(pop.chromosomes)
                     for markidx in range(chromobj.nmark())]

    for raw, (chromidx, markidx) in zip(range(len(genotypes)), final_indices):
        assign_genorow(genotypes[raw], inds, chromidx, markidx)
        # Kill the row so we don't end up with the whole dataset in memory
        # twice
        genotypes[raw] = None

    return pop