Beispiel #1
0
def test_labelledallele_delabeler():
    """
    Check that LabelledAlleles.delabel() turns inheritance spans into alleles.

    Two individuals get constant-valued chromatids (1/2 and 3/4). A labelled
    chromatid is built from the first half of one chromatid of the first
    individual and the second half of one chromatid of the second, and the
    delabelled result is compared with the expected run of 1s followed by 4s.
    """
    ngenos = 10  # Number of genotypes per chromosome
    if ngenos % 2:
        raise ValueError('Even number of genotypes needed')
    half = ngenos // 2

    pop = Population()
    template = ChromosomeTemplate()
    for _ in range(ngenos):
        template.add_genotype()
    pop.add_chromosome(template)

    def make_individual(label, first_allele, second_allele):
        # Build an individual whose two chromatids are constant-valued
        ind = Individual(pop, label)
        ind._init_genotypes(blankchroms=False)
        ind.genotypes[0][0] = Alleles([first_allele] * ngenos)
        ind.genotypes[0][1] = Alleles([second_allele] * ngenos)
        return ind

    a = make_individual(1, 1, 2)
    b = make_individual(2, 3, 4)

    spans = [InheritanceSpan(a, 0, 0, 0, half),
             InheritanceSpan(b, 0, 1, half, ngenos)]
    chromatid = LabelledAlleles(spans=spans, chromobj=template)

    expected_value = Alleles([1] * half + [4] * half)
    actual_value = chromatid.delabel()
    assert all(actual_value == expected_value)
Beispiel #2
0
    def mate(self, pop=None, sex=None, label=None):
        """
        Produce one offspring from this clique.

        The parents are obtained from ``get_male()`` and ``get_female()``
        (per the original documentation these pick at random when more than
        one candidate is available).

        :param pop: Population for the new individual
        :param sex: Sex of the offspring; drawn at random from {0, 1} when
            not specified
        :param label: Label for the offspring individual
        :type pop: Population
        :type sex: 0,1

        :returns: offspring individual
        :rtype: Individual
        :raises ValueError: if ``children_possible()`` is false
        """
        if not self.children_possible():
            raise ValueError("Children not possible from this clique")

        father, mother = self.get_male(), self.get_female()
        offspring_sex = np.random.randint(0, 2) if sex is None else sex
        return Individual(pop, label, father, mother, offspring_sex)
Beispiel #3
0
    def founder_individual(self, register=True, sex=None):
        """
        Create a new founder individual (no parents) in this population.

        The individual is labelled with the current ``self.size()``.

        :param register: when true, the individual is passed to
            ``register_individual`` before being returned
        :param sex: 'm' or 'f' (case-insensitive); a random sex is used
            when None
        :returns: the new individual
        :raises KeyError: if sex is a string other than 'm' or 'f'
        """
        sex_key = sex if sex is None else sex.lower()

        # NOTE: the random draw happens on every call, even when a sex
        # was supplied, so the RNG state always advances by one draw.
        random_sex = np.random.choice([0, 1])
        sex_lookup = {'m': 0, 'f': 1, None: random_sex}

        founder = Individual(self, self.size(), None, None,
                             sex_lookup[sex_key])
        if register:
            self.register_individual(founder)
        return founder
Beispiel #4
0
def test_labelledalleles():
    """
    Check that LabelledAlleles.founder_chromosome() equals a LabelledAlleles
    built from a single InheritanceSpan running from 0 to the number of
    genotypes on the chromosome.
    """
    ngenos = 50

    pop = Population()
    template = ChromosomeTemplate()
    for _ in range(ngenos):
        template.add_genotype()
    pop.add_chromosome(template)

    founder = Individual(pop, 1)
    actual = LabelledAlleles.founder_chromosome(founder, 0, 0,
                                                chromobj=template)

    whole_chromosome = InheritanceSpan(founder, 0, 0, 0, ngenos)
    expected = LabelledAlleles(spans=[whole_chromosome], chromobj=template)

    assert actual == expected
Beispiel #5
0
def _vcf_parseheader(fileobj):
    """
    Consume the header of a VCF stream and create its individuals.

    Lines starting with '##' (meta-information) are skipped. The first line
    starting with a single '#' is treated as the column header: every field
    after the ninth is taken as a sample ID, an Individual is created for
    each one and registered with a fresh Population.

    :param fileobj: iterable of text lines, positioned at the start of the
        VCF file. Iteration stops right after the column header line.
    :returns: tuple of (population, list of individuals in column order)
    :raises FileFormatError: if a data line is encountered before the column
        header, or if the stream ends without a column header
    """
    pop = Population()
    for line in fileobj:
        if line.startswith('##'):
            continue

        if not line.startswith('#'):
            raise FileFormatError("No header line in VCF")

        ind_ids = line.strip().split()[9:]
        inds = [Individual(pop, ind_id) for ind_id in ind_ids]
        for ind in inds:
            pop.register_individual(ind)

        return pop, inds

    # Reached only when the stream was empty or held nothing but '##' lines.
    # Previously this fell through and returned None, which made callers
    # fail on tuple unpacking instead of reporting the malformed file.
    raise FileFormatError("No header line in VCF")
Beispiel #6
0
    def mate(self, ind1, ind2, indlab, sex=None):
        """
        Create an individual as the child of two specified individuals.

        The child is constructed with this object as its population. It is
        only returned; this method does not otherwise register it.

        :param ind1: The first parent
        :param ind2: The second parent
        :type ind1: Individual
        :type ind2: Individual
        :param indlab: ID label for the child
        :param sex: Sex of child, randomly chosen from {0, 1} if not
            specified
        :type sex: {0,1}
        :return: An individual with ind1 and ind2 as parents
        :rtype: Individual
        """
        child_sex = np.random.choice([0, 1]) if sex is None else sex
        return Individual(self, indlab, ind1, ind2, child_sex)
Beispiel #7
0
def read_beagle_genotypefile(filename, pop, missingcode='0'):
    '''
    Reads BEAGLE formatted genotype files

    The file is scanned twice. The first pass reads the leading records:
    an 'I' record creates the individuals (one per pair of data columns)
    and phenotype records fill in ``ind.phenotypes``; it stops at the first
    record of any other kind. The second pass rewinds the file and collects
    every line starting with 'M' to build each individual's genotypes.

    :param filename: Filename of BEAGLE genotype file
    :param pop: the population the individuals are constructed with
    :param missingcode: The value that indicates a missing genotype

    :type missingcode: string
    :returns: nothing; individuals are built and populated as a side effect
    '''
    with smartopen(filename) as beaglefile:
        # Pass 1: individual IDs and phenotype records
        for line in beaglefile:
            record = BeagleGenotypeRecord(line)

            if record.identifier == 'I':
                # Each individual occupies two columns; take every other one
                inds = [Individual(pop, label) for label in record.data[::2]]
                continue

            if not record.is_phenotype_record:
                # We've reached the genotypes, and we're skipping out
                break

            for ind, raw_value in zip(inds, record.data[::2]):
                if record.identifier == 'A':
                    # 'A' records: '2' is stored as True, all else False
                    value = raw_value == '2'
                else:
                    # Other phenotypes: numeric when possible, else as read
                    try:
                        value = float(raw_value)
                    except ValueError:
                        value = raw_value
                ind.phenotypes[record.label] = value

        # Pass 2: marker rows, each chopped into pairs of alleles
        beaglefile.seek(0)
        marker_rows = []
        for line in beaglefile:
            if line.startswith('M'):
                allele_pairs = grouper(BeagleGenotypeRecord(line).data, 2)
                marker_rows.append(list(allele_pairs))

        # Transpose marker-major rows into one allele sequence per individual
        per_individual = zip(*marker_rows)
        for ind, sequentialalleles in zip(inds, per_individual):
            ind.genotypes = gt_from_seq(ind.chromosomes,
                                        sequentialalleles,
                                        missing_code=missingcode)
Beispiel #8
0
    def create_individual(self, population=None):
        """
        Build an Individual from this pedigree record.

        The individual is labelled with the tuple ``(fam, ind_id)`` rather
        than the bare ``ind_id`` so that identical IDs in different families
        do not overwrite each other. The record's ``fa`` and ``mo`` values
        are passed through as given, and the sex is translated through
        ``sex_codes``.

        :param population: Population for the individual to belong to
        :type population: Population

        :rtype: Individual
        """
        return Individual(
            population,
            (self.fam, self.ind_id),
            self.fa,
            self.mo,
            sex_codes[self.sex],
        )
Beispiel #9
0
    def next_generation(self, pop, gensize):
        """
        Create individuals for the next generation by random mating

        For every offspring a father index, a mother index and a sex are
        drawn uniformly at random (with replacement). Offspring are labelled
        0..gensize-1 and are returned without being registered here.

        :param pop: Parent population
        :param gensize: Size of next generation
        :type pop: Population
        :type gensize: int
        :returns: list of the new individuals
        """
        males = pop.males()
        females = pop.females()

        # Keep this draw order (fathers, mothers, sexes) so that seeded
        # runs stay reproducible.
        father_idx = np.random.randint(0, len(males), gensize)
        mother_idx = np.random.randint(0, len(females), gensize)
        sexes = np.random.randint(0, 2, gensize)

        draws = zip(father_idx, mother_idx, sexes)
        return [
            Individual(pop, label, males[fa], females[mo], sex)
            for label, (fa, mo, sex) in enumerate(draws)
        ]
Beispiel #10
0
def test_labelledallele_delabeler():
    """
    Verify LabelledAlleles.delabel() against a hand-built expectation.

    Individuals 1 and 2 carry constant chromatids (alleles 1/2 and 3/4).
    The labelled chromatid takes positions [0, n/2) from one chromatid of
    the first individual and [n/2, n) from one chromatid of the second, so
    the delabelled alleles should be n/2 ones followed by n/2 fours.
    """
    ngenos = 10  # Number of genotypes per chromosome
    if ngenos % 2 != 0:
        raise ValueError('Even number of genotypes needed')
    midpoint = ngenos // 2

    pop = Population()
    template = ChromosomeTemplate()
    for _ in range(ngenos):
        template.add_genotype()
    pop.add_chromosome(template)

    # (label, (allele on chromatid 0, allele on chromatid 1))
    founder_specs = ((1, (1, 2)), (2, (3, 4)))
    founders = []
    for label, (allele0, allele1) in founder_specs:
        ind = Individual(pop, label)
        ind._init_genotypes(blankchroms=False)
        ind.genotypes[0][0] = Alleles([allele0] * ngenos)
        ind.genotypes[0][1] = Alleles([allele1] * ngenos)
        founders.append(ind)
    first, second = founders

    chromatid = LabelledAlleles(
        spans=[
            InheritanceSpan(first, 0, 0, 0, midpoint),
            InheritanceSpan(second, 0, 1, midpoint, ngenos),
        ],
        chromobj=template,
    )

    expected_value = Alleles([1] * midpoint + [4] * midpoint)
    actual_value = chromatid.delabel()
    assert all(actual_value == expected_value)
Beispiel #11
0
def read_ped(filename, population=None, delimiter=None, affected_labels=None,
             population_handler=None, data_handler=None, connect_inds=True,
             onlyinds=None):
    """
    Reads a plink format pedigree file, ie:
        familyid indid father mother sex whatever whatever whatever
    into a pydigree pedigree object, with optional population to
    assign to pedigree members.

    Individuals are first collected in one temporary pedigree keyed by
    (familyid, indid), so identical indids in different families do not
    collide. They are then split into one Pedigree per family, relabelled
    with the bare indid, and have their population and pedigree attributes
    set to that family's Pedigree.

    Arguments
    -----
    filename: The file to be read
    population: The population individuals are constructed with. A new
        Population is created when this is None.
    delimiter: a string defining the field separator, default: any whitespace
    affected_labels: A mapping from the affection column to a status value.
        Labels missing from the mapping give a status of None.
    population_handler: a function called on each Pedigree created (the
        temporary one and each per-family one) to set it up
    data_handler: a function called as data_handler(individual, fields) with
        any fields beyond the sixth column
    connect_inds: build references between individuals. Requires all
        individuals be present in the file
    onlyinds: a collection of (familyid, indid) tuples to be processed,
        allows skipping parts of a file

    Returns: An object of class PedigreeCollection

    Raises: ValueError if a non-blank line has fewer than five fields.
        KeyError if the sex column holds an unrecognised code, or (with
        connect_inds) a parent is not present in the file.
    """
    sex_codes = {'1': 0, '2': 1, 'M': 0, 'F': 1, '0': None, '-9': None}
    if not affected_labels:
        affected_labels = {'1': 0, '2': 1,
                           'A': 1, 'U': 0,
                           'X': None,
                           '-9': None}

    # Tries to get a phenotype and returns unknown on failure
    def getph(ph):
        try:
            return affected_labels[ph]
        except KeyError:
            return None

    # Honour a caller-supplied population instead of always replacing it
    if population is None:
        population = Population()

    p = Pedigree()
    if callable(population_handler):
        population_handler(p)

    pc = PedigreeCollection()

    with open(filename) as f:
        # Parse the lines in the file
        for lineno, line in enumerate(f, start=1):
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) carry no record
                continue

            split = stripped.split(delimiter)
            if len(split) < 5:
                # Previously a short line silently reused the fields of the
                # preceding record (or failed with an unbound-name error)
                raise ValueError(
                    'Line {} of {} has {} fields; at least 5 are '
                    'required'.format(lineno, filename, len(split)))

            fam, ind_id, fa, mo, sex = split[:5]
            aff = split[5] if len(split) > 5 else None

            # Give a special id for now, to prevent overwriting duplicated
            # ids between families
            full_id = (fam, ind_id)

            if onlyinds and (full_id not in onlyinds):
                continue

            ind = Individual(population, full_id, fa, mo, sex)
            p[full_id] = ind
            ind.phenotypes['affected'] = getph(aff)
            ind.pedigree = p
            ind.sex = sex_codes[ind.sex]

            if callable(data_handler) and len(split) > 6:
                data_handler(ind, split[6:])

    # Fix the individual-level data
    if connect_inds:
        for ind in p.individuals:
            fam, _ = ind.label
            # Actually make the references instead of just pointing at strings
            ind.father = p[(fam, ind.father)] if ind.father != '0' else None
            ind.mother = p[(fam, ind.mother)] if ind.mother != '0' else None

            ind.register_with_parents()

    # Place individuals into pedigrees, grouped by family ID
    pedigrees = {}
    for ind in p.individuals:
        pedigrees.setdefault(ind.label[0], []).append(ind)

    for pedigree_label, ped_inds in pedigrees.items():
        ped = Pedigree(label=pedigree_label)

        if callable(population_handler):
            population_handler(ped)

        for ind in ped_inds:
            # Drop the temporary (family, id) label now that each family
            # has its own pedigree
            ind.label = ind.label[1]
            ped[ind.label] = ind
            ind.population = ped
            ind.pedigree = ped
        pc[pedigree_label] = ped

    return pc