Example #1
0
def test_labelledallele_delabeler():
    """
    Check that LabelledAlleles.delabel() resolves spans to founder alleles.

    Two individuals are given constant haplotypes (1/2 and 3/4). A chromatid
    is described by two spans: the first half taken from individual ``a``
    (haplotype 0) and the second half from individual ``b`` (haplotype 1).
    Delabelling is expected to give 1s followed by 4s.
    """
    ngenos = 10  # Number of genotypes per chromosome
    if ngenos % 2 != 0:
        raise ValueError('Even number of genotypes needed')
    half = ngenos // 2

    pop = Population()
    template = ChromosomeTemplate()
    for _ in range(ngenos):
        template.add_genotype()
    pop.add_chromosome(template)

    def make_founder(label, first_allele, second_allele):
        # Build an individual whose two haplotypes are constant-valued
        founder = Individual(pop, label)
        founder._init_genotypes(blankchroms=False)
        founder.genotypes[0][0] = Alleles([first_allele] * ngenos)
        founder.genotypes[0][1] = Alleles([second_allele] * ngenos)
        return founder

    a = make_founder(1, 1, 2)
    b = make_founder(2, 3, 4)

    first_half = InheritanceSpan(a, 0, 0, 0, half)
    second_half = InheritanceSpan(b, 0, 1, half, ngenos)
    chromatid = LabelledAlleles(spans=[first_half, second_half],
                                chromobj=template)

    expected = Alleles([1] * half + [4] * half)
    observed = chromatid.delabel()

    assert all(observed == expected)
Example #2
0
def test_labelledallele_delabeler():
    """
    Delabelling a chromatid built from two spans returns the real alleles.

    The chromatid's first half points at haplotype 0 of the first individual
    (all 1s) and its second half at haplotype 1 of the second individual
    (all 4s); ``delabel()`` must reproduce exactly that sequence.
    """
    ngenos = 10  # Number of genotypes per chromosome
    half, leftover = divmod(ngenos, 2)
    if leftover:
        raise ValueError('Even number of genotypes needed')

    pop = Population()
    chrom = ChromosomeTemplate()
    for _ in range(ngenos):
        chrom.add_genotype()
    pop.add_chromosome(chrom)

    # (individual label, (allele on haplotype 0, allele on haplotype 1))
    layout = ((1, (1, 2)), (2, (3, 4)))
    founders = []
    for label, haplotype_alleles in layout:
        founder = Individual(pop, label)
        founder._init_genotypes(blankchroms=False)
        for hapidx, allele in enumerate(haplotype_alleles):
            founder.genotypes[0][hapidx] = Alleles([allele] * ngenos)
        founders.append(founder)
    a, b = founders

    chromatid = LabelledAlleles(
        spans=[
            InheritanceSpan(a, 0, 0, 0, half),
            InheritanceSpan(b, 0, 1, half, ngenos),
        ],
        chromobj=chrom,
    )

    expected_value = Alleles([1] * half + [4] * half)
    actual_value = chromatid.delabel()

    assert all(actual_value == expected_value)
Example #3
0
    def __init__(self, label=None):
        """
        Set up a new pedigree.

        Runs the Population initializer first, then records the label and
        starts with empty ``kinmat`` and ``fratmat`` dictionaries.

        :param label: pedigree label
        """
        Population.__init__(self)
        self.kinmat, self.fratmat = {}, {}
        self.label = label
Example #4
0
def test_labelledalleles():
    """
    A founder chromosome must equal one span covering every marker.

    ``LabelledAlleles.founder_chromosome`` is compared against a hand-built
    LabelledAlleles holding a single InheritanceSpan running from 0 to the
    number of markers on the chromosome.
    """
    ngenos = 50

    pop = Population()
    template = ChromosomeTemplate()
    for _ in range(ngenos):
        template.add_genotype()
    pop.add_chromosome(template)

    founder = Individual(pop, 1)

    actual = LabelledAlleles.founder_chromosome(founder, 0, 0,
                                                chromobj=template)
    full_span = InheritanceSpan(founder, 0, 0, 0, ngenos)
    expected = LabelledAlleles(spans=[full_span], chromobj=template)

    assert actual == expected
Example #5
0
def test_labelledalleles():
    """
    Verify the LabelledAlleles produced for a founder.

    The result of ``founder_chromosome`` should compare equal to a
    LabelledAlleles whose only span starts at 0 and stops at ``ngenos``.
    """
    ngenos = 50
    make_span = InheritanceSpan

    population = Population()
    chromosome = ChromosomeTemplate()
    remaining = ngenos
    while remaining > 0:
        chromosome.add_genotype()
        remaining -= 1
    population.add_chromosome(chromosome)

    ind = Individual(population, 1)
    observed = LabelledAlleles.founder_chromosome(ind, 0, 0,
                                                  chromobj=chromosome)
    reference = LabelledAlleles(spans=[make_span(ind, 0, 0, 0, ngenos)],
                                chromobj=chromosome)
    assert observed == reference
Example #6
0
def _vcf_parseheader(fileobj):
    """
    Consume the header of a VCF file and create the individuals it names.

    Lines starting with ``##`` are skipped. The first line starting with a
    single ``#`` is treated as the column header: every whitespace-separated
    field after the ninth is used as an individual ID, and an Individual is
    created and registered in a new Population for each one.

    :param fileobj: iterable of text lines, positioned at the start of the
        VCF data. Lines up to and including the header line are consumed.

    :returns: the new population and the individuals in header column order
    :rtype: tuple of (Population, list of Individual)

    :raises FileFormatError: if a non-header line is met, or the input ends,
        before a header line has been seen
    """
    pop = Population()
    for line in fileobj:
        if line.startswith('##'):
            # Meta-information line: nothing to extract here
            continue

        if not line.startswith('#'):
            # Reached a data line without ever seeing the column header
            raise FileFormatError("No header line in VCF")

        # The first nine columns are fixed fields; the rest are sample IDs
        ind_ids = line.strip().split()[9:]
        inds = [Individual(pop, ind_id) for ind_id in ind_ids]
        for ind in inds:
            pop.register_individual(ind)

        return pop, inds

    # Input was empty or held only '##' lines. Falling off the end would
    # return None and break callers that unpack the result.
    raise FileFormatError("No header line in VCF")
Example #7
0
def read_beagle(genofile, markerfile):
    """
    Read BEAGLE formatted genotype data into a new population.

    The marker file is read into a chromosome, which is finalized and added
    to the population before the genotype file is read into that population.

    :param genofile: Filename containing genotype information for individuals
    :param markerfile: Filename containing marker location and allele
        information corresponding to genofile

    :type genofile: string
    :type markerfile: string

    :rtype: Population
    """
    population = Population()

    # Markers first: the chromosome is in place before genotypes are read
    template = read_beagle_markerfile(markerfile)
    template.finalize()
    population.chromosomes.add_chromosome(template)

    read_beagle_genotypefile(genofile, population)
    return population
Example #8
0
def read_ped(filename,
             population=None,
             delimiter=None,
             affected_labels=None,
             population_handler=None,
             data_handler=None,
             connect_inds=True,
             onlyinds=None):
    """
    Reads a plink format pedigree file, ie:

    ::

        familyid indid father mother sex whatever whatever whatever

    into a pydigree pedigree object, with optional population to
    assign to pedigree members. If you don't provide a population
    you can't simulate genotypes!

    :param filename: The file to be read
    :param population: The population to assign individuals to. A new
        Population is created when this is None
    :param delimiter: a string defining the field separator,
        default: any whitespace
    :param affected_labels: The labels that determine affection status.
        When empty or None, a default mapping is used ('1'/'U' -> 0,
        '2'/'A' -> 1, 'X'/'-9' -> None). Labels not in the mapping give None
    :param population_handler: a function to set up the population. It is
        called with the temporary pedigree and also handed to
        sort_pedigrees. Ignored if not callable
    :param data_handler: a function to turn the
        data into useful individual information. Called as
        ``data_handler(individual, data)`` for records that carry data.
        Ignored if not callable
    :param connect_inds: build references between individuals. Requires all
        individuals be present in the file
    :param onlyinds: only include data for specified individuals. May be
        any iterable (including a one-shot iterator) of hashable IDs. An
        empty or None value disables filtering

    :type filename: string
    :type population: Population
    :type delimiter: string
    :type affected_labels: dict (str -> value)
    :type population_handler: callable
    :type data_handler: callable
    :type connect_inds: bool
    :type onlyinds: iterable


    :returns: individuals contained in the pedigree file
    :rtype: PedigreeCollection
    """

    if not affected_labels:
        affected_labels = {
            '1': 0,
            '2': 1,
            'A': 1,
            'U': 0,
            'X': None,
            '-9': None
        }

    def _ignore(*args):
        # Stand-in used when the caller supplies no usable handler
        return None

    if not callable(data_handler):
        data_handler = _ignore

    if not callable(population_handler):
        population_handler = _ignore

    # Materialize the wanted IDs once. Membership is tested for every line:
    # a list would be scanned linearly each time, and a one-shot iterator
    # would be consumed by the first test, giving wrong answers afterwards.
    wanted = set(onlyinds) if onlyinds else None

    population = Population() if population is None else population
    p = Pedigree()

    population_handler(p)

    # Step 1: Read the data and create the individuals
    with smartopen(filename) as f:
        # Parse the lines in the file
        for line in f:
            rec = PEDRecord(line, delimiter)

            if wanted and (rec.ind_id not in wanted):
                continue

            ind = rec.create_individual(population)
            ind.pedigree = p
            ind.phenotypes['affected'] = affected_labels.get(rec.aff, None)
            p[ind.label] = ind

            if rec.data:
                data_handler(p[ind.label], rec.data)

    # Step 2: Create the between-individual relationships

    # Fix the individual-level data: individuals currently only have parent-ids
    # in their parent fields and not references to actual individuals
    if connect_inds:
        connect_individuals(p)

    # Step 3: Separate the individuals into pedigrees
    pc = sort_pedigrees(p.individuals, population_handler)

    return pc
Example #9
0
 def __init__(self, label=None):
     """
     Initialize the Population base state, then store the label and
     start with empty ``kinmat`` and ``fratmat`` dictionaries.

     :param label: label stored on the instance
     """
     Population.__init__(self)
     self.kinmat, self.fratmat = {}, {}
     self.label = label
Example #10
0
def read_vcf(filename, require_pass=False, freq_info=None, info_filters=None):
    '''
    Reads a VCF file and returns a Population object with the
    individuals represented in the file

    :param filename: path of the VCF file to read
    :param require_pass: if true, skip records whose ``filter_passed``
        attribute is false
    :param freq_info: key in the record's INFO data holding the frequency
        passed on for each marker. If the value contains commas only the
        first entry is used. When None, or absent from a record, 0 is used
    :param info_filters: callables taking a VCFRecord. A record is kept only
        if every callable returns a true value

    :type filename: string
    :type require_pass: bool
    :type freq_info: string
    :type info_filters: iterable of callables

    :returns: population holding the individuals and chromosomes in the file
    :rtype: Population

    :raises ValueError: if an entry of info_filters is not callable, or if
        the file contains no header line
    '''
    if not info_filters:
        info_filters = []

    for info_filter in info_filters:
        if not callable(info_filter):
            raise ValueError('Filter not callable')

    with open(filename) as f:
        pop = Population()

        # Scan forward to the column header line and create the individuals
        inds = None
        for line in f:
            if line.startswith('##'):
                continue

            elif line.startswith('#'):
                ind_ids = line.strip().split()[9:]
                inds = [Individual(pop, ind_id) for ind_id in ind_ids]
                for ind in inds:
                    pop.register_individual(ind)

                break

        if inds is None:
            # Without this check the code below fails on an unbound name
            raise ValueError('No header line in VCF')

        last_chrom = None
        chromobj = None  # stays None if no record survives filtering
        genotypes = []

        # The same file iterator continues with the records after the header
        for line in f:
            record = VCFRecord(line)

            if not all(info_filter(record) for info_filter in info_filters):
                continue

            if require_pass and not record.filter_passed:
                continue

            if record.chrom != last_chrom:
                # A new chromosome begins: close off the previous one
                if chromobj is not None:
                    chromobj.finalize()
                    pop.add_chromosome(chromobj)
                chromobj = ChromosomeTemplate(label=record.chrom)

            if freq_info is not None and freq_info in record.info:
                freq = record.info[freq_info]
                if ',' in freq:
                    freq = freq.split(',')[0]
                freq = float(freq)
            else:
                freq = 0

            genotypes.append(record.genotypes())

            chromobj.add_genotype(bp=record.pos,
                                  label=record.label,
                                  frequency=freq)

            last_chrom = record.chrom

        # Close off the final chromosome, if any record was kept at all
        if chromobj is not None:
            chromobj.finalize()
            pop.add_chromosome(chromobj)

    for ind in inds:
        # Initialize new genotypes
        ind._init_genotypes(sparse=True)

    # Now actually sift through markers and assign them to individuals.
    # Rows were collected in file order, which is also the order in which
    # chromosomes and their markers were added to the population.
    final_indices = []
    for chromidx, chromobj in enumerate(pop.chromosomes):
        nmark = chromobj.nmark()
        final_indices.extend((chromidx, markidx) for markidx in range(nmark))

    for raw, (chromidx, markidx) in zip(range(len(genotypes)), final_indices):
        assign_genorow(genotypes[raw], inds, chromidx, markidx)

        # Kill the row so we don't end up with the whole dataset in memory twice
        genotypes[raw] = None

    return pop