コード例 #1
0
ファイル: test_common.py プロジェクト: y-chai/pydigree
def test_grouper():
    """Check that grouper yields 2-tuples and pads a short final group."""
    # An evenly divisible input: every group must be exactly (1, 2).
    even_pairs = list(grouper([1, 2] * 100, 2))
    for pair in even_pairs:
        assert len(pair) == 2
    for pair in even_pairs:
        assert pair == (1, 2)

    # One trailing element: the last group is filled out with None.
    padded_pairs = list(grouper([1, 2] * 100 + [1], 2))
    last_pair = padded_pairs[-1]
    assert last_pair == (1, None)
コード例 #2
0
ファイル: test_common.py プロジェクト: jameshicks/pydigree
def test_grouper():
    """Verify grouper chunks a sequence into pairs, None-filling the tail."""
    size = 2
    evenly_divisible = [1, 2] * 100

    chunks = list(grouper(evenly_divisible, size))
    # No chunk may have the wrong length or the wrong contents.
    assert not any(len(chunk) != size for chunk in chunks)
    assert not any(chunk != (1, 2) for chunk in chunks)

    # With an odd number of items the final chunk is completed with None.
    one_extra = evenly_divisible + [1]
    assert list(grouper(one_extra, size))[-1] == (1, None)
コード例 #3
0
def test_ld():
    """Check that pop.ld(..., method='D') reports D == 0 for this dataset."""
    # Data from Hartl & Clark, Table 2.2 (pg 85)
    haplotypes = chain([Alleles(['A', 'B'])] * 25,
                       [Alleles(['A', 'b'])] * 475,
                       [Alleles(['a', 'B'])] * 475,
                       [Alleles(['a', 'b'])] * 9025)

    pop = Population()
    # Consecutive haplotypes are paired up, one pair per founder individual.
    for chroms in grouper(haplotypes, 2):
        ind = pop.founder_individual()
        ind.genotypes = [chroms]

    known_D = 0
    observed_D = pop.ld((0, 0), (0, 1), method='D')
    # Compare within a tolerance rather than with ==: the allele frequencies
    # here (e.g. 0.05) are not exactly representable in binary floating
    # point, so a correctly computed D may differ from 0 by rounding error.
    assert abs(observed_D - known_D) < 1e-9
コード例 #4
0
def read_beagle_genotypefile(filename, pop, missingcode='0'):
    '''
    Reads BEAGLE formatted genotype files

    The file is read in two passes. The first pass handles the header:
    the ``I`` record creates one ``Individual`` per label, and phenotype
    records fill in ``ind.phenotypes``. The first record of any other type
    ends the header scan. The second pass rewinds the file, collects every
    line starting with ``M``, and assigns each individual its genotypes.

    Arguments

    :param filename: Filename of BEAGLE genotype file
    :param pop: the population handed to each ``Individual`` that is created
    :param missingcode: The value that indicates a missing genotype

    :type missingcode: string
    :rtype: None
    :raises ValueError: if a phenotype record, a genotype record, or the end
        of the file is reached before any ``I`` (individual ID) record
    '''
    with smartopen(filename) as f:
        # None means "no I record seen yet"; the original left the name
        # unbound, which surfaced as an UnboundLocalError further down.
        inds = None
        for line in f:
            rec = BeagleGenotypeRecord(line)

            if rec.identifier == 'I':
                # Only every other label is used -- presumably each
                # individual occupies two columns (one per allele).
                inds = [Individual(pop, label) for label in rec.data[::2]]
            elif rec.is_phenotype_record:
                if inds is None:
                    raise ValueError(
                        'Phenotype record found before the individual ID '
                        '(I) record in {}'.format(filename))
                for ind, pheno_status in zip(inds, rec.data[::2]):
                    if rec.identifier == 'A':
                        # 'A' records become booleans: True only for '2'
                        pheno_status = pheno_status == '2'
                    else:
                        try:
                            pheno_status = float(pheno_status)
                        except ValueError:
                            # Non-numeric values are kept as strings
                            pass
                    ind.phenotypes[rec.label] = pheno_status
            else:
                # We've reached the genotypes, and we're skipping out
                break

        if inds is None:
            raise ValueError(
                'No individual ID (I) record found in {}'.format(filename))

        # Second pass: start over and pick out only the marker lines
        f.seek(0)
        gtrows = [
            list(grouper(BeagleGenotypeRecord(x).data, 2)) for x in f
            if x.startswith('M')
        ]
        # Transpose marker-major rows into one allele-pair sequence
        # per individual
        genotypes = zip(*gtrows)
        for ind, sequentialalleles in zip(inds, genotypes):
            ind.genotypes = gt_from_seq(ind.chromosomes,
                                        sequentialalleles,
                                        missing_code=missingcode)
コード例 #5
0
ファイル: beagle.py プロジェクト: jameshicks/pydigree
def read_beagle_genotypefile(filename, pop, missingcode='0'):
    '''
    Reads BEAGLE formatted genotype files

    Header records are processed first: an ``I`` record builds the list of
    ``Individual`` objects (one per label), and each phenotype record stores
    a value in ``ind.phenotypes`` under the record's label. Scanning of the
    header stops at the first record that is neither. The file is then
    rewound and every line beginning with ``M`` is parsed into allele pairs,
    which are regrouped per individual and converted with ``gt_from_seq``.

    Arguments

    :param filename: Filename of BEAGLE genotype file
    :param pop: the population handed to each ``Individual`` that is created
    :param missingcode: The value that indicates a missing genotype

    :type missingcode: string
    :rtype: None
    :raises ValueError: if no ``I`` (individual ID) record has been read by
        the time a phenotype record, a genotype record, or the end of the
        file is reached
    '''
    with smartopen(filename) as f:
        # Stays None until an I record is read, so a malformed or empty
        # file gives a clear error instead of an UnboundLocalError.
        inds = None
        for line in f:
            rec = BeagleGenotypeRecord(line)

            if rec.identifier == 'I':
                # Labels are taken from every second column -- presumably
                # because each individual spans two allele columns.
                inds = [Individual(pop, label) for label in rec.data[::2]]
            elif rec.is_phenotype_record:
                if inds is None:
                    raise ValueError(
                        'Phenotype record found before the individual ID '
                        '(I) record in {}'.format(filename))
                for ind, pheno_status in zip(inds, rec.data[::2]):
                    if rec.identifier == 'A':
                        # 'A' records are stored as True/False ('2' -> True)
                        pheno_status = pheno_status == '2'
                    else:
                        try:
                            pheno_status = float(pheno_status)
                        except ValueError:
                            # Leave values that are not numbers as strings
                            pass
                    ind.phenotypes[rec.label] = pheno_status
            else:
                # We've reached the genotypes, and we're skipping out
                break

        if inds is None:
            raise ValueError(
                'No individual ID (I) record found in {}'.format(filename))

        # Re-read from the top, this time keeping only marker (M) lines
        f.seek(0)
        gtrows = [list(grouper(BeagleGenotypeRecord(x).data, 2))
                  for x in f if x.startswith('M')]
        # zip(*rows) turns per-marker rows into per-individual columns
        genotypes = zip(*gtrows)
        for ind, sequentialalleles in zip(inds, genotypes):
            ind.genotypes = gt_from_seq(ind.chromosomes,
                                        sequentialalleles,
                                        missing_code=missingcode)