コード例 #1
0
ファイル: test_alleles.py プロジェクト: jameshicks/pydigree
def test_labelledallele_delabeler():
    """Check LabelledAlleles.delabel() on a chromatid built from two spans.

    Individual 1 carries Alleles of all 1s and all 2s, individual 2 carries
    all 3s and all 4s. The chromatid is described by the spans
    (a, 0, 0, 0, ngenos//2) and (b, 0, 1, ngenos//2, ngenos); delabel() is
    expected to equal ngenos//2 ones followed by ngenos//2 fours.
    """
    ngenos = 10  # Number of genotypes per chromosome
    if ngenos % 2:
        raise ValueError('Even number of genotypes needed')
    half = ngenos // 2

    pop = Population()
    template = ChromosomeTemplate()
    for _ in range(ngenos):
        template.add_genotype()
    pop.add_chromosome(template)

    # (individual label, allele value for slot 0, allele value for slot 1)
    founders = []
    for label, first, second in ((1, 1, 2), (2, 3, 4)):
        ind = Individual(pop, label)
        ind._init_genotypes(blankchroms=False)
        ind.genotypes[0][0] = Alleles([first] * ngenos)
        ind.genotypes[0][1] = Alleles([second] * ngenos)
        founders.append(ind)
    ind_a, ind_b = founders

    spans = [InheritanceSpan(ind_a, 0, 0, 0, half),
             InheritanceSpan(ind_b, 0, 1, half, ngenos)]
    labelled = LabelledAlleles(spans=spans, chromobj=template)

    expected = Alleles([1] * half + [4] * half)
    observed = labelled.delabel()
    assert all(observed == expected)
コード例 #2
0
def test_labelledallele_delabeler():
    """Verify that delabel() resolves a two-span LabelledAlleles to Alleles.

    The first span is (a, 0, 0, 0, ngenos // 2) and the second is
    (b, 0, 1, ngenos // 2, ngenos), where a's chromosome-0 Alleles are all 1s
    and all 2s and b's are all 3s and all 4s. The expected result is
    ngenos // 2 ones followed by ngenos // 2 fours.
    """
    ngenos = 10  # Number of genotypes per chromosome
    if ngenos % 2 != 0:
        raise ValueError('Even number of genotypes needed')

    population = Population()
    chrom = ChromosomeTemplate()
    for _ in range(ngenos):
        chrom.add_genotype()
    population.add_chromosome(chrom)

    def make_individual(label, values):
        # Create an individual whose chromosome-0 pair holds constant Alleles
        person = Individual(population, label)
        person._init_genotypes(blankchroms=False)
        for slot, value in enumerate(values):
            person.genotypes[0][slot] = Alleles([value] * ngenos)
        return person

    a = make_individual(1, (1, 2))
    b = make_individual(2, (3, 4))

    midpoint = ngenos // 2
    first_span = InheritanceSpan(a, 0, 0, 0, midpoint)
    second_span = InheritanceSpan(b, 0, 1, midpoint, ngenos)
    chromatid = LabelledAlleles(spans=[first_span, second_span],
                                chromobj=chrom)

    wanted = Alleles([1] * midpoint + [4] * midpoint)
    got = chromatid.delabel()
    assert all(got == wanted)
コード例 #3
0
ファイル: test_alleles.py プロジェクト: jameshicks/pydigree
def test_labelledalleles():
    """Compare founder_chromosome() against an explicitly built equivalent.

    LabelledAlleles.founder_chromosome(a, 0, 0, chromobj=c) is expected to
    equal a LabelledAlleles made from the single span (a, 0, 0, 0, ngenos).
    """
    ngenos = 50

    pop = Population()
    template = ChromosomeTemplate()
    for _ in range(ngenos):
        template.add_genotype()
    pop.add_chromosome(template)

    founder = Individual(pop, 1)

    actual = LabelledAlleles.founder_chromosome(founder, 0, 0,
                                                chromobj=template)
    whole_span = InheritanceSpan(founder, 0, 0, 0, ngenos)
    expected = LabelledAlleles(spans=[whole_span], chromobj=template)

    assert actual == expected
コード例 #4
0
def test_labelledalleles():
    """founder_chromosome() must match a one-span LabelledAlleles.

    The reference object is built from the span (a, 0, 0, 0, ngenos) on the
    same chromosome template that is passed to founder_chromosome().
    """
    ngenos = 50
    population = Population()
    chrom = ChromosomeTemplate()
    remaining = ngenos
    while remaining > 0:
        chrom.add_genotype()
        remaining -= 1
    population.add_chromosome(chrom)

    individual = Individual(population, 1)
    from_founder = LabelledAlleles.founder_chromosome(
        individual, 0, 0, chromobj=chrom)
    reference_spans = [InheritanceSpan(individual, 0, 0, 0, ngenos)]
    reference = LabelledAlleles(spans=reference_spans, chromobj=chrom)
    assert from_founder == reference
コード例 #5
0
def read_vcf(filename, require_pass=False, freq_info=None, info_filters=None):
    '''
    Reads a VCF file and returns a Population object with the
    individuals represented in the file

    :param filename: path of the VCF file to read
    :param require_pass: if True, records whose ``filter_passed`` attribute
        is false are skipped
    :param freq_info: key of the INFO entry to use as the marker frequency.
        When the entry holds a comma-separated list only the first value is
        used; when the key is absent (or freq_info is None) the frequency
        is set to 0
    :param info_filters: iterable of callables, each called with the
        VCFRecord; a record is kept only if every callable returns a
        true value
    :returns: the Population built from the file
    :raises ValueError: if an entry of info_filters is not callable, or if
        the file has no single-'#' column header line
    '''
    # Materialise the filters: they are walked once for validation and then
    # once per record, which would exhaust a one-shot iterable.
    info_filters = list(info_filters) if info_filters else []

    for info_filter in info_filters:
        if not callable(info_filter):
            raise ValueError('Filter not callable')

    with open(filename) as f:
        pop = Population()

        inds = None
        chromobj = None
        last_chrom = None
        genotypes = []

        # Header section: skip '##' meta lines, stop at the '#' column line
        for line in f:
            if line.startswith('##'):
                continue

            elif line.startswith('#'):
                # Sample IDs start at the tenth whitespace-separated column
                ind_ids = line.strip().split()[9:]
                inds = [Individual(pop, ind_id) for ind_id in ind_ids]
                for ind in inds:
                    pop.register_individual(ind)

                break

        if inds is None:
            raise ValueError(
                'No column header line found in VCF file: %s' % filename)

        # Remaining lines of the same file iterator are the records
        for line in f:
            record = VCFRecord(line)

            if not all(info_filter(record) for info_filter in info_filters):
                continue

            if require_pass and not record.filter_passed:
                continue

            if record.chrom != last_chrom:
                # A new chromosome label starts: close out the previous
                # template (if any) before opening a new one
                if chromobj is not None:
                    chromobj.finalize()
                    pop.add_chromosome(chromobj)
                chromobj = ChromosomeTemplate(label=record.chrom)

            if freq_info is not None and freq_info in record.info:
                freq = record.info[freq_info]
                if ',' in freq:
                    freq = freq.split(',')[0]
                freq = float(freq)
            else:
                freq = 0

            genotypes.append(record.genotypes())

            chromobj.add_genotype(bp=record.pos,
                                  label=record.label,
                                  frequency=freq)

            last_chrom = record.chrom

        # chromobj stays None when no record was kept (e.g. a header-only
        # file); in that case there is nothing to finalize
        if chromobj is not None:
            chromobj.finalize()
            pop.add_chromosome(chromobj)

    for ind in inds:
        # Initialize new genotypes
        ind._init_genotypes(sparse=True)

    # Now actually sift through markers and assign them to individuals.
    # final_indices pairs each stored row, in file order, with its
    # (chromosome index, marker index) position in the population.
    final_indices = []
    for chromidx, chrom in enumerate(pop.chromosomes):
        nmark = chrom.nmark()
        final_indices.extend((chromidx, markidx) for markidx in range(nmark))

    for raw, (chromidx, markidx) in zip(range(len(genotypes)), final_indices):
        assign_genorow(genotypes[raw], inds, chromidx, markidx)

        # Kill the row so we don't end up with the whole dataset in memory twice
        genotypes[raw] = None

    return pop