Beispiel #1
0
    def __init__(self, g_orig, haplotype, num_errors):
        '''Initialize a phasing statistics object.

        Recodes the original genotype array g_orig and the haplotype array haplotype.data with
        recode.recode_single_genotype(), then stores size counters, per-sample fill fractions and
        one StatsField per index set computed by the recode.where_*() helpers.

        g_orig -- original genotype array.
        haplotype -- haplotype set object; its data, num_snps, num_samples, num_data and num_filled
            attributes and its fill_fraction() method are read here.
        num_errors -- error count supplied by the caller; stored unchanged.
        '''
        hap_data = haplotype.data
        recoded_orig = recode.recode_single_genotype(g_orig)
        recoded_hap = recode.recode_single_genotype(hap_data)

        # Sizes
        self.time = 0
        self.num_snps = haplotype.num_snps
        self.num_samples = haplotype.num_samples
        # Floor division keeps this count an integer even when '/' means true division
        self.num_genotypes = haplotype.num_data // 2
        self.num_haplotypes = haplotype.num_data

        # Arrays
        # Fill fraction of every sample, in sample order
        self.fill = np.array([haplotype.fill_fraction(sample=x) for x in range(haplotype.num_samples)])

        # Fields
        def field(index):
            '''Field factory: wrap an index set in a StatsField tied to this object and the haplotype data.'''
            return StatsField(self, hap_data, index)

        # Index sets that depend only on the original genotypes
        self.called_orig = field(recode.where_called(recoded_orig))
        # Index sets comparing the recoded haplotypes against the recoded original genotypes
        self.imputed = field(recode.where_full_imputed(recoded_hap, recoded_orig))
        self.imputed_partial = field(recode.where_partial_imputed(recoded_hap, recoded_orig))
        self.errors = field(recode.where_error(recoded_hap, recoded_orig))
        self.errors_partial = field(recode.where_partial_error(recoded_hap, recoded_orig))
        # Index sets that depend only on the haplotypes
        self.called = field(recode.where_called(recoded_hap))
        self.partial_called = field(recode.where_partial_called(recoded_hap))
        self.still_missing = field(recode.where_still_missing(recoded_hap, recoded_orig))

        # Scalars
        self.num_filled_haplotypes = haplotype.num_filled
        self.num_errors = num_errors  # Redundant
Beispiel #2
0
    def __init__(self, g_orig, haplotype, num_errors):
        '''Initialize a phasing statistics object.

        Recodes the original genotype array g_orig and the haplotype array haplotype.data with
        recode.recode_single_genotype(), then stores size counters, per-sample fill fractions and
        one StatsField per index set computed by the recode.where_*() helpers.

        g_orig -- original genotype array.
        haplotype -- haplotype set object; its data, num_snps, num_samples, num_data and num_filled
            attributes and its fill_fraction() method are read here.
        num_errors -- error count supplied by the caller; stored unchanged.
        '''
        hap_data = haplotype.data
        recoded_orig = recode.recode_single_genotype(g_orig)
        recoded_hap = recode.recode_single_genotype(hap_data)

        # Sizes
        self.time = 0
        self.num_snps = haplotype.num_snps
        self.num_samples = haplotype.num_samples
        # Floor division keeps this count an integer even when '/' means true division
        self.num_genotypes = haplotype.num_data // 2
        self.num_haplotypes = haplotype.num_data

        # Arrays
        # Fill fraction of every sample, in sample order
        self.fill = np.array([
            haplotype.fill_fraction(sample=x)
            for x in range(haplotype.num_samples)
        ])

        # Fields
        def field(index):
            '''Field factory: wrap an index set in a StatsField tied to this object and the haplotype data.'''
            return StatsField(self, hap_data, index)

        # Index sets that depend only on the original genotypes
        self.called_orig = field(recode.where_called(recoded_orig))
        # Index sets comparing the recoded haplotypes against the recoded original genotypes
        self.imputed = field(recode.where_full_imputed(recoded_hap, recoded_orig))
        self.imputed_partial = field(recode.where_partial_imputed(recoded_hap, recoded_orig))
        self.errors = field(recode.where_error(recoded_hap, recoded_orig))
        self.errors_partial = field(recode.where_partial_error(recoded_hap, recoded_orig))
        # Index sets that depend only on the haplotypes
        self.called = field(recode.where_called(recoded_hap))
        self.partial_called = field(recode.where_partial_called(recoded_hap))
        self.still_missing = field(recode.where_still_missing(recoded_hap, recoded_orig))

        # Scalars
        self.num_filled_haplotypes = haplotype.num_filled
        self.num_errors = num_errors  # Redundant
Beispiel #3
0
def impute_from_fully_called(g, h):
    '''Fill in genotypes in g, in place, from fully-called haplotypes h.

    Typically run on genotypes after phasing: those may contain entries that were zeroed out as
    Mendelian errors, which can be recovered here. Partially-filled genotypes are overridden as
    well whenever the corresponding haplotype entry is fully called.

    g -- genotype array; modified in place at the imputed (SNP, sample) positions.
    h -- haplotype array; indexed the same way as g.

    Returns the number of imputed genotypes (the length of the first index array returned by
    recode.where_full_imputed()).'''
    # Recode the haplotypes first, then the genotypes, and locate the fully-imputable entries
    recoded_h = recode.recode_single_genotype(h)
    recoded_g = recode.recode_single_genotype(g)
    imputed = recode.where_full_imputed(recoded_h, recoded_g)

    # Copy all entries along the last axis for every selected (SNP, sample) pair
    snp_index = imputed[SNP]
    sample_index = imputed[SAMPLE]
    g[snp_index, sample_index, :] = h[snp_index, sample_index, :]

    num_imputed = len(imputed[0])
    return num_imputed
Beispiel #4
0
def impute_from_fully_called(g, h):
    '''Fill in genotypes in g, in place, from fully-called haplotypes h.

    Typically run on genotypes after phasing: those may contain entries that were zeroed out as
    Mendelian errors, which can be recovered here. Partially-filled genotypes are overridden as
    well whenever the corresponding haplotype entry is fully called.

    g -- genotype array; modified in place at the imputed (SNP, sample) positions.
    h -- haplotype array; indexed the same way as g.

    Returns the number of imputed genotypes (the length of the first index array returned by
    recode.where_full_imputed()).'''
    # Recode the haplotypes first, then the genotypes, and locate the fully-imputable entries
    recoded_h = recode.recode_single_genotype(h)
    recoded_g = recode.recode_single_genotype(g)
    imputed = recode.where_full_imputed(recoded_h, recoded_g)

    # Copy all entries along the last axis for every selected (SNP, sample) pair
    snp_index = imputed[SNP]
    sample_index = imputed[SAMPLE]
    g[snp_index, sample_index, :] = h[snp_index, sample_index, :]

    num_imputed = len(imputed[0])
    return num_imputed