Example #1
0
    def write(self, genotype, out, **kwargs):
        '''Write genotype data to the stream out in IMPUTE2 haplotype format.

        One line is written per selected SNP: the four SNP metadata fields followed by the
        recoded alleles of every selected sample, each item followed by a single space.

        Optional keyword arguments:
            snps: indices of the SNPs to write. Defaults to genotype.snp_range (all SNPs).
            samples: indices of the samples to write. Defaults to all genotype.num_samples
                samples.
            flip_alleles: selector over the written SNPs (a boolean mask by default) marking the
                SNPs whose allele coding is swapped (1 -> '1', 2 -> '0'). Defaults to no flips.
        '''

        # Read optional arguments; truncate data accordingly
        snps = kwargs.get('snps', genotype.snp_range)
        samples = kwargs.get('samples', np.arange(genotype.num_samples))
        flip_alleles = kwargs.get('flip_alleles',
                                  np.zeros(len(snps), dtype=bool))

        # Zero out partially-called genotypes (an IMPUTE2 requirement). The selection is copied
        # before it is handed to the recoder.
        data = genotype.data[snps][:, samples, :].copy()
        recode.clear_partial_calls(data)

        # Convert SNP metadata to strings, one row of 4 fields per SNP
        snp_metadata = genotype.snp[
            snps]  # [np.array(['chrom', 'name', 'dist_cm', 'base_pair'])]
        snp_as_string = np.array([str(y) for x in snp_metadata
                                  for y in x]).reshape((len(snps), 4))

        # Recode alleles + missing data. np.vectorize cannot infer an output type from a size-0
        # input and raises ValueError, so empty selections are skipped explicitly.
        recoding = {MISSING: '?', 1: '0', 2: '1'}
        recoding_flipped = {MISSING: '?', 1: '1', 2: '0'}
        if data.size:
            data_str = np.vectorize(recoding.__getitem__)(data)
            # Flip alleles. With the default all-False mask nothing is selected here.
            flipped = data[flip_alleles]
            if flipped.size:
                data_str[flip_alleles] = np.vectorize(
                    recoding_flipped.__getitem__)(flipped)
        else:
            data_str = np.empty(data.shape, dtype=str)

        # Emit one line per SNP: metadata fields, then the allele pairs of each sample
        for metadata_row, allele_rows in zip(snp_as_string, data_str):
            np.savetxt(out, metadata_row, fmt='%s', newline=' ')
            np.savetxt(out, allele_rows, fmt='%s', newline=' ')
            out.write('\n')
Example #2
0
    def write(self, genotype, out, **kwargs):
        """Write genotype data to the stream out in IMPUTE2 haplotype format.

        Every selected SNP produces one output line holding its four metadata fields and then
        the recoded alleles of each selected sample; every item is followed by a single space.

        Optional keyword arguments:
            snps: indices of the SNPs to write. Defaults to genotype.snp_range (all SNPs).
            samples: indices of the samples to write. Defaults to all genotype.num_samples
                samples.
            flip_alleles: selector over the written SNPs (a boolean mask by default) marking the
                SNPs whose allele coding is swapped (1 -> "1", 2 -> "0"). Defaults to no flips.
        """

        # Read optional arguments; truncate data accordingly
        snps = kwargs.get("snps", genotype.snp_range)
        samples = kwargs.get("samples", np.arange(genotype.num_samples))
        flip_alleles = kwargs.get("flip_alleles", np.zeros(len(snps), dtype=bool))

        # Zero out partially-called genotypes (an IMPUTE2 requirement). The selection is copied
        # before it is handed to the recoder.
        data = genotype.data[snps][:, samples, :].copy()
        recode.clear_partial_calls(data)

        # Convert SNP metadata to strings, one row of 4 fields per SNP
        snp_metadata = genotype.snp[snps]  # [np.array(['chrom', 'name', 'dist_cm', 'base_pair'])]
        snp_as_string = np.array([str(y) for x in snp_metadata for y in x]).reshape((len(snps), 4))

        # Recode alleles + missing data. np.vectorize raises ValueError on size-0 inputs (it has
        # no element to infer the output type from), so empty selections bypass it.
        recoding = {MISSING: "?", 1: "0", 2: "1"}
        recoding_flipped = {MISSING: "?", 1: "1", 2: "0"}
        if data.size:
            data_str = np.vectorize(recoding.__getitem__)(data)
            # Flip alleles. The default all-False mask selects nothing, hence the size check.
            flipped = data[flip_alleles]
            if flipped.size:
                data_str[flip_alleles] = np.vectorize(recoding_flipped.__getitem__)(flipped)
        else:
            data_str = np.empty(data.shape, dtype=str)

        # Emit one line per SNP: metadata fields, then the allele pairs of each sample
        for metadata_row, allele_rows in zip(snp_as_string, data_str):
            np.savetxt(out, metadata_row, fmt="%s", newline=" ")
            np.savetxt(out, allele_rows, fmt="%s", newline=" ")
            out.write("\n")
Example #3
0
def __handle_impute_from_fully_called(self, request):
    '''A request handler that imputes missing genotypes in g from fully-called haplotypes h, then
    clears partially-called genotypes.

    The pair (g, h) is read from request.problem.data. When request.params.debug is set, the
    number of imputed genotypes is printed. Always returns False.'''
    (g, h) = request.problem.data
    num_imputed = impute_from_fully_called(g, h)
    if request.params.debug:
        # Parenthesized single-argument form: prints identically under Python 2 and Python 3.
        print('Imputed %d genotypes' % (num_imputed,))
    # Zero-out partial haplotypes. Note: PLINK requires full genotypes, so expect failure
    # if this option is turned off. Therefore it is always on, and we only impute from FULLY called
    # haps above.
    recode.clear_partial_calls(g)
    # NOTE(review): presumably False tells the handler chain to keep processing - confirm
    # against the dispatcher.
    return False
Example #4
0
def __handle_impute_from_fully_called(self, request):
    '''A request handler that imputes missing genotypes in g from fully-called haplotypes h, and
    afterwards clears partially-called genotypes.

    g and h are unpacked from request.problem.data. The imputed-genotype count is printed only
    when request.params.debug is set. The handler always returns False.'''
    (g, h) = request.problem.data
    num_imputed = impute_from_fully_called(g, h)
    if request.params.debug:
        # A single parenthesized argument prints the same text on Python 2 and Python 3.
        print('Imputed %d genotypes' % (num_imputed, ))
    # Zero-out partial haplotypes. Note: PLINK requires full genotypes, so expect failure
    # if this option is turned off. Therefore it is always on, and we only impute from FULLY called
    # haps above.
    recode.clear_partial_calls(g)
    # NOTE(review): the meaning of the False return is not visible here - presumably it lets
    # later handlers run; verify against the caller.
    return False