def write(self, genotype, out, **kwargs):
    '''Write genotype data to the stream out in IMPUTE2 haplotype format.

    One text line is written per selected SNP: the four SNP metadata fields
    followed by the recoded allele calls of every selected sample, all
    separated by single spaces (a trailing space precedes the newline).

    Optional keyword arguments:
        snps         -- SNP indices to write. Default: genotype.snp_range
                        (all SNPs).
        samples      -- sample indices to write. Default: all samples.
        flip_alleles -- boolean mask (or index array) over the selected SNPs
                        whose allele coding is swapped on output.
                        Default: no SNP is flipped.

    Raises KeyError if, after partial calls are cleared, the data holds an
    allele code other than MISSING, 1 or 2.
    '''
    # Read optional arguments; truncate data accordingly
    snps = kwargs.get('snps', genotype.snp_range)
    samples = kwargs.get('samples', np.arange(genotype.num_samples))
    flip_alleles = kwargs.get('flip_alleles', np.zeros(len(snps), dtype=bool))

    # Work on a copy so the caller's genotype is untouched, then zero out
    # partially-called genotypes (an IMPUTE2 requirement).
    # Indexing fixes the layout as (snp, sample, allele).
    data = genotype.data[snps][:, samples, :].copy()
    recode.clear_partial_calls(data)

    # Convert SNP metadata to strings, one row of four fields per SNP
    # (presumably chrom, name, dist_cm, base_pair -- confirm against the
    # genotype.snp record layout).
    snp_metadata = genotype.snp[snps]
    snp_as_string = np.array(
        [str(field) for record in snp_metadata for field in record]
    ).reshape((len(snps), 4))

    def recoded(values, table):
        # np.vectorize cannot infer an output type from a size-0 input and
        # raises ValueError, so empty selections are handled explicitly.
        if values.size == 0:
            return np.empty(values.shape, dtype=str)
        return np.vectorize(table.__getitem__)(values)

    # Recode alleles + missing data
    data_str = recoded(data, {MISSING: '?', 1: '0', 2: '1'})

    # Flip alleles: overwrite the selected SNP rows with the swapped coding.
    # Skipped when nothing is selected, which is the default.
    to_flip = data[flip_alleles]
    if to_flip.size:
        data_str[flip_alleles] = recoded(to_flip, {MISSING: '?', 1: '1', 2: '0'})

    for meta_row, allele_rows in zip(snp_as_string, data_str):
        np.savetxt(out, meta_row, fmt='%s', newline=' ')
        np.savetxt(out, allele_rows, fmt='%s', newline=' ')
        out.write('\n')
def write(self, genotype, out, **kwargs):
    """Write genotype data to the stream out in IMPUTE2 haplotype format.

    One text line is written per selected SNP: the four SNP metadata fields
    followed by the recoded allele calls of every selected sample, all
    separated by single spaces (a trailing space precedes the newline).

    Optional keyword arguments:
        snps         -- SNP indices to write. Default: genotype.snp_range
                        (all SNPs).
        samples      -- sample indices to write. Default: all samples.
        flip_alleles -- boolean mask (or index array) over the selected SNPs
                        whose allele coding is swapped on output.
                        Default: no SNP is flipped.

    Raises KeyError if, after partial calls are cleared, the data holds an
    allele code other than MISSING, 1 or 2.
    """
    # Read optional arguments; truncate data accordingly
    snps = kwargs.get("snps", genotype.snp_range)
    samples = kwargs.get("samples", np.arange(genotype.num_samples))
    flip_alleles = kwargs.get("flip_alleles", np.zeros(len(snps), dtype=bool))

    # Work on a copy so the caller's genotype is untouched, then zero out
    # partially-called genotypes (an IMPUTE2 requirement).
    # Indexing fixes the layout as (snp, sample, allele).
    data = genotype.data[snps][:, samples, :].copy()
    recode.clear_partial_calls(data)

    # Convert SNP metadata to strings, one row of four fields per SNP
    # (presumably chrom, name, dist_cm, base_pair -- confirm against the
    # genotype.snp record layout).
    snp_metadata = genotype.snp[snps]
    snp_as_string = np.array(
        [str(field) for record in snp_metadata for field in record]
    ).reshape((len(snps), 4))

    def recoded(values, table):
        # np.vectorize cannot infer an output type from a size-0 input and
        # raises ValueError, so empty selections are handled explicitly.
        if values.size == 0:
            return np.empty(values.shape, dtype=str)
        return np.vectorize(table.__getitem__)(values)

    # Recode alleles + missing data
    data_str = recoded(data, {MISSING: "?", 1: "0", 2: "1"})

    # Flip alleles: overwrite the selected SNP rows with the swapped coding.
    # Skipped when nothing is selected, which is the default.
    to_flip = data[flip_alleles]
    if to_flip.size:
        data_str[flip_alleles] = recoded(to_flip, {MISSING: "?", 1: "1", 2: "0"})

    for meta_row, allele_rows in zip(snp_as_string, data_str):
        np.savetxt(out, meta_row, fmt="%s", newline=" ")
        np.savetxt(out, allele_rows, fmt="%s", newline=" ")
        out.write("\n")
def __handle_impute_from_fully_called(self, request):
    '''A request handler that imputes missing genotypes in g from fully-called
    haplotypes h, then clears partially-called genotypes in g.

    request.problem.data is unpacked as the pair (g, h). When
    request.params.debug is set, the number of imputed genotypes is printed.

    Always returns False (presumably signalling that later handlers should
    still process the request -- confirm against the handler framework).
    '''
    (g, h) = request.problem.data
    num_imputed = impute_from_fully_called(g, h)
    if request.params.debug:
        # Single-argument parenthesised form prints the same text on
        # Python 2 and Python 3.
        print('Imputed %d genotypes' % (num_imputed,))
    # Zero-out partial haplotypes. Note: PLINK requires full genotypes, so expect failure
    # if this option is turned off. Therefore it is always on, and we only impute from FULLY called
    # haps above.
    recode.clear_partial_calls(g)
    return False
def __handle_impute_from_fully_called(self, request):
    '''A request handler that imputes missing genotypes in g from fully-called
    haplotypes h, then clears partially-called genotypes in g.

    request.problem.data is unpacked as the pair (g, h). When
    request.params.debug is set, the number of imputed genotypes is printed.

    Always returns False (presumably signalling that later handlers should
    still process the request -- confirm against the handler framework).
    '''
    (g, h) = request.problem.data
    num_imputed = impute_from_fully_called(g, h)
    if request.params.debug:
        # Single-argument parenthesised form prints the same text on
        # Python 2 and Python 3.
        print('Imputed %d genotypes' % (num_imputed, ))
    # Zero-out partial haplotypes. Note: PLINK requires full genotypes, so expect failure
    # if this option is turned off. Therefore it is always on, and we only impute from FULLY called
    # haps above.
    recode.clear_partial_calls(g)
    return False