def __phase_duo_child(self, problem, params, parent_type):
    '''A helper method that phases a child based on a single parent of type parent_type.

    Only SNPs at which the parent is homozygous, (a,a), are considered:
      Case A - child is homozygous (b,b) with b != a: a genotype error is flagged for
               both the child and the parent.
      Case B - child carries a: the child haplotype at index parent_type is set to a and
               the haplotype at index 1 - parent_type to the remaining child allele.
      Case C - child has a missing allele: the child haplotype at index parent_type is
               set to a.

    problem     - supplies the (genotype, haplotype) pair through problem.data and
                  receives error reports through problem.genotype_error().
    params      - passed through to pt.selected_duos().
    parent_type - third-axis index of the child haplotype that is filled from this parent.

    Always returns False.
    '''
    # Find genotyped children whose parent is genotyped.
    # Column 0 of duo indexes the child, column 1 indexes the parent.
    duo = pt.selected_duos(problem, params, parent_type)
    if not duo.size:
        return False
    g, h = problem.data
    gc, gp = g[:, duo[:, 0], :], g[:, duo[:, 1], :]

    # Restrict view to snps that are homozygous in parent.
    # hom[0] holds snp indices, hom[1] holds row indices into duo.
    hom = np.where(gt.is_homozygous(gp)[:, :])
    parent_allele, gc_hom = gp[hom[0], hom[1], 0], gc[hom[0], hom[1], :]

    #------------------------------------------------------------------------------------
    # Case A: Parent = (a,a), child = (b,b) and a != b (incompatible) ==> error
    #------------------------------------------------------------------------------------
    j = np.where(np.logical_and(gt.is_homozygous(gc_hom)[:],
                                gc_hom[:, 0] != parent_allele))[0]
    # Flag errors in both children and parents (duo column 0, then column 1).
    # range() instead of xrange() so the loop also runs under Python 3.
    for i in range(2):
        problem.genotype_error(hom[0][j], duo[hom[1][j], i],
                               'Homozygous parent allele not found in child')

    #------------------------------------------------------------------------------------
    # Case B: Parent = (a,a), child = (a,x) or (x,a), x in {0,1,2} (compatible) ==>
    # set child parent hap to a and other hap to the other child genotype (x)
    #------------------------------------------------------------------------------------
    # Note: h[array,array,:] is not a reference into h like h[scalar,scalar,:]. Thus,
    # when setting h, we must use h[original coordinates here] = ... . This occurs
    # several times in the code of this file.

    # Determine child haplotype corresponding to the hom parent
    snps = gt.index_of(gc_hom, parent_allele)
    parent_allele_at_snps = parent_allele[snps]
    snp_index, child_index = hom[0][snps], duo[hom[1][snps], 0]
    h[snp_index, child_index, parent_type] = parent_allele_at_snps

    # Determine child haplotype corresponding to the other parent: every child allele
    # that differs from the parent allele goes to the opposite haplotype.
    gc_rel = g[snp_index, child_index, :]
    other = np.where(gc_rel != np.transpose(np.tile(parent_allele_at_snps, (2, 1))))
    h[snp_index[other[0]], child_index[other[0]], 1 - parent_type] = gc_rel[other]

    #------------------------------------------------------------------------------------
    # Case C: Parent = (a,a), child = (0,x) or (x,0) (potentially compatible) ==>
    # impute child to a and set child hap to a
    #------------------------------------------------------------------------------------
    # Iterate the (allele, snps) pairs directly; no intermediate dict is needed and
    # zip() iteration works on both Python 2 and Python 3 (unlike dict.iteritems()).
    for allele, snps in zip(ALLELES, gt.index_first_missing(gc_hom)):
        parent_value = parent_allele[snps]
        snp_original = hom[0][snps]
        # NOTE(review): gc was produced by indexing g with an index array, which (as the
        # note in Case B says of h) is presumably a copy rather than a reference; if so
        # this write does not reach g. Behavior left unchanged -- confirm intent.
        gc[snp_original, hom[1][snps], allele] = parent_value
        h[snp_original, duo[hom[1][snps], 0], parent_type] = parent_value
    return False
def __phase_duo_child(self, problem, params, parent_type):
    '''A helper method that phases a child based on a single parent of type parent_type.

    Only SNPs at which the parent is homozygous, (a,a), are considered:
      Case A - child is homozygous (b,b) with b != a: a genotype error is flagged for
               both the child and the parent.
      Case B - child carries a: the child haplotype at index parent_type is set to a and
               the haplotype at index 1 - parent_type to the remaining child allele.
      Case C - child has a missing allele: the child haplotype at index parent_type is
               set to a.

    problem     - supplies the (genotype, haplotype) pair through problem.data and
                  receives error reports through problem.genotype_error().
    params      - passed through to pt.selected_duos().
    parent_type - third-axis index of the child haplotype that is filled from this parent.

    Always returns False.
    '''
    # Find genotyped children whose parent is genotyped.
    # Column 0 of duo indexes the child, column 1 indexes the parent.
    duo = pt.selected_duos(problem, params, parent_type)
    if not duo.size:
        return False
    g, h = problem.data
    gc, gp = g[:, duo[:, 0], :], g[:, duo[:, 1], :]

    # Restrict view to snps that are homozygous in parent.
    # hom[0] holds snp indices, hom[1] holds row indices into duo.
    hom = np.where(gt.is_homozygous(gp)[:, :])
    parent_allele, gc_hom = gp[hom[0], hom[1], 0], gc[hom[0], hom[1], :]

    #------------------------------------------------------------------------------------
    # Case A: Parent = (a,a), child = (b,b) and a != b (incompatible) ==> error
    #------------------------------------------------------------------------------------
    j = np.where(np.logical_and(gt.is_homozygous(gc_hom)[:],
                                gc_hom[:, 0] != parent_allele))[0]
    # Flag errors in both children and parents (duo column 0, then column 1).
    # range() instead of xrange() so the loop also runs under Python 3.
    for i in range(2):
        problem.genotype_error(hom[0][j], duo[hom[1][j], i],
                               'Homozygous parent allele not found in child')

    #------------------------------------------------------------------------------------
    # Case B: Parent = (a,a), child = (a,x) or (x,a), x in {0,1,2} (compatible) ==>
    # set child parent hap to a and other hap to the other child genotype (x)
    #------------------------------------------------------------------------------------
    # Note: h[array,array,:] is not a reference into h like h[scalar,scalar,:]. Thus,
    # when setting h, we must use h[original coordinates here] = ... . This occurs
    # several times in the code of this file.

    # Determine child haplotype corresponding to the hom parent
    snps = gt.index_of(gc_hom, parent_allele)
    parent_allele_at_snps = parent_allele[snps]
    snp_index, child_index = hom[0][snps], duo[hom[1][snps], 0]
    h[snp_index, child_index, parent_type] = parent_allele_at_snps

    # Determine child haplotype corresponding to the other parent: every child allele
    # that differs from the parent allele goes to the opposite haplotype.
    gc_rel = g[snp_index, child_index, :]
    other = np.where(gc_rel != np.transpose(np.tile(parent_allele_at_snps, (2, 1))))
    h[snp_index[other[0]], child_index[other[0]], 1 - parent_type] = gc_rel[other]

    #------------------------------------------------------------------------------------
    # Case C: Parent = (a,a), child = (0,x) or (x,0) (potentially compatible) ==>
    # impute child to a and set child hap to a
    #------------------------------------------------------------------------------------
    # Iterate the (allele, snps) pairs directly; no intermediate dict is needed and
    # zip() iteration works on both Python 2 and Python 3 (unlike dict.iteritems()).
    for allele, snps in zip(ALLELES, gt.index_first_missing(gc_hom)):
        parent_value = parent_allele[snps]
        snp_original = hom[0][snps]
        # NOTE(review): gc was produced by indexing g with an index array, which (as the
        # note in Case B says of h) is presumably a copy rather than a reference; if so
        # this write does not reach g. Behavior left unchanged -- confirm intent.
        gc[snp_original, hom[1][snps], allele] = parent_value
        h[snp_original, duo[hom[1][snps], 0], parent_type] = parent_value
    return False
def __handle_hom_entries(self, request):
    '''Copy every homozygous genotype entry into the haplotype array.

    When request.params.selected_mode is set, only the samples listed in
    request.params.selected_samples are examined; otherwise all samples are.
    Always returns False.
    '''
    problem = request.problem
    params = request.params
    genotype = problem.genotype.data

    if not params.selected_mode:
        # Phase all samples: use the homozygosity mask directly as the index.
        is_hom = gt.is_homozygous(genotype)[:, :]
        problem.haplotype.data[is_hom] = genotype[is_hom]
        return False

    # Phase only selected samples. The located positions index the restricted
    # genotype view, so sample positions are translated back through
    # selected_samples before writing into the full haplotype array.
    selected = params.selected_samples
    restricted = genotype[:, selected, :]
    located = np.where(gt.is_homozygous(restricted)[:, :])
    problem.haplotype.data[located[0], selected[located[1]], :] = restricted[located]
    return False
def __handle_hom_entries(self, request):
    '''Copy every homozygous genotype entry into the haplotype array.

    When request.params.selected_mode is set, only the samples listed in
    request.params.selected_samples are examined; otherwise all samples are.
    Always returns False.
    '''
    problem = request.problem
    params = request.params
    genotype = problem.genotype.data

    if not params.selected_mode:
        # Phase all samples: use the homozygosity mask directly as the index.
        is_hom = gt.is_homozygous(genotype)[:, :]
        problem.haplotype.data[is_hom] = genotype[is_hom]
        return False

    # Phase only selected samples. The located positions index the restricted
    # genotype view, so sample positions are translated back through
    # selected_samples before writing into the full haplotype array.
    selected = params.selected_samples
    restricted = genotype[:, selected, :]
    located = np.where(gt.is_homozygous(restricted)[:, :])
    problem.haplotype.data[located[0], selected[located[1]], :] = restricted[located]
    return False
#------------------------------------------------------------------------------------ for allele, snps in dict(zip(ALLELES, gt.index_first_missing(gc_hom))).iteritems(): parent_value = parent_allele[snps] snp_original = hom[0][snps] gc[snp_original, hom[1][snps], allele] = parent_value # if self.debug: # print 'Imputing child', (snp_original, hom[1][snps], allele, parent_value) # problem.info.imputed_genotype.append((snp_original, hom[1][snps], allele, parent_value)) h[snp_original, duo[hom[1][snps], 0], parent_type] = parent_value return False '''Return indices in which Parent0 = Parent1 = (a,a) and child = (a,b), a != b (incompatible).''' __equal_hom_parents_het_child = lambda gf, gm, gc: \ gt.is_homozygous(gf)[:, :] & gt.is_homozygous(gm)[:, :] & (gf[:, :, 0] == gm[:, :, 0]) \ & gt.is_heterozygous(gc)[:, :] '''Return indices in which Parent = (a,b) and child = (b,b), a != b (incompatible).''' __hom_parent_not_in_hom_child = lambda gp, gc: \ gt.is_homozygous(gp)[:, :] & gt.is_homozygous(gc)[:, :] & (gp[:, :, 0] != gc[:, :, 0]) #################################################################################### def __handle_impute_parent(self, request): '''Child with two determined haps (a,b) and parent has (a,MISSING) or (MISSING,a) ==> impute parent to (a,b). ''' # if request.params.selected_mode: return False for parent_type in ALLELES: __impute_duo_parent(request.problem, request.params, parent_type)
# Case C: Parent = (a,a), child = (0,x) or (x,0) (potentially compatible) ==> # impute child to a and set child hap to a #------------------------------------------------------------------------------------ for allele, snps in dict(zip(ALLELES, gt.index_first_missing(gc_hom))).iteritems(): parent_value = parent_allele[snps] snp_original = hom[0][snps] gc[snp_original, hom[1][snps], allele] = parent_value # if self.debug: # print 'Imputing child', (snp_original, hom[1][snps], allele, parent_value) # problem.info.imputed_genotype.append((snp_original, hom[1][snps], allele, parent_value)) h[snp_original, duo[hom[1][snps], 0], parent_type] = parent_value return False '''Return indices in which Parent0 = Parent1 = (a,a) and child = (a,b), a != b (incompatible).''' __equal_hom_parents_het_child = lambda gf, gm, gc: \ gt.is_homozygous(gf)[:, :] & gt.is_homozygous(gm)[:, :] & (gf[:, :, 0] == gm[:, :, 0]) \ & gt.is_heterozygous(gc)[:, :] '''Return indices in which Parent = (a,b) and child = (b,b), a != b (incompatible).''' __hom_parent_not_in_hom_child = lambda gp, gc: \ gt.is_homozygous(gp)[:, :] & gt.is_homozygous(gc)[:, :] & (gp[:, :, 0] != gc[:, :, 0]) #################################################################################### def __handle_impute_parent(self, request): '''Child with two determined haps (a,b) and parent has (a,MISSING) or (MISSING,a) ==> impute parent to (a,b). ''' # if request.params.selected_mode: return False for parent_type in ALLELES: __impute_duo_parent(request.problem, request.params, parent_type) def __impute_duo_parent(problem, params, parent_type):