Exemple #1
0
 def __init__(self,
              problem,
              params,
              family,
              parent_type,
              children,
              debug=False):
     '''Set up the comparison state for one parent of a family.

     problem = object supplying pedigree, data and num_snps. params = stored as-is.
     family = family whose parents[parent_type] entry becomes self.parent.
     parent_type = key into family.parents. children = collection of children to work
     with (stored as-is; only its length is read here). debug = debugging flag.'''
     # Caller-supplied inputs, plus shortcuts into the problem object.
     self.problem, self.params = problem, params
     pedigree = problem.pedigree
     self.sample_id = pedigree.sample_id
     self.sample_index = pedigree.sample_index
     # Two components of problem.data (presumably genotypes and haplotypes - confirm).
     self.g, self.h = problem.data

     # Family members this object works on; the children come from the caller.
     self.family = family
     self.parent_type = parent_type
     self.parent = family.parents[parent_type]
     self.children = children
     self.num_children = len(children)

     # SNP bookkeeping. self.snps starts unset.
     self.num_snps = problem.num_snps
     self.snps = None
     self.het_snps = gt.where_heterozygous(self.g, sample=self.parent)
     self.template = -1

     # Private helper defined elsewhere in the class. Everything above is assigned
     # before it runs and everything below after it; that ordering is kept on purpose.
     self.__filter_snps()

     self._recombination_index = np.empty((0, 2), dtype=int)
     self.error_filter_length = 5
     self.debug = debug
Exemple #2
0
    def test_ibs_segments(self):
        '''Check the segments ibd.ibs_segments() reports between a parent and a child
        haplotype, first with default settings and then with a minimum segment length.'''
        parent, child = 1, 2
        hap1, hap2 = 0, 1
        het_snps = gt.where_heterozygous(self.haplotype.data, parent)

        # Expected segments, named by their relative length.
        short_segment = ((3, 172), (17080378, 18541497, 1.461, 0), ((1, 1), (2, 1)))
        long_segment = ((193, 3218), (18996226, 51156934, 32.161, 1), ((1, 0), (2, 1)))

        # Default settings: both segments are expected.
        found = ibd.ibs_segments(self.haplotype, parent, child, hap1, hap2,
                                 het_snps, True)
        assert_segments_almost_equal(found, [short_segment, long_segment],
                                     full_data=True, decimal=3,
                                     err_msg='Wrong IBD segments')

        # With a minimum segment length, only the long segment is expected.
        found = ibd.ibs_segments(self.haplotype, parent, child, hap1, hap2,
                                 het_snps, True,
                                 length_bound='base_pair',
                                 min_segment_length=10.0)
        assert_segments_almost_equal(found, [long_segment],
                                     full_data=True, decimal=3,
                                     err_msg='Wrong IBD segments')
Exemple #3
0
def ibd_segments_in_family(h, family, parent_het_fill_threshold, debug=False):
    '''A generator helper method that yields IBD segments in a family: for each parent
    that passes the fill test, yield the result of ibd_segments_parent_child() for that
    parent against every child in family.children.

    h = haplotype data set. family = family to process. parent_het_fill_threshold = value
    that h.fill_fraction(sample=parent) must exceed for the parent to be used to find
    segments. debug = forwarded to ibd_segments_parent_child().'''
    # items() rather than the Python-2-only iteritems(): the result is the same here
    # and it also works on Python 3.
    for parent_type, parent in family.parents_dict().items():
        if h.fill_fraction(sample=parent) > parent_het_fill_threshold:
            # The parent's het SNPs are computed once and shared by all of its children.
            het_snps = gt.where_heterozygous(h.data, parent)
            for child in family.children:
                yield ibd_segments_parent_child(h, parent, child, parent_type, het_snps, debug=debug)
 def test_ibd_segments_parent_child(self):
     '''Regression test for parent-child IBD segments that appeared to be cut too short:
     compare ip.ibd_segments_parent_child() for the mother and child 2 against the
     expected segments.'''
     mother = self.family.mother
     haplotype = self.problem.haplotype
     het_snps = gt.where_heterozygous(haplotype.data, mother)

     # Expected segments.
     expected = [
         ((2, 2650), (17075353, 45892433, 28.817, 0), ((1, 1), (2, 1))),
         ((2657, 3218), (45940934, 51156934, 5.216, 2), ((1, 0), (2, 1))),
     ]

     found = ip.ibd_segments_parent_child(haplotype, mother, 2, MATERNAL, het_snps)
     assert_segments_almost_equal(found, expected,
                                  full_data=True, decimal=3, err_msg='Wrong IBD segments')
Exemple #5
0
def plot_family_comparison(problem,
                           family,
                           parent_type,
                           template=None,
                           title=None,
                           ylabel=None,
                           colors=DEFAULT_COLORS,
                           snps='het',
                           x=None,
                           y=None,
                           children=None,
                           xaxis=None,
                           yaxis=None):
    '''Generate a recombination coloring plot of the difference between a parent
    and corresponding children haplotype in a nuclear family.

    template: if None, compare against the parent; otherwise compare against the template
    child whose ID is template (ID 0 is a valid template).
    snps: if 'het' (default), all het snps in the parent are used; if an array is specified,
    the plot is restricted to those snps; if None, problem.snp_range is used.
    children: display all genotyped children (default, also used for an empty selection),
    or selected children (if specified).
    y: row labels; defaults to the children. yaxis='id' converts the labels through
    problem.pedigree.sample_id.
    xaxis: passed to _xaxis(); 'snp' labels the x axis 'SNP position', any other true value 'Mbp'.
    title, ylabel, x: accepted for interface compatibility but not read by this function.

    Returns the tuple (plot_comparison(...) result, snps).'''
    g, h = problem.data
    parent = family.parents[parent_type]
    # Test against None, not truthiness, so that a template child with ID 0 is honoured.
    if template is None:
        template, template_allele = parent, PATERNAL
    else:
        template_allele = parent_type
    poo = problem.haplotype.poo_phase
    # A negative poo_phase entry selects the opposite allele for that sample.
    if poo[template] < 0:
        template_allele = 1 - template_allele
    h_template = h[:, template, template_allele]  # Template haplotype to use

    # Display all genotyped children, or selected children. len() is used instead of
    # truthiness so that a numpy array of children does not raise.
    if children is None or len(children) == 0:
        children = [
            child for child in family.children_list if problem.is_genotyped(child)
        ]

    # Only a scalar can be the 'het' keyword; comparing an array of snps with a string
    # would be an elementwise comparison.
    if np.ndim(snps) == 0 and snps == 'het':
        snps = gt.where_heterozygous(g, sample=parent)
    elif snps is None:
        snps = problem.snp_range

    template_y = 'TEMP: %d' % (template, )
    y = y if y is not None else children
    if yaxis == 'id':
        template_y = 'TEMP: %d' % (problem.pedigree.sample_id[template], )
        y = [problem.pedigree.sample_id[child] for child in y]

    # Children haplotypes of the requested parental origin; samples with a negative
    # poo_phase entry take their other haplotype instead.
    ht = h[:, children, parent_type]
    ho = h[:, children, 1 - parent_type]
    flipped = (poo[children] < 0)
    ht[:, flipped] = ho[:, flipped]

    if xaxis:
        xlabel = 'SNP position' if xaxis == 'snp' else 'Mbp'
    else:
        xlabel = None
    plot_title = 'Nuclear Family (%d,%d) Children Comparison: %s Haplotype' % \
        (family.father, family.mother, ALLELE_LABELS[parent_type].capitalize())
    return plot_comparison(template=h_template, template_y=template_y,
                           haps=np.concatenate((h_template[:, np.newaxis], ht), axis=1),
                           snps=snps,
                           xlabel=xlabel,
                           x=_xaxis(problem.haplotype, xaxis, snps),
                           y=y,
                           title=plot_title,
                           colors=colors), snps
Exemple #6
0
    def test_ibs_segments(self):
        '''Check the segments ibd.ibs_segments() reports between a parent and a child
        haplotype, with and without a minimum segment length.'''
        parent, child = 1, 2
        hap1, hap2 = 0, 1
        het_snps = gt.where_heterozygous(self.haplotype.data, parent)
        check = dict(full_data=True, decimal=3, err_msg='Wrong IBD segments')

        # Expected segments.
        first = ((3, 172), (17080378, 18541497, 1.461, 0), ((1, 1), (2, 1)))
        second = ((193, 3218), (18996226, 51156934, 32.161, 1), ((1, 0), (2, 1)))

        # No length restriction: both segments are expected.
        unfiltered = ibd.ibs_segments(self.haplotype, parent, child, hap1, hap2, het_snps, True)
        assert_segments_almost_equal(unfiltered, [first, second], **check)

        # Minimum segment length: only the second segment is expected.
        filtered = ibd.ibs_segments(self.haplotype, parent, child, hap1, hap2, het_snps, True,
                                    length_bound='base_pair', min_segment_length=10.0)
        assert_segments_almost_equal(filtered, [second], **check)
Exemple #7
0
 def __init__(self, problem, params, family, parent_type, children, debug=False):
     '''Initialize the per-parent comparison state of a family.

     problem = source of pedigree, data and num_snps. params = kept unchanged.
     family = family to process; family.parents[parent_type] is the parent compared
     against. children = children to use, as chosen by the caller. debug = debug flag.'''
     self.problem, self.params = problem, params
     pedigree = problem.pedigree
     self.sample_id, self.sample_index = pedigree.sample_id, pedigree.sample_index
     # The two components of problem.data (presumably genotypes and haplotypes - confirm).
     self.g, self.h = problem.data

     self.family, self.parent_type = family, parent_type
     self.parent = family.parents[parent_type]
     # The caller decides which children take part; nothing is filtered here.
     self.children = children
     self.num_children = len(children)

     self.num_snps = problem.num_snps
     self.snps = None
     self.het_snps = gt.where_heterozygous(self.g, sample=self.parent)
     self.template = -1

     # Private helper of the class (not shown here). The attributes above are set before
     # the call and the ones below after it, in the established order.
     self.__filter_snps()

     self._recombination_index = np.empty((0, 2), dtype=int)
     self.error_filter_length = 5
     self.debug = debug
Exemple #8
0
def plot_family_comparison(problem, family, parent_type, template=None, title=None, ylabel=None,
                           colors=DEFAULT_COLORS, snps='het', x=None, y=None, children=None,
                           xaxis=None, yaxis=None):
    '''Generate a recombination coloring plot of the difference between a parent
    and corresponding children haplotype in a nuclear family.

    template: if None, compare against the parent; otherwise compare against the template
    child whose ID is template (ID 0 is a valid template).
    snps: if 'het' (default), all het snps in the parent are used; if an array is specified,
    the plot is restricted to those snps; if None, problem.snp_range is used.
    children: display all genotyped children (default, also used for an empty selection),
    or selected children (if specified).
    y: row labels; defaults to the children. yaxis='id' converts the labels through
    problem.pedigree.sample_id.
    xaxis: passed to _xaxis(); 'snp' labels the x axis 'SNP position', any other true value 'Mbp'.
    title, ylabel, x: accepted for interface compatibility but not read by this function.

    Returns the tuple (plot_comparison(...) result, snps).'''
    g, h = problem.data
    parent = family.parents[parent_type]
    # Decide on "is None", not truthiness: a template child with ID 0 must count as a template.
    using_parent = template is None
    template_allele = PATERNAL if using_parent else parent_type
    if using_parent:
        template = parent
    poo = problem.haplotype.poo_phase
    # A negative poo_phase entry selects the opposite allele for that sample.
    h_template = h[:, template, 1 - template_allele if poo[template] < 0 else template_allele]  # Template haplotype to use

    # Display all genotyped children, or selected children. Checked with len() so that a
    # numpy array of selected children does not raise on truth-testing.
    if children is None or len(children) == 0:
        children = [child for child in family.children_list if problem.is_genotyped(child)]

    # Only a scalar can be the 'het' keyword; an array of snps compared with a string
    # would yield an elementwise result.
    if np.ndim(snps) == 0 and snps == 'het':
        snps = gt.where_heterozygous(g, sample=parent)
    elif snps is None:
        snps = problem.snp_range

    template_y = 'TEMP: %d' % (template,)
    y = y if y is not None else children
    if yaxis == 'id':
        template_y = 'TEMP: %d' % (problem.pedigree.sample_id[template],)
        y = [problem.pedigree.sample_id[child] for child in y]

    # Children haplotypes of the requested parental origin; samples with a negative
    # poo_phase entry take their other haplotype instead.
    ht = h[:, children, parent_type]
    ho = h[:, children, 1 - parent_type]
    flipped = (poo[children] < 0)
    ht[:, flipped] = ho[:, flipped]

    return plot_comparison(template=h_template, template_y=template_y,
                           haps=np.concatenate((h_template[:, np.newaxis], ht), axis=1),
                           snps=snps,
                           xlabel=('SNP position' if xaxis == 'snp' else 'Mbp') if xaxis else None,
                           x=_xaxis(problem.haplotype, xaxis, snps),
                           y=y,
                           title='Nuclear Family (%d,%d) Children Comparison: %s Haplotype' % \
                           (family.father, family.mother, ALLELE_LABELS[parent_type].capitalize()),
                           colors=colors), snps
Exemple #9
0
def __handle_child_comparison(self, request):
    '''In families with at least 3 children:
    if one parent is a founder (or more generally, not sufficiently phased in het snps)
    and the other is not, the children in this family will be phased well by ParentChildFounder,
    but the non-founder parent will not be.

    By comparing children's haplotypes against a template child (=the most-filled child)
    and translating that into comparison between children haps and the unphased parent's, we can infer
    their IBS segments and subsequently the parent's haplotypes.

    Note that the parent will have random hap-gender-assignment: we can't know which one of his/her
    haplotypes is paternal and which one is maternal (we might at a later stage, if his/her parent
    genotypes are genotyped or imputed by ancestor imputation).

    request = object carrying the problem and params to work on. Always returns False.
    Debug output uses single-argument print(...) calls, which produce the same text
    whether print is a statement or a function.'''
    problem, params = request.problem, request.params
    g, h = problem.components
    # Find families with at least min_consensus_samples genotyped children, or use single
    # family if debug mode (single_member) is on
    if params.single_member:
        potential_families = problem.find_families_by_member(
            params.single_member, genotyped=False,
            min_children=params.min_consensus_samples)
    else:
        potential_families = pt.selected_families(
            problem, params, genotyped=False,
            min_children=params.min_consensus_samples)
    # Keep only families with enough sufficiently filled children
    families = [
        f for f in potential_families
        if len(problem.find_samples_with_fill_ge(
            params.surrogate_parent_fill_threshold,
            sample=f.children_array)) >= params.min_consensus_samples
    ]
    if params.debug:
        print('__handle_child_comparison(), families to process %s' % (families,))
    for family in families:
        # (parent_type, parent) pairs of the genotyped parents; items() works on
        # both Python 2 and 3, unlike iteritems()
        genotyped_parents = [
            (k, v) for (k, v) in family.parents_dict().items()
            if problem.is_genotyped(v)
        ]
        num_genotyped_parents = len(genotyped_parents)
        # If both parents are genotyped, use all children - it is probably safe enough to generate
        # enough SNPs to work with (het in parent + filled in all children), since it has worked in the past.
        # If not both parents are genotyped, use filled children only to generate enough relevant SNPs.
        genotyped_children = np.array(
            [x for x in family.children_array if problem.is_genotyped(x)])
        if num_genotyped_parents == 2:
            filled_children = genotyped_children
        else:
            # Builtin int instead of np.int, an alias that newer NumPy no longer provides
            filled_children = problem.find_samples_with_fill_ge(
                params.surrogate_parent_fill_threshold,
                sample=genotyped_children)[:, 0].astype(int)
        comparator = ic.ChildComparator(request, family, filled_children)
        for parent_type, parent in genotyped_parents:
            het_snps = gt.where_heterozygous(g.data, parent)
            # Only parents that are not yet sufficiently phased at their het snps are processed
            if h.fill_fraction(sample=parent,
                               snps=het_snps) < params.het_fill_threshold:
                # Choose template = most-filled child. The fill is re-read for every parent
                # because the phasing steps below may change it.
                fill = problem.fill_fraction(sample=filled_children)
                if params.debug:
                    print('=' * 105)
                    print('Children comparison in %s parent_type %s' % (family, parent_type))
                    print('=' * 105)
                    print('%s' % ([problem.is_genotyped(x) for x in family.children],))
                    print('# genotyped children %s # parent het snps %s' % (
                        sum(problem.is_genotyped(x) for x in family.children),
                        len(het_snps)))
                    print('Filled children %s' % (filled_children,))
                    print("Family's fill:\n%s" % (
                        problem.fill_fraction(sample=family.member_set),))
                # Column 0 of fill is read as the sample, column 1 as the value to maximize
                template_child = int(fill[np.argmax(fill[:, 1]), 0])
                if params.debug:
                    # This line used to be a bare expression that printed nothing
                    print('template_child %s' % (template_child,))
                (_, _, info) = comparator.child_recombinations(
                    parent_type, template_child)
                # Save selected entries from the family info class in problem
                problem.set_family_info(family, info)
                # Impute parent from the template child
                if params.debug:
                    print('%s %s' % (family, parent_type))
                    print('Child recombinations')
                    print(info.recombination_snp)
                comparator.phase_parent_by_template(info)
                # Now phase children (and possibly some more of the parent) using IBD segments
                # found among them and the parent
                ibd.phase_by_ibd(request, info.ibs_segments(), 'majority')
    return False
Exemple #10
0
def ibd_segments_in_duo(h, parent, child, parent_type, parent_het_fill_threshold, debug=False):
    '''Generator counterpart of ibd_segments_in_family for a single parent-child duo.

    Yields the result of ibd_segments_parent_child() once if h.fill_fraction(sample=parent)
    exceeds parent_het_fill_threshold, and nothing otherwise. debug is forwarded to
    ibd_segments_parent_child().'''
    sufficiently_filled = h.fill_fraction(sample=parent) > parent_het_fill_threshold
    if not sufficiently_filled:
        return
    parent_het_snps = gt.where_heterozygous(h.data, parent)
    yield ibd_segments_parent_child(h, parent, child, parent_type, parent_het_snps, debug=debug)
Exemple #11
0
def __handle_child_comparison(self, request):
    '''In families with at least 3 children:
    if one parent is a founder (or more generally, not sufficiently phased in het snps)
    and the other is not, the children in this family will be phased well by ParentChildFounder,
    but the non-founder parent will not be.

    By comparing children's haplotypes against a template child (=the most-filled child)
    and translating that into comparison between children haps and the unphased parent's, we can infer
    their IBS segments and subsequently the parent's haplotypes.

    Note that the parent will have random hap-gender-assignment: we can't know which one of his/her
    haplotypes is paternal and which one is maternal (we might at a later stage, if his/her parent
    genotypes are genotyped or imputed by ancestor imputation).

    request = object carrying the problem and params to work on.
    Debug output uses single-argument print(...) calls, which produce the same text
    whether print is a statement or a function.'''
    problem, params = request.problem, request.params
    g, h = problem.components
    # Find families with at least min_consensus_samples genotyped children, or use single
    # family if debug mode (single_member) is on
    if params.single_member:
        potential_families = problem.find_families_by_member(params.single_member, genotyped=False,
                                                             min_children=params.min_consensus_samples)
    else:
        potential_families = pt.selected_families(problem, params, genotyped=False,
                                                  min_children=params.min_consensus_samples)
    # Keep only families with enough sufficiently filled children
    families = [f for f in potential_families
                if len(problem.find_samples_with_fill_ge(params.surrogate_parent_fill_threshold, sample=f.children_array))
                >= params.min_consensus_samples]
    if params.debug:
        print('__handle_child_comparison(), families to process %s' % (families,))
    for family in families:
        # (parent_type, parent) pairs of the genotyped parents; items() works on
        # both Python 2 and 3, unlike iteritems()
        genotyped_parents = [(k, v) for (k, v) in family.parents_dict().items() if problem.is_genotyped(v)]
        num_genotyped_parents = len(genotyped_parents)
        # If both parents are genotyped, use all children - it is probably safe enough to generate
        # enough SNPs to work with (het in parent + filled in all children), since it has worked in the past.
        # If not both parents are genotyped, use filled children only to generate enough relevant SNPs.
        genotyped_children = np.array([x for x in family.children_array if problem.is_genotyped(x)])
        # Builtin int instead of np.int, an alias that newer NumPy no longer provides
        filled_children = genotyped_children if num_genotyped_parents == 2 else \
            problem.find_samples_with_fill_ge(params.surrogate_parent_fill_threshold, sample=genotyped_children)[:, 0].astype(int)
        comparator = ic.ChildComparator(request, family, filled_children)
        for parent_type, parent in genotyped_parents:
            het_snps = gt.where_heterozygous(g.data, parent)
            # Only parents that are not yet sufficiently phased at their het snps are processed
            if h.fill_fraction(sample=parent, snps=het_snps) < params.het_fill_threshold:
                # Choose template = most-filled child. The fill is re-read for every parent
                # because the phasing steps below may change it.
                fill = problem.fill_fraction(sample=filled_children)
                if params.debug:
                    print('=' * 105)
                    print('Children comparison in %s parent_type %s' % (family, parent_type))
                    print('=' * 105)
                    print('%s' % ([problem.is_genotyped(x) for x in family.children],))
                    print('# genotyped children %s # parent het snps %s' % (
                        sum(problem.is_genotyped(x) for x in family.children), len(het_snps)))
                    print('Filled children %s' % (filled_children,))
                    print("Family's fill:\n%s" % (problem.fill_fraction(sample=family.member_set),))
                # Column 0 of fill is read as the sample, column 1 as the value to maximize
                template_child = int(fill[np.argmax(fill[:, 1]), 0])
                if params.debug:
                    # This line used to be a bare expression that printed nothing
                    print('template_child %s' % (template_child,))
                (_, _, info) = comparator.child_recombinations(parent_type, template_child)
                # Save selected entries from the family info class in problem
                problem.set_family_info(family, info)
                # Impute parent from the template child
                if params.debug:
                    print('%s %s' % (family, parent_type))
                    print('Child recombinations')
                    print(info.recombination_snp)
                comparator.phase_parent_by_template(info)
                # Now phase children (and possibly some more of the parent) using IBD segments
                # found among them and the parent
                ibd.phase_by_ibd(request, info.ibs_segments(), 'majority')