コード例 #1
0
ファイル: ibd_child.py プロジェクト: orenlivne/ober
 def phase_parent_by_template(self, info):
     '''Impute the parent haplotype. A random parent random phase is selected and switched at
     each recombination location in the template child.'''
     bp = self.problem.info.snp['base_pair']
     num_snps = self.problem.num_snps
     parent = info.parent
     child = info.template_child
     parent_type = info.parent_type
     if self.params.debug:
         print 'phase_parent_by_template(parent=%d, template=%d, parent_type=%d)' % (parent, child, parent_type)
     # h = problem.haplotype.data                 
     # Infer the parent's haplotypes from the template child's recombinations r
     r = info.recombination_snp
     edge = [y for (x, y) in r if x == child]
     # r = np.concatenate(([-1], edge, [problem.num_snps-1]))
     
     phase = PATERNAL  # Random phase set at the starting of the chromosome
     segments = segment.edges_to_segments(self.problem.snp_range, edge, phase,
                                          self.problem.num_snps, cover=True)
     segment_set = SegmentSet((Segment((x[0], x[1]), ((parent, x[2]), (child, parent_type)),
                                       (bp[x[0]], im.segment.stop_bp(bp, x[1], num_snps))) 
                               for x in segments))
     # Only a single sweep suffices here, since only two samples are involved. If a homogeneous
     # entry is discovered and used to propagate phases in the second of the two IBD sets in
     # a SNP range, there's no need to fix the first one since the data is already fully filled.
     ibd.phase_by_ibd(self.request, [segment_set], 'max', num_sweeps=1)
コード例 #2
0
ファイル: ibd_child.py プロジェクト: orenlivne/ober
    def ibs_segments(self):
        '''Convert recombinations r to IBD segments. A generator.'''
        if not self.snps_exist:
            return
        r = self.recombination_index
        bp = self.problem.info.snp['base_pair']
        # print r
        
        # Calculate IBD segments between all children and parent 
        for child in self.children:
            # print '-------------------- child %d ----------------------' % (child,)
            edge = [y for (x, y) in r if x == child]
            phase = PATERNAL  # Random phase set at the starting of the chromosome
            segments = segment.edges_to_segments(self.snps, edge, phase, self.num_snps, cover=True)
            if self.debug:
                print 'child', child, 'edge', edge, 'segments', segments

            # Flip initial phase if parent is phased at hap snps by comparing its 
            # haplotypes with the child haplotypes on the longest IBD segment
            i = np.argmax(np.diff(segments)[:, 0])
            segment_phase = phase if np.mod(i, 2) == 0 else 1 - phase
            actual_phase = self.__equal_parent_hap(child, segments[i, 0], segments[i, 1])
            if segment_phase != actual_phase:
                segments = segment.flip_phase(segments)

            # Output segments in the standard IBD segment format
            yield SegmentSet((Segment((x[0], x[1]), ((self.parent, x[2]), (child, self.parent_type)),
                                      (bp[x[0]], im.segment.stop_bp(bp, x[1], self.num_snps)))
                              for x in segments))
コード例 #3
0
ファイル: ibd_child.py プロジェクト: orenlivne/ober
    def phase_parent_by_template(self, info):
        '''Impute the parent haplotype. A random parent random phase is selected and switched at
        each recombination location in the template child.'''
        bp = self.problem.info.snp['base_pair']
        num_snps = self.problem.num_snps
        parent = info.parent
        child = info.template_child
        parent_type = info.parent_type
        if self.params.debug:
            print 'phase_parent_by_template(parent=%d, template=%d, parent_type=%d)' % (
                parent, child, parent_type)
        # h = problem.haplotype.data
        # Infer the parent's haplotypes from the template child's recombinations r
        r = info.recombination_snp
        edge = [y for (x, y) in r if x == child]
        # r = np.concatenate(([-1], edge, [problem.num_snps-1]))

        phase = PATERNAL  # Random phase set at the starting of the chromosome
        segments = segment.edges_to_segments(self.problem.snp_range,
                                             edge,
                                             phase,
                                             self.problem.num_snps,
                                             cover=True)
        segment_set = SegmentSet(
            (Segment((x[0], x[1]), ((parent, x[2]), (child, parent_type)),
                     (bp[x[0]], im.segment.stop_bp(bp, x[1], num_snps)))
             for x in segments))
        # Only a single sweep suffices here, since only two samples are involved. If a homogeneous
        # entry is discovered and used to propagate phases in the second of the two IBD sets in
        # a SNP range, there's no need to fix the first one since the data is already fully filled.
        ibd.phase_by_ibd(self.request, [segment_set], 'max', num_sweeps=1)
コード例 #4
0
ファイル: ibd_child.py プロジェクト: orenlivne/ober
    def ibs_segments(self):
        '''Convert recombinations r to IBD segments. A generator.'''
        if not self.snps_exist:
            return
        r = self.recombination_index
        bp = self.problem.info.snp['base_pair']
        # print r

        # Calculate IBD segments between all children and parent
        for child in self.children:
            # print '-------------------- child %d ----------------------' % (child,)
            edge = [y for (x, y) in r if x == child]
            phase = PATERNAL  # Random phase set at the starting of the chromosome
            segments = segment.edges_to_segments(self.snps,
                                                 edge,
                                                 phase,
                                                 self.num_snps,
                                                 cover=True)
            if self.debug:
                print 'child', child, 'edge', edge, 'segments', segments

            # Flip initial phase if parent is phased at hap snps by comparing its
            # haplotypes with the child haplotypes on the longest IBD segment
            i = np.argmax(np.diff(segments)[:, 0])
            segment_phase = phase if np.mod(i, 2) == 0 else 1 - phase
            actual_phase = self.__equal_parent_hap(child, segments[i, 0],
                                                   segments[i, 1])
            if segment_phase != actual_phase:
                segments = segment.flip_phase(segments)

            # Output segments in the standard IBD segment format
            yield SegmentSet((Segment(
                (x[0], x[1]), ((self.parent, x[2]), (child, self.parent_type)),
                (bp[x[0]], im.segment.stop_bp(bp, x[1], self.num_snps)))
                              for x in segments))
コード例 #5
0
def ibs_segments(haplotype,
                 id1,
                 id2,
                 hap1_type,
                 hap2_type,
                 snps=None,
                 include_alt_phase=False,
                 error_filter='median',
                 error_filter_length=5,
                 length_bound=None,
                 min_segment_length=INDETERMINATE,
                 margin=0.0,
                 debug=False):
    '''Return 1) Identical-by-State (IBS) segments separated by recombination events between two
    sample haplotypes (id1, hap1_type) and (id2, hap2_type). The 2-D output array's ith row format is
    
    (segment_start, segment_stop),
    (id1, hap1), (id2, hap2), 
    (segment_start_bp, segment_stop_bp, segment_length_in_bp, num_errors_in_segment) 
    
    The SNP range is [segment_start, segment_stop) where start=inclusive and stop is exclusive.
    2) List of het_snp indices at which there are likely genotype errors.
        
    Options:
    snps - list of SNPs to base the comparison on. For parent-child comparisons, these should
    be heterozygous SNPs in the parent's genotype, distinguishing its haplotypes
    and used to locate segments. For unphased-phased individuals, these should be the list of
    homozygous SNPs at the unphased individual (those that have data).
    If not specified, all SNPs are used.
    
    length_bound - minimum segment length bound type:
        None: no lower bound enforced 
        'base_pair': output segments of at least min_segment_length [base pair]
        'snp': output segments of at least min_segment_length consecutive SNPs out of the snps list.
               This is useful only if snps includes all SNPs (or is None) 
        *NOTE*: min_segment_length''s units are interpreted differently depending on length_bound.
         
    margin = fraction of segment to discard near the endpoints (margin/2 is removed from each side).'''

    if debug:
        print 'Computing IBD segments between haplotypes (%d,%d), (%d,%d); filter %s length %d' % \
        (id1, hap1_type, id2, hap2_type, error_filter, error_filter_length)
    d = diff.all_diffs(haplotype.data,
                       id1,
                       id2,
                       hap1_type=hap1_type,
                       hap2_type=hap2_type)[0]
    # Segment length, as defined by the input parameters
    segment_length = lambda f: np.inf if not length_bound else (
        f.length
        if length_bound == 'base_pair' else f.num_snps)  # @UnusedVariable

    # Consider informative or the specified SNPs only
    snps = snps if snps is not None else haplotype.snp_range
    snps = np.intersect1d(snps, np.where(d != INDETERMINATE)[0])
    d_snps = d[snps]
    filtered_diff = filter_diff(d_snps, error_filter, error_filter_length)
    error_snps = snps[np.nonzero(d_snps - filtered_diff)[0]]

    # Detect edges as non-zero gradient points; output sufficiently long segments
    bp = haplotype.snp['base_pair']
    num_snps = haplotype.num_snps
    if np.size(filtered_diff) == 0:
        # No data to consider ==> no IBD intervals can be identified
        segments = []
    else:
        deriv = ndimage.convolve(filtered_diff, [1, -1])
        edge = np.where(deriv != 0)[0]
        initial_phase = hap1_type if filtered_diff[0] == 0 else 1 - hap1_type
        if debug:
            print 'initial_phase', initial_phase  # , 'edge', edge
        # Convert recombination locations to segments of no recombination; filter short segments
        segments = [
            f for f in (
                Segment(((x[0], x[1])),
                        set(((id1, x[2]), (id2, hap2_type))), (
                            bp[x[0]], segment.stop_bp(bp, x[1], num_snps)),
                        error_snps=segment.in_segment(error_snps, x))
                for x in segment.edges_to_segments(
                    snps, edge, initial_phase, haplotype.num_snps, hap1_type))
            if segment_length(f) >= min_segment_length
        ]

    # Cut segment margins
    if margin >= constants.SMALL_FLOAT:
        segments = [
            s for s in (s.middle_part(haplotype.nearest_snp, bp, margin)
                        for s in segments) if s
        ]

    # Restrict errors to those inside segments
    segment_set = SegmentSet(segments,
                             np.array(util.flattened_meshgrid(reduce(list.__add__, (s.error_snps.tolist() for s in segments)),
                                                                        np.array([id1, id2])), dtype=int) \
                             if segments else gt.empty_errors_array())
    if debug:
        print 'ibs_segments()', segment_set
        print 'errors', segment_set.errors
    return segment_set
コード例 #6
0
ファイル: ibd.py プロジェクト: orenlivne/ober
def ibs_segments(haplotype, id1, id2, hap1_type, hap2_type, snps=None, include_alt_phase=False,
                 error_filter='median', error_filter_length=5,
                 length_bound=None, min_segment_length=INDETERMINATE, margin=0.0, debug=False):
    '''Return 1) Identical-by-State (IBS) segments separated by recombination events between two
    sample haplotypes (id1, hap1_type) and (id2, hap2_type). The 2-D output array's ith row format is
    
    (segment_start, segment_stop),
    (id1, hap1), (id2, hap2), 
    (segment_start_bp, segment_stop_bp, segment_length_in_bp, num_errors_in_segment) 
    
    The SNP range is [segment_start, segment_stop) where start=inclusive and stop is exclusive.
    2) List of het_snp indices at which there are likely genotype errors.
        
    Options:
    snps - list of SNPs to base the comparison on. For parent-child comparisons, these should
    be heterozygous SNPs in the parent's genotype, distinguishing its haplotypes
    and used to locate segments. For unphased-phased individuals, these should be the list of
    homozygous SNPs at the unphased individual (those that have data).
    If not specified, all SNPs are used.
    
    length_bound - minimum segment length bound type:
        None: no lower bound enforced 
        'base_pair': output segments of at least min_segment_length [base pair]
        'snp': output segments of at least min_segment_length consecutive SNPs out of the snps list.
               This is useful only if snps includes all SNPs (or is None) 
        *NOTE*: min_segment_length''s units are interpreted differently depending on length_bound.
         
    margin = fraction of segment to discard near the endpoints (margin/2 is removed from each side).'''
    
    if debug:
        print 'Computing IBD segments between haplotypes (%d,%d), (%d,%d); filter %s length %d' % \
        (id1, hap1_type, id2, hap2_type, error_filter, error_filter_length)
    d = diff.all_diffs(haplotype.data, id1, id2, hap1_type=hap1_type, hap2_type=hap2_type)[0]
    # Segment length, as defined by the input parameters 
    segment_length = lambda f: np.inf if not length_bound else (f.length if length_bound == 'base_pair' else f.num_snps)  # @UnusedVariable
    
    # Consider informative or the specified SNPs only
    snps = snps if snps is not None else haplotype.snp_range
    snps = np.intersect1d(snps, np.where(d != INDETERMINATE)[0])
    d_snps = d[snps]
    filtered_diff = filter_diff(d_snps, error_filter, error_filter_length)    
    error_snps = snps[np.nonzero(d_snps - filtered_diff)[0]]
    
    # Detect edges as non-zero gradient points; output sufficiently long segments
    bp = haplotype.snp['base_pair']
    num_snps = haplotype.num_snps
    if np.size(filtered_diff) == 0:
        # No data to consider ==> no IBD intervals can be identified
        segments = []
    else:
        deriv = ndimage.convolve(filtered_diff, [1, -1])    
        edge = np.where(deriv != 0)[0]
        initial_phase = hap1_type if filtered_diff[0] == 0 else 1 - hap1_type
        if debug:
            print 'initial_phase', initial_phase  # , 'edge', edge
        # Convert recombination locations to segments of no recombination; filter short segments
        segments = [f for f in (Segment(((x[0], x[1])), set(((id1, x[2]), (id2, hap2_type))),
                                        (bp[x[0]], segment.stop_bp(bp, x[1], num_snps)),
                                        error_snps=segment.in_segment(error_snps, x))
                                for x in segment.edges_to_segments(snps, edge, initial_phase,
                                                                   haplotype.num_snps, hap1_type))
                    if segment_length(f) >= min_segment_length]
    
    # Cut segment margins
    if margin >= constants.SMALL_FLOAT:
        segments = [s for s in (s.middle_part(haplotype.nearest_snp, bp, margin) for s in segments) if s]
    
    # Restrict errors to those inside segments
    segment_set = SegmentSet(segments,
                             np.array(util.flattened_meshgrid(reduce(list.__add__, (s.error_snps.tolist() for s in segments)),
                                                                        np.array([id1, id2])), dtype=int) \
                             if segments else gt.empty_errors_array())
    if debug:
        print 'ibs_segments()', segment_set
        print 'errors', segment_set.errors
    return segment_set