Exemple #1
0
def write_imputed(t, out, debug=False, poo_phase=None):
    '''Write imputed genotypes to the stream out in CGI format.

    One line is written per SNP in t.genotype.snp_range: the SNP's metadata fields (only if
    metadata is non-empty; empty fields are written as '-'), followed by one entry per sample
    made of the sample's haplotype type code and its two recoded alleles. Fields are separated
    by tabs and the stream is flushed after every line.

    t - object providing imputed_data, imputed_hap_type and genotype (metadata, snp_range).
    out - writable output stream.
    debug - if True, also print, per SNP, a table of (sample index, hap type, imputed data)
    with numpy's print truncation disabled. The previous print threshold is restored on exit,
    even if writing fails.
    poo_phase - optional per-sample array. Samples with a non-zero entry are marked as
    PHASED_WITH_ORIGIN in t.imputed_hap_type (which is modified in place); samples with a
    negative entry also have their two alleles swapped in the output. Presumably this is the
    parent-of-origin phase - confirm against the caller.'''
    import sys

    data = im.recode.recode_cgi(t.imputed_data)
    metadata = t.genotype.metadata
    hap_type = t.imputed_hap_type
    if poo_phase is not None:
        aligned_samples = np.where(poo_phase)[0]
        t.imputed_hap_type[:, aligned_samples] = im.constants.PHASED_WITH_ORIGIN
        # Flip haplotypes of samples with flipped POO phase
        flipped_samples = np.where(poo_phase < 0)[0]
        orig = flattened_meshgrid(flipped_samples, im.constants.ALLELES)
        flipped = flattened_meshgrid(flipped_samples, list(reversed(im.constants.ALLELES)))
        data[:, orig[0], orig[1]] = data[:, flipped[0], flipped[1]]

    if debug:
        # threshold=np.nan is rejected by current numpy; sys.maxsize is the documented way to
        # disable truncation. Remember the caller's setting so that it can be restored.
        saved_threshold = np.get_printoptions()['threshold']
        np.set_printoptions(threshold=sys.maxsize)
    try:
        for snp in t.genotype.snp_range:
            # Ensure that all fields are non-empty - easier to parse by subsequent processes
            if metadata:
                np.savetxt(out, np.array([x if x else '-' for x in metadata[snp]]),
                           fmt='%s', newline='\011', delimiter='')
            # savetxt terminates every entry with a tab; buffer the genotype entries so that
            # the trailing tab can be dropped before the line is written
            out_str = StringIO.StringIO()
            entries = [str(h) + g[0] + g[1] for h, g in it.izip(hap_type[snp], data[snp])]
            np.savetxt(out_str, entries, fmt='%s', newline='\011', delimiter='')
            out.write(out_str.getvalue()[:-1])
            out.write('\n')
            out.flush()
            if debug:
                sample_column = np.arange(data.shape[1])[np.newaxis].transpose()
                hap_type_column = hap_type[snp][np.newaxis].transpose()
                print(np.concatenate((sample_column, hap_type_column, t.imputed_data[snp]), axis=1))
    finally:
        if debug:
            np.set_printoptions(threshold=saved_threshold)
Exemple #2
0
 def __init__(self, problem, fraction=None, test_index=None):
     '''Set up an experiment on a working copy of problem in which a test set of genotypes
     is cleared out.

     Exactly one of fraction and test_index must be given:
     fraction - the test set is chosen by clear_random_portion() using this fraction.
     test_index - these specific test indices are cleared, using clear_index().
     test_index = 'hap' - data is read from problem.h (haplotype array). The entire array
     is considered as a test array, but nothing is zeroed out. Useful for phasing result stats.

     Raises ValueError if both or neither of fraction and test_index are specified.'''
     # Exactly one of the two selection modes must be chosen
     if (fraction is None) == (test_index is None):
         raise ValueError('Must specify fraction or test_index')

     # Work on a copy of the problem: the genotype is copied, the pedigree object is reused
     self.problem = Problem(problem.pedigree, problem.genotype.copy())
     self.h = self.problem.h

     # Build the test set; the original genotypes are kept in g_orig
     if test_index is None:
         self.fraction = fraction
         self.g_orig, index = clear_random_portion(self.problem.genotype.data, fraction)
     elif test_index == 'hap':
         # Nothing is cleared: every entry of the original problem's haplotype array is a test
         hap = problem.h
         rows, columns = range(hap.shape[0]), range(hap.shape[1])
         index = tuple(util.flattened_meshgrid(rows, columns))
         self.g_orig = problem.g
         self.h = hap
         self.fraction = 1.0
     else:
         self.g_orig, index = clear_index(self.problem.g, test_index)
         num_entries = self.h.shape[0] * self.h.shape[1]
         self.fraction = (1.0 * index[0].size) / num_entries

     self.num_tests = index[0].size
     self.test_index = index
     self.r_orig = recode.recode_single_genotype(self.g_orig)
     self.fill = self.problem.fill_fraction()[:, SAMPLE]
     self.__recode_single_genotype = None
Exemple #3
0
    def __init__(self, problem, fraction=None, test_index=None):
        '''Set up an experiment on a working copy of problem in which a test set of genotypes
        is cleared out.

        Exactly one of fraction and test_index must be given:
        fraction - the test set is chosen by clear_random_portion() using this fraction.
        test_index - these specific test indices are cleared, using clear_index().
        test_index = 'hap' - data is read from problem.h (haplotype array). The entire array
        is considered as a test array, but nothing is zeroed out. Useful for phasing result stats.

        Raises ValueError if both or neither of fraction and test_index are specified.'''
        # Exactly one of the two selection modes must be chosen
        if (fraction is None) == (test_index is None):
            raise ValueError('Must specify fraction or test_index')

        # Work on a copy of the problem: the genotype is copied, the pedigree object is reused
        self.problem = Problem(problem.pedigree, problem.genotype.copy())
        self.h = self.problem.h

        # Build the test set; the original genotypes are kept in g_orig
        if test_index is None:
            self.fraction = fraction
            self.g_orig, index = clear_random_portion(
                self.problem.genotype.data, fraction)
        elif test_index == 'hap':
            # Nothing is cleared: every entry of the original problem's haplotype array is a test
            hap = problem.h
            rows, columns = range(hap.shape[0]), range(hap.shape[1])
            index = tuple(util.flattened_meshgrid(rows, columns))
            self.g_orig = problem.g
            self.h = hap
            self.fraction = 1.0
        else:
            self.g_orig, index = clear_index(self.problem.g, test_index)
            num_entries = self.h.shape[0] * self.h.shape[1]
            self.fraction = (1.0 * index[0].size) / num_entries

        self.num_tests = index[0].size
        self.test_index = index
        self.r_orig = recode.recode_single_genotype(self.g_orig)
        self.fill = self.problem.fill_fraction()[:, SAMPLE]
        self.__recode_single_genotype = None
Exemple #4
0
def genotype_ibs_segments(genotype,
                          id1,
                          id2,
                          snps,
                          error_filter='median',
                          error_filter_length=5,
                          margin=0.0,
                          min_ibs_len_snp=400,
                          debug=False):
    '''Return Identical-by-State (IBS >= 1) segments between two genoypes of samples id1 and id2
    in the SNP range [snp[0],snp[1]) (if snp is a tuple) or the subset of SNPs, if snps is an array.
    
    See ibs_segments() for a description of optional parameters.'''
    num_snps = genotype.num_snps
    g = genotype.data
    g1 = recode.recode_single_genotype(g[snps, id1, :])
    g2 = recode.recode_single_genotype(g[snps, id2, :])
    d = (recode.ibs_state(g1, g2) == 0).astype(np.byte)

    # Consider informative or the specified SNPs only
    filtered_diff = filter_diff(d, error_filter, error_filter_length)
    error_snps = snps[np.nonzero(d - filtered_diff)[0]]

    # Detect edges as non-zero gradient points; output sufficiently long segments
    if np.size(filtered_diff) == 0:
        # No data to consider ==> no IBD intervals can be identified
        segments = []
    else:
        # Convert recombination locations to segments of no recombination; filter short segments
        bp = genotype.snp['base_pair']
        #print segment.segments_with_value(filtered_diff, 0, min_ibs_len_snp)
        segments = [
            Segment(((x[0], x[1])), [id1, id2],
                    (bp[x[0]], segment.stop_bp(bp, x[1], num_snps)),
                    error_snps=segment.in_segment(error_snps, x),
                    collapse_to_set=False) for x in
            segment.segments_with_value(filtered_diff, 0, min_ibs_len_snp)
        ]

    # Cut segment margins
    if margin >= constants.SMALL_FLOAT:
        segments = [
            s for s in (s.middle_part(
                genotype.nearest_snp, bp, margin, collapse_to_set=False)
                        for s in segments) if s
        ]

    # Restrict errors to those inside segments
    segment_set = SegmentSet(segments,
                             np.array(util.flattened_meshgrid(reduce(list.__add__, (s.error_snps.tolist() for s in segments)),
                                                              np.array([id1, id2])), dtype=int) \
                             if segments else gt.empty_errors_array())
    if debug:
        print 'ibs_segments()', segment_set
        print 'errors', segment_set.errors
    return segment_set
Exemple #5
0
def write_imputed(t, out, debug=False, poo_phase=None):
    '''Write imputed genotypes to the stream out in CGI format.

    One line is written per SNP in t.genotype.snp_range: the SNP's metadata fields (only if
    metadata is non-empty; empty fields are written as '-'), followed by one entry per sample
    made of the sample's haplotype type code and its two recoded alleles. Fields are separated
    by tabs and the stream is flushed after every line.

    t - object providing imputed_data, imputed_hap_type and genotype (metadata, snp_range).
    out - writable output stream.
    debug - if True, also print, per SNP, a table of (sample index, hap type, imputed data)
    with numpy's print truncation disabled. The previous print threshold is restored on exit,
    even if writing fails.
    poo_phase - optional per-sample array. Samples with a non-zero entry are marked as
    PHASED_WITH_ORIGIN in t.imputed_hap_type (which is modified in place); samples with a
    negative entry also have their two alleles swapped in the output. Presumably this is the
    parent-of-origin phase - confirm against the caller.'''
    import sys

    data = im.recode.recode_cgi(t.imputed_data)
    metadata = t.genotype.metadata
    hap_type = t.imputed_hap_type
    if poo_phase is not None:
        aligned_samples = np.where(poo_phase)[0]
        t.imputed_hap_type[:,
                           aligned_samples] = im.constants.PHASED_WITH_ORIGIN
        # Flip haplotypes of samples with flipped POO phase
        flipped_samples = np.where(poo_phase < 0)[0]
        orig = flattened_meshgrid(flipped_samples, im.constants.ALLELES)
        flipped = flattened_meshgrid(flipped_samples,
                                     list(reversed(im.constants.ALLELES)))
        data[:, orig[0], orig[1]] = data[:, flipped[0], flipped[1]]

    if debug:
        # threshold=np.nan is rejected by current numpy; sys.maxsize is the documented way to
        # disable truncation. Remember the caller's setting so that it can be restored.
        saved_threshold = np.get_printoptions()['threshold']
        np.set_printoptions(threshold=sys.maxsize)
    try:
        for snp in t.genotype.snp_range:
            # Ensure that all fields are non-empty - easier to parse by subsequent processes
            if metadata:
                np.savetxt(out,
                           np.array([x if x else '-' for x in metadata[snp]]),
                           fmt='%s',
                           newline='\011',
                           delimiter='')
            # savetxt terminates every entry with a tab; buffer the genotype entries so that
            # the trailing tab can be dropped before the line is written
            out_str = StringIO.StringIO()
            entries = [
                str(h) + g[0] + g[1]
                for h, g in it.izip(hap_type[snp], data[snp])
            ]
            np.savetxt(out_str,
                       entries,
                       fmt='%s',
                       newline='\011',
                       delimiter='')
            out.write(out_str.getvalue()[:-1])
            out.write('\n')
            out.flush()
            if debug:
                sample_column = np.arange(
                    data.shape[1])[np.newaxis].transpose()
                hap_type_column = hap_type[snp][np.newaxis].transpose()
                print(np.concatenate(
                    (sample_column, hap_type_column, t.imputed_data[snp]),
                    axis=1))
    finally:
        if debug:
            np.set_printoptions(threshold=saved_threshold)
Exemple #6
0
 def __error_index(self, diff, errors, num_errors, error_type):
     '''Return a pair (snps, samples) of index arrays locating the genotype errors of the
     requested error_type among the rows listed in errors.

     NON_TEMPLATE - rows with exactly one error; the sample is the child whose diff entry is
     non-zero in that row.
     TEMPLATE - rows with num_children - 1 errors; the sample is always the template child.
     Any other error_type - all remaining rows; the parent and all children are flagged at
     each such SNP.'''
     if error_type == FamilyIbdComputer.NON_TEMPLATE:
         # A single disagreeing entry per row: report the child it belongs to
         single_error_rows = np.nonzero(num_errors == 1)[0]
         snp_index = errors[single_error_rows]
         child_index = np.nonzero(diff[snp_index, :] != 0)[1]
         return (self.snps[snp_index], self.children[child_index])

     if error_type == FamilyIbdComputer.TEMPLATE:
         # All entries but one disagree: report the template child once per SNP
         template_rows = np.nonzero(num_errors == self.num_children - 1)[0]
         snp_index = errors[template_rows]
         template_child = self.children[self.template]
         return (self.snps[snp_index], np.tile(template_child, len(snp_index)))

     # Indecisive cases: flag parent+all children as errors -- the best we can do for now.
     indecisive = np.logical_and(num_errors != 1, num_errors != self.num_children - 1)
     snp_index = errors[np.nonzero(indecisive)[0]]
     family = np.concatenate((self.children, [self.parent]))
     grid = util.flattened_meshgrid(family, self.snps[snp_index])
     # The grid was built as (sample, snp); callers expect (snp, sample)
     return (grid[1], grid[0])
Exemple #7
0
def genotype_ibs_segments(genotype, id1, id2, snps,
                          error_filter='median', error_filter_length=5, margin=0.0,
                          min_ibs_len_snp=400, debug=False):
    '''Return Identical-by-State (IBS >= 1) segments between two genoypes of samples id1 and id2
    in the SNP range [snp[0],snp[1]) (if snp is a tuple) or the subset of SNPs, if snps is an array.
    
    See ibs_segments() for a description of optional parameters.'''
    num_snps = genotype.num_snps
    g = genotype.data
    g1 = recode.recode_single_genotype(g[snps, id1, :])
    g2 = recode.recode_single_genotype(g[snps, id2, :])
    d = (recode.ibs_state(g1, g2) == 0).astype(np.byte)

    # Consider informative or the specified SNPs only
    filtered_diff = filter_diff(d, error_filter, error_filter_length)
    error_snps = snps[np.nonzero(d - filtered_diff)[0]]
    
    # Detect edges as non-zero gradient points; output sufficiently long segments
    if np.size(filtered_diff) == 0:
        # No data to consider ==> no IBD intervals can be identified
        segments = []
    else:
        # Convert recombination locations to segments of no recombination; filter short segments
        bp = genotype.snp['base_pair']
        #print segment.segments_with_value(filtered_diff, 0, min_ibs_len_snp)
        segments = [Segment(((x[0], x[1])), [id1, id2], (bp[x[0]], segment.stop_bp(bp, x[1], num_snps)),
                            error_snps=segment.in_segment(error_snps, x), collapse_to_set=False)
                    for x in segment.segments_with_value(filtered_diff, 0, min_ibs_len_snp)]
    
    # Cut segment margins
    if margin >= constants.SMALL_FLOAT:
        segments = [s for s in (s.middle_part(genotype.nearest_snp, bp, margin, collapse_to_set=False)
                                for s in segments) if s]

    # Restrict errors to those inside segments
    segment_set = SegmentSet(segments,
                             np.array(util.flattened_meshgrid(reduce(list.__add__, (s.error_snps.tolist() for s in segments)),
                                                              np.array([id1, id2])), dtype=int) \
                             if segments else gt.empty_errors_array())
    if debug:
        print 'ibs_segments()', segment_set
        print 'errors', segment_set.errors
    return segment_set
Exemple #8
0
    def impute(self, samples=None):
        """Infer imputed genotypes at the samples of h and return self.result.

        The hom training samples are phased first. The non-hom training samples are then
        imputed by bootstrapping from both haplotypes of every hom sample. Finally, all samples
        of h that are not training samples - restricted to samples, if it is given - are
        imputed from the haplotypes returned by the training bootstrap.

        samples - optional collection of sample indices to restrict the target set to."""
        # Hom training samples come first
        self.__phase_hom()

        # Training stage: the reference starts as both alleles of each hom sample and the
        # bootstrap imputes the non-hom training samples from it
        reference = azip(*util.flattened_meshgrid(self.hom, ALLELES))
        reference, _ = self._impute_bootstrap(reference, set(self.non_hom),
                                              self.num_passes_training, True, "non-hom typed")

        # Target stage: every sample of h outside the training set, optionally narrowed down
        targets = set(range(self.h.num_samples)) - set(self.training_sample_index)
        if samples is not None:
            targets = targets & set(samples)
        self._impute_bootstrap(reference, targets, self.num_passes_training, False, "non-typed")

        return self.result
Exemple #9
0
    def impute(self):
        '''Infer imputed genotypes at all samples of h from the samples of g, at the location
        between k-1 and k.'''

        # Phase all hom training samples
        # print '#' * 80

        # print '#' * 80
        self.__phase_hom()
        print 'Imputing genotyped hom', '|hom|', len(
            self.hom), '|non_hom|', len(self.non_hom)

        # Phase as much as possible non-hom training samples
        R = util.flattened_meshgrid(self.hom, ALLELES)
        # set_printoptions(threshold=np.nan)
        # R = [np.array([1053]), np.array([0])]
        if self.debug:
            print '#' * 80
        print 'Imputing genotyped non-hom', '|R|', len(R[0]), '|T|', len(
            self.non_hom)
        if self.debug:
            print '#' * 80
            print 'R', R
            print 'T', self.non_hom
        for sample in self.non_hom:
            self._impute_sample(sample, R, True)

        # Impute non-training samples from all available training haps
        # R = [np.array([1053, 0]), np.array([0])]
        R = self.result.nonzero()
        T = set(range(self.h.num_samples)) - set(self.training_sample_index)
        if self.debug:
            print '#' * 80
        print 'Imputing non-genotyped', '|R|', len(R[0]), '|T|', len(T)
        if self.debug:
            print '#' * 80
            print 'R', R
            print 'T', self.non_hom
        for sample in T:
            self._impute_sample(sample, R, False)
        return self.result
Exemple #10
0
 def impute(self):
     '''Infer imputed genotypes at all samples of h from the samples of g, at the location
     between k-1 and k.'''
     
     # Phase all hom training samples         
     # print '#' * 80
     
     # print '#' * 80
     self.__phase_hom()
     print 'Imputing genotyped hom', '|hom|', len(self.hom), '|non_hom|', len(self.non_hom)
     
     # Phase as much as possible non-hom training samples
     R = util.flattened_meshgrid(self.hom, ALLELES)
     # set_printoptions(threshold=np.nan)
     # R = [np.array([1053]), np.array([0])]
     if self.debug:
         print '#' * 80
     print 'Imputing genotyped non-hom', '|R|', len(R[0]), '|T|', len(self.non_hom)
     if self.debug:
         print '#' * 80
         print 'R', R
         print 'T', self.non_hom
     for sample in self.non_hom:
         self._impute_sample(sample, R, True)
     
     # Impute non-training samples from all available training haps
     # R = [np.array([1053, 0]), np.array([0])]
     R = self.result.nonzero()
     T = set(range(self.h.num_samples)) - set(self.training_sample_index)
     if self.debug:
         print '#' * 80
     print 'Imputing non-genotyped', '|R|', len(R[0]), '|T|', len(T)
     if self.debug:
         print '#' * 80
         print 'R', R
         print 'T', self.non_hom
     for sample in T:
         self._impute_sample(sample, R, False)
     return self.result
Exemple #11
0
    def impute(self, samples=None):
        '''Infer imputed genotypes at the samples of h and return self.result.

        The hom training samples are phased first. The non-hom training samples are then
        imputed by bootstrapping from both haplotypes of every hom sample. Finally, all samples
        of h that are not training samples - restricted to samples, if it is given - are
        imputed from the haplotypes returned by the training bootstrap.

        samples - optional collection of sample indices to restrict the target set to.'''
        # Hom training samples come first
        self.__phase_hom()

        # Training stage: the reference starts as both alleles of each hom sample and the
        # bootstrap imputes the non-hom training samples from it
        reference = azip(*util.flattened_meshgrid(self.hom, ALLELES))
        reference, _ = self._impute_bootstrap(reference, set(self.non_hom),
                                              self.num_passes_training, True,
                                              'non-hom typed')

        # Target stage: every sample of h outside the training set, optionally narrowed down
        targets = set(range(self.h.num_samples)) - set(
            self.training_sample_index)
        if samples is not None:
            targets = targets & set(samples)
        self._impute_bootstrap(reference, targets, self.num_passes_training,
                               False, 'non-typed')

        return self.result
Exemple #12
0
 def __error_index(self, diff, errors, num_errors, error_type):
     '''Return a pair (snps, samples) of index arrays locating the genotype errors of the
     requested error_type among the rows listed in errors.

     NON_TEMPLATE - rows with exactly one error; the sample is the child whose diff entry is
     non-zero in that row.
     TEMPLATE - rows with num_children - 1 errors; the sample is always the template child.
     Any other error_type - all remaining rows; the parent and all children are flagged at
     each such SNP.'''
     if error_type == FamilyIbdComputer.NON_TEMPLATE:
         # A single disagreeing entry per row: report the child it belongs to
         single_error_rows = np.nonzero(num_errors == 1)[0]
         snp_index = errors[single_error_rows]
         child_index = np.nonzero(diff[snp_index, :] != 0)[1]
         return (self.snps[snp_index], self.children[child_index])

     if error_type == FamilyIbdComputer.TEMPLATE:
         # All entries but one disagree: report the template child once per SNP
         template_rows = np.nonzero(num_errors == self.num_children - 1)[0]
         snp_index = errors[template_rows]
         template_child = self.children[self.template]
         return (self.snps[snp_index],
                 np.tile(template_child, len(snp_index)))

     # Indecisive cases: flag parent+all children as errors -- the best we can do for now.
     indecisive = np.logical_and(num_errors != 1,
                                 num_errors != self.num_children - 1)
     snp_index = errors[np.nonzero(indecisive)[0]]
     family = np.concatenate((self.children, [self.parent]))
     grid = util.flattened_meshgrid(family, self.snps[snp_index])
     # The grid was built as (sample, snp); callers expect (snp, sample)
     return (grid[1], grid[0])
Exemple #13
0
def ibs_segments(haplotype,
                 id1,
                 id2,
                 hap1_type,
                 hap2_type,
                 snps=None,
                 include_alt_phase=False,
                 error_filter='median',
                 error_filter_length=5,
                 length_bound=None,
                 min_segment_length=INDETERMINATE,
                 margin=0.0,
                 debug=False):
    '''Return 1) Identical-by-State (IBS) segments separated by recombination events between two
    sample haplotypes (id1, hap1_type) and (id2, hap2_type). The 2-D output array's ith row format is
    
    (segment_start, segment_stop),
    (id1, hap1), (id2, hap2), 
    (segment_start_bp, segment_stop_bp, segment_length_in_bp, num_errors_in_segment) 
    
    The SNP range is [segment_start, segment_stop) where start=inclusive and stop is exclusive.
    2) List of het_snp indices at which there are likely genotype errors.
        
    Options:
    snps - list of SNPs to base the comparison on. For parent-child comparisons, these should
    be heterozygous SNPs in the parent's genotype, distinguishing its haplotypes
    and used to locate segments. For unphased-phased individuals, these should be the list of
    homozygous SNPs at the unphased individual (those that have data).
    If not specified, all SNPs are used.
    
    length_bound - minimum segment length bound type:
        None: no lower bound enforced 
        'base_pair': output segments of at least min_segment_length [base pair]
        'snp': output segments of at least min_segment_length consecutive SNPs out of the snps list.
               This is useful only if snps includes all SNPs (or is None) 
        *NOTE*: min_segment_length''s units are interpreted differently depending on length_bound.
         
    margin = fraction of segment to discard near the endpoints (margin/2 is removed from each side).'''

    if debug:
        print 'Computing IBD segments between haplotypes (%d,%d), (%d,%d); filter %s length %d' % \
        (id1, hap1_type, id2, hap2_type, error_filter, error_filter_length)
    d = diff.all_diffs(haplotype.data,
                       id1,
                       id2,
                       hap1_type=hap1_type,
                       hap2_type=hap2_type)[0]
    # Segment length, as defined by the input parameters
    segment_length = lambda f: np.inf if not length_bound else (
        f.length
        if length_bound == 'base_pair' else f.num_snps)  # @UnusedVariable

    # Consider informative or the specified SNPs only
    snps = snps if snps is not None else haplotype.snp_range
    snps = np.intersect1d(snps, np.where(d != INDETERMINATE)[0])
    d_snps = d[snps]
    filtered_diff = filter_diff(d_snps, error_filter, error_filter_length)
    error_snps = snps[np.nonzero(d_snps - filtered_diff)[0]]

    # Detect edges as non-zero gradient points; output sufficiently long segments
    bp = haplotype.snp['base_pair']
    num_snps = haplotype.num_snps
    if np.size(filtered_diff) == 0:
        # No data to consider ==> no IBD intervals can be identified
        segments = []
    else:
        deriv = ndimage.convolve(filtered_diff, [1, -1])
        edge = np.where(deriv != 0)[0]
        initial_phase = hap1_type if filtered_diff[0] == 0 else 1 - hap1_type
        if debug:
            print 'initial_phase', initial_phase  # , 'edge', edge
        # Convert recombination locations to segments of no recombination; filter short segments
        segments = [
            f for f in (
                Segment(((x[0], x[1])),
                        set(((id1, x[2]), (id2, hap2_type))), (
                            bp[x[0]], segment.stop_bp(bp, x[1], num_snps)),
                        error_snps=segment.in_segment(error_snps, x))
                for x in segment.edges_to_segments(
                    snps, edge, initial_phase, haplotype.num_snps, hap1_type))
            if segment_length(f) >= min_segment_length
        ]

    # Cut segment margins
    if margin >= constants.SMALL_FLOAT:
        segments = [
            s for s in (s.middle_part(haplotype.nearest_snp, bp, margin)
                        for s in segments) if s
        ]

    # Restrict errors to those inside segments
    segment_set = SegmentSet(segments,
                             np.array(util.flattened_meshgrid(reduce(list.__add__, (s.error_snps.tolist() for s in segments)),
                                                                        np.array([id1, id2])), dtype=int) \
                             if segments else gt.empty_errors_array())
    if debug:
        print 'ibs_segments()', segment_set
        print 'errors', segment_set.errors
    return segment_set
Exemple #14
0
def ibs_segments(haplotype, id1, id2, hap1_type, hap2_type, snps=None, include_alt_phase=False,
                 error_filter='median', error_filter_length=5,
                 length_bound=None, min_segment_length=INDETERMINATE, margin=0.0, debug=False):
    '''Return 1) Identical-by-State (IBS) segments separated by recombination events between two
    sample haplotypes (id1, hap1_type) and (id2, hap2_type). The 2-D output array's ith row format is
    
    (segment_start, segment_stop),
    (id1, hap1), (id2, hap2), 
    (segment_start_bp, segment_stop_bp, segment_length_in_bp, num_errors_in_segment) 
    
    The SNP range is [segment_start, segment_stop) where start=inclusive and stop is exclusive.
    2) List of het_snp indices at which there are likely genotype errors.
        
    Options:
    snps - list of SNPs to base the comparison on. For parent-child comparisons, these should
    be heterozygous SNPs in the parent's genotype, distinguishing its haplotypes
    and used to locate segments. For unphased-phased individuals, these should be the list of
    homozygous SNPs at the unphased individual (those that have data).
    If not specified, all SNPs are used.
    
    length_bound - minimum segment length bound type:
        None: no lower bound enforced 
        'base_pair': output segments of at least min_segment_length [base pair]
        'snp': output segments of at least min_segment_length consecutive SNPs out of the snps list.
               This is useful only if snps includes all SNPs (or is None) 
        *NOTE*: min_segment_length''s units are interpreted differently depending on length_bound.
         
    margin = fraction of segment to discard near the endpoints (margin/2 is removed from each side).'''
    
    if debug:
        print 'Computing IBD segments between haplotypes (%d,%d), (%d,%d); filter %s length %d' % \
        (id1, hap1_type, id2, hap2_type, error_filter, error_filter_length)
    d = diff.all_diffs(haplotype.data, id1, id2, hap1_type=hap1_type, hap2_type=hap2_type)[0]
    # Segment length, as defined by the input parameters 
    segment_length = lambda f: np.inf if not length_bound else (f.length if length_bound == 'base_pair' else f.num_snps)  # @UnusedVariable
    
    # Consider informative or the specified SNPs only
    snps = snps if snps is not None else haplotype.snp_range
    snps = np.intersect1d(snps, np.where(d != INDETERMINATE)[0])
    d_snps = d[snps]
    filtered_diff = filter_diff(d_snps, error_filter, error_filter_length)    
    error_snps = snps[np.nonzero(d_snps - filtered_diff)[0]]
    
    # Detect edges as non-zero gradient points; output sufficiently long segments
    bp = haplotype.snp['base_pair']
    num_snps = haplotype.num_snps
    if np.size(filtered_diff) == 0:
        # No data to consider ==> no IBD intervals can be identified
        segments = []
    else:
        deriv = ndimage.convolve(filtered_diff, [1, -1])    
        edge = np.where(deriv != 0)[0]
        initial_phase = hap1_type if filtered_diff[0] == 0 else 1 - hap1_type
        if debug:
            print 'initial_phase', initial_phase  # , 'edge', edge
        # Convert recombination locations to segments of no recombination; filter short segments
        segments = [f for f in (Segment(((x[0], x[1])), set(((id1, x[2]), (id2, hap2_type))),
                                        (bp[x[0]], segment.stop_bp(bp, x[1], num_snps)),
                                        error_snps=segment.in_segment(error_snps, x))
                                for x in segment.edges_to_segments(snps, edge, initial_phase,
                                                                   haplotype.num_snps, hap1_type))
                    if segment_length(f) >= min_segment_length]
    
    # Cut segment margins
    if margin >= constants.SMALL_FLOAT:
        segments = [s for s in (s.middle_part(haplotype.nearest_snp, bp, margin) for s in segments) if s]
    
    # Restrict errors to those inside segments
    segment_set = SegmentSet(segments,
                             np.array(util.flattened_meshgrid(reduce(list.__add__, (s.error_snps.tolist() for s in segments)),
                                                                        np.array([id1, id2])), dtype=int) \
                             if segments else gt.empty_errors_array())
    if debug:
        print 'ibs_segments()', segment_set
        print 'errors', segment_set.errors
    return segment_set