Exemple #1
0
def genotype_ibs_segments(genotype,
                          id1,
                          id2,
                          snps,
                          error_filter='median',
                          error_filter_length=5,
                          margin=0.0,
                          min_ibs_len_snp=400,
                          debug=False):
    '''Return Identical-by-State (IBS >= 1) segments between two genoypes of samples id1 and id2
    in the SNP range [snp[0],snp[1]) (if snp is a tuple) or the subset of SNPs, if snps is an array.
    
    See ibs_segments() for a description of optional parameters.'''
    num_snps = genotype.num_snps
    g = genotype.data
    g1 = recode.recode_single_genotype(g[snps, id1, :])
    g2 = recode.recode_single_genotype(g[snps, id2, :])
    d = (recode.ibs_state(g1, g2) == 0).astype(np.byte)

    # Consider informative or the specified SNPs only
    filtered_diff = filter_diff(d, error_filter, error_filter_length)
    error_snps = snps[np.nonzero(d - filtered_diff)[0]]

    # Detect edges as non-zero gradient points; output sufficiently long segments
    if np.size(filtered_diff) == 0:
        # No data to consider ==> no IBD intervals can be identified
        segments = []
    else:
        # Convert recombination locations to segments of no recombination; filter short segments
        bp = genotype.snp['base_pair']
        #print segment.segments_with_value(filtered_diff, 0, min_ibs_len_snp)
        segments = [
            Segment(((x[0], x[1])), [id1, id2],
                    (bp[x[0]], segment.stop_bp(bp, x[1], num_snps)),
                    error_snps=segment.in_segment(error_snps, x),
                    collapse_to_set=False) for x in
            segment.segments_with_value(filtered_diff, 0, min_ibs_len_snp)
        ]

    # Cut segment margins
    if margin >= constants.SMALL_FLOAT:
        segments = [
            s for s in (s.middle_part(
                genotype.nearest_snp, bp, margin, collapse_to_set=False)
                        for s in segments) if s
        ]

    # Restrict errors to those inside segments
    segment_set = SegmentSet(segments,
                             np.array(util.flattened_meshgrid(reduce(list.__add__, (s.error_snps.tolist() for s in segments)),
                                                              np.array([id1, id2])), dtype=int) \
                             if segments else gt.empty_errors_array())
    if debug:
        print 'ibs_segments()', segment_set
        print 'errors', segment_set.errors
    return segment_set
Exemple #2
0
def genotype_ibs_segments(genotype, id1, id2, snps,
                          error_filter='median', error_filter_length=5, margin=0.0,
                          min_ibs_len_snp=400, debug=False):
    '''Return Identical-by-State (IBS >= 1) segments between two genoypes of samples id1 and id2
    in the SNP range [snp[0],snp[1]) (if snp is a tuple) or the subset of SNPs, if snps is an array.
    
    See ibs_segments() for a description of optional parameters.'''
    num_snps = genotype.num_snps
    g = genotype.data
    g1 = recode.recode_single_genotype(g[snps, id1, :])
    g2 = recode.recode_single_genotype(g[snps, id2, :])
    d = (recode.ibs_state(g1, g2) == 0).astype(np.byte)

    # Consider informative or the specified SNPs only
    filtered_diff = filter_diff(d, error_filter, error_filter_length)
    error_snps = snps[np.nonzero(d - filtered_diff)[0]]
    
    # Detect edges as non-zero gradient points; output sufficiently long segments
    if np.size(filtered_diff) == 0:
        # No data to consider ==> no IBD intervals can be identified
        segments = []
    else:
        # Convert recombination locations to segments of no recombination; filter short segments
        bp = genotype.snp['base_pair']
        #print segment.segments_with_value(filtered_diff, 0, min_ibs_len_snp)
        segments = [Segment(((x[0], x[1])), [id1, id2], (bp[x[0]], segment.stop_bp(bp, x[1], num_snps)),
                            error_snps=segment.in_segment(error_snps, x), collapse_to_set=False)
                    for x in segment.segments_with_value(filtered_diff, 0, min_ibs_len_snp)]
    
    # Cut segment margins
    if margin >= constants.SMALL_FLOAT:
        segments = [s for s in (s.middle_part(genotype.nearest_snp, bp, margin, collapse_to_set=False)
                                for s in segments) if s]

    # Restrict errors to those inside segments
    segment_set = SegmentSet(segments,
                             np.array(util.flattened_meshgrid(reduce(list.__add__, (s.error_snps.tolist() for s in segments)),
                                                              np.array([id1, id2])), dtype=int) \
                             if segments else gt.empty_errors_array())
    if debug:
        print 'ibs_segments()', segment_set
        print 'errors', segment_set.errors
    return segment_set
Exemple #3
0
def ibs_diff(g, id1, id2):
    '''Return the IBS difference between samples id1 and id2 as a byte array
    (0 if IBS >= 1, 1 if IBS = 0).

    g is indexed as g[:, sample, :]; each sample slice is recoded with
    recode.recode_single_genotype() before the IBS state is computed.'''
    recoded1 = recode.recode_single_genotype(g[:, id1, :])
    recoded2 = recode.recode_single_genotype(g[:, id2, :])
    state = recode.ibs_state(recoded1, recoded2)
    return (state == 0).astype(np.byte)
Exemple #4
0
def ibs_state(g, id1, id2):
    '''Return the IBS state between samples id1 and id2 (IBS=0,1 or 2).

    g is indexed as g[:, sample, :]; each sample slice is recoded with
    recode.recode_single_genotype() and the pair is passed to recode.ibs_state().'''
    recoded1 = recode.recode_single_genotype(g[:, id1, :])
    recoded2 = recode.recode_single_genotype(g[:, id2, :])
    return recode.ibs_state(recoded1, recoded2)
Exemple #5
0
def ibs_diff(g, id1, id2):
    '''Flag where samples id1 and id2 share no allele state: the returned byte array is
    1 where the IBS state is 0, and 0 where IBS >= 1.

    Each sample is taken as g[:, sample, :] and recoded via recode.recode_single_genotype().'''
    first = recode.recode_single_genotype(g[:, id1, :])
    second = recode.recode_single_genotype(g[:, id2, :])
    is_ibs_zero = recode.ibs_state(first, second) == 0
    return is_ibs_zero.astype(np.byte)
Exemple #6
0
def ibs_state(g, id1, id2):
    '''Compute the IBS state (IBS=0,1 or 2) between samples id1 and id2.

    Each sample is taken as g[:, sample, :], recoded via recode.recode_single_genotype(),
    and the recoded pair is handed to recode.ibs_state(), whose result is returned as is.'''
    first = recode.recode_single_genotype(g[:, id1, :])
    second = recode.recode_single_genotype(g[:, id2, :])
    result = recode.ibs_state(first, second)
    return result