def form_intersections(segments, full_output=False, samples=None, snp_range=None):
    '''Cut the SNP axis at every segment endpoint and bin the segments by the resulting pieces.

    segments is a sequence of (snp, values) entries, where snp holds the segment's
    (start, stop) endpoints and values is the collection of samples that are equal over that
    segment. All endpoints are unioned and sorted ascending; each pair of consecutive endpoints
    defines one sub-segment (bin). If snp_range is given, snp_range[START] and snp_range[STOP]
    are added to the endpoint set as well.

    Returns (sub_segments, intersections) where
    - sub_segments[i] = (endpoints[i], endpoints[i+1]);
    - intersections[i] = list of the 'values' entries of all segments that cover bin i,
      in the order the segments were passed.

    If full_output is true, returns instead the 5-tuple
    (sub_segments, intersections, value_to_segments, segment_to_local_values,
    dangling_local_values) where
    - value_to_segments[i] is util.to_set_dict() of the (value, segment index) pairs of bin i;
    - segment_to_local_values is util.to_set_dict() of the (segment index, (bin, value)) pairs;
    - dangling_local_values is the set of (bin, sample) pairs not covered by any segment. The
      sample universe is 'samples' if given, otherwise all values appearing in segments.
    '''
    # Union of all segment endpoints (plus the optional global range), sorted ascending.
    endpoint_set = set()
    for segment in segments:
        endpoint_set.update(segment[0])
    if snp_range is not None:
        endpoint_set.update((snp_range[START], snp_range[STOP]))
    endpoints = sorted(endpoint_set)

    # This index makes it easier to random-access bins
    endpoint_index = dict((x, index) for (index, x) in enumerate(endpoints))
    sub_segments = list(zip(endpoints, endpoints[1:]))

    # With no endpoints at all this is -1; range(-1) is empty, so every bin list stays empty.
    num_bins = len(endpoints) - 1
    intersections = [[] for _ in range(num_bins)]
    if full_output:
        value_segment_tuples = [[] for _ in range(num_bins)]
        segment_local_value_tuples = []

    # Sweep the segments in order and accumulate each one into every bin it covers
    for k, segment in enumerate(segments):
        snp, values = segment[0], segment[1]
        for i in range(endpoint_index[snp[0]], endpoint_index[snp[1]]):
            intersections[i].append(values)
            if full_output:
                value_segment_tuples[i].extend((x, k) for x in values)
                segment_local_value_tuples.extend((k, (i, x)) for x in values)

    if not full_output:
        return (sub_segments, intersections)

    segment_to_local_values = util.to_set_dict(segment_local_value_tuples)
    all_samples = samples if samples is not None \
        else util.union_all(x for s in segments for x in s[1])
    all_local_values = set(itertools.product(range(num_bins), all_samples))
    # Local values (bin, sample) that no segment accounts for
    dangling_local_values = all_local_values - util.union_all(*segment_to_local_values.values())
    value_to_segments = [util.to_set_dict(d) for d in value_segment_tuples]
    return (sub_segments, intersections, value_to_segments, segment_to_local_values,
            dangling_local_values)
def group_to_color(segments, samples=None, segment_gap=25, snp_range=None):
    '''Group the local values (sub-segment index, haplotype) of a list of IBD segments by color.

    segments is a -list- of key-value pairs ((start, stop), (v1,v2)), where the key denotes a
    segment [start,stop) of the infinite integer lattice on which v1 and v2 are equal.

    samples is the set of haplotype identifiers; it is passed through to form_intersections().
    segment_gap is the # of SNPs to drop from each segment's margins for the purpose of
    defining colors (passed to __dilute_segment()). This allows a slight overlap between
    segments without making them the same color, which is usually undesirable.
    snp_range is passed through to the intersection routines.

    Returns (sub_segments, groups). groups is a numpy object array with one entry per color:
    first one list of local values per connected component of segments, then one singleton
    list per dangling local value (a local value not covered by any original segment).

    NOTE(review): this file contains a second definition of group_to_color further down;
    being later, that one is the binding in effect after import.
    '''
    # Define colors using slightly-smaller portions of the original segments
    diluted_segments = [(__dilute_segment(x[0], segment_gap), x[1]) for x in segments]
    segment_groups = __group_to_color(diluted_segments, samples, snp_range=snp_range)

    # Translate back result to the original segments. Use sub-segments, dangling nodes of the
    # original segments (note: there may be more irrelevant dangling nodes in the diluted segments)
    sub_segments, _, _, segment_to_local_values, dangling_local_values = \
        im.segment.form_intersections(segments, True, samples=samples, snp_range=snp_range)

    # Translate each connected component from a list of segments to a list of local values
    # (i.e., haplotype parts of the same color, for each color)
    colored = [list(util.union_all(*(segment_to_local_values[x] for x in group)))
               for group in segment_groups]
    colored += [[x] for x in dangling_local_values]

    # The per-color lists generally differ in length. Request dtype=object explicitly: relying
    # on numpy to infer it from a ragged nested list is deprecated and an error in newer numpy.
    return sub_segments, np.array(colored, dtype=object)
def plot_hap_coloring_from_colors(d, haps=None, title=None, xlabel=None, ylabel=None, y=None,
                                  pair_gap=0, linewidth=6, segment_gap=25, snp_range=None,
                                  colors=None):
    '''Generate a haplotype coloring plot from coloring scheme d.

    d is a pair (sub_segments, groups): sub_segments is a sequence of (start, stop) SNP ranges
    and groups is a sequence of same-color groups, each a sequence of (k, hap) local values
    where k indexes sub_segments and hap is a haplotype identifier (formatted below as a pair
    of integers).

    haps: haplotypes to place on the y-axis, in order; defaults to the sorted set of all
    haplotypes appearing in groups.
    title, xlabel, ylabel: plot title and axis labels. The labels default to 'SNP #' and
    'Sample'.
    y: custom y-tick labels, one per haplotype; defaults to repr() of each haplotype.
    pair_gap: if positive, one extra tick unit of space is inserted between consecutive
    haplotype pairs. NOTE(review): only the sign is used, not the magnitude - confirm intent.
    linewidth: width of the drawn haplotype lines.
    segment_gap, snp_range: accepted but not used by this function.
    colors: iterable of (r, g, b) colors, one per group; defaults to
    im.plot.colors.get_colors().

    Prints one line per drawn local value, shows the figure, and returns (sub_segments, groups).

    NOTE(review): this file contains a second definition of plot_hap_coloring_from_colors
    further down; being later, that one is the binding in effect after import.
    '''
    # Generate haplotype colors
    sub_segments, groups = d
    if haps is None:
        haps = sorted(util.union_all(*(map(operator.itemgetter(1), x) for x in groups)))

    # Prepare axes
    if colors is None:
        colors = im.plot.colors.get_colors()
    num_haps = len(haps)
    hap_index = dict((hap, k) for k, hap in enumerate(haps))
    custom_label = y is not None
    if y is None:
        y = [repr(hap) for hap in haps]
    xmin, xmax = sub_segments[0][0] - 1, sub_segments[-1][1] + 1

    def x_scaled(x):
        # axhline() takes xmin/xmax as fractions of the axis width, not data coordinates
        return (1.0 * (x - xmin)) / (xmax - xmin)

    P.clf()
    P.hold(True)
    if title is not None:
        P.title(title)
    P.xlim([xmin, xmax])
    P.xlabel(xlabel if xlabel else 'SNP #')
    # Honor the caller's ylabel, mirroring the xlabel handling
    P.ylabel(ylabel if ylabel else 'Sample')

    hap_ticks = np.arange(0, num_haps)
    if pair_gap > 0:
        # Shift the j-th pair up by j: offsets 0,0,1,1,2,2,... Integer division also covers an
        # odd number of haplotypes, where the last one is left unpaired.
        hap_ticks += np.arange(num_haps) // 2
    P.yticks(hap_ticks, y)
    P.ylim([hap_ticks[0] - 0.5, hap_ticks[-1] + 0.5])

    # izip stops at the shorter input, so 'colors' may be longer than 'groups'
    for group, color in it.izip(groups, colors):
        # Draw lines for all members of the group using the same color
        for k, hap in group:
            print('region %-2d [%-4d,%-4d] x %.2f:%.2f hap (%-4d, %d)%s color (%.2f, %.2f, %.2f)' %
                  (k, sub_segments[k][0], sub_segments[k][1],
                   x_scaled(sub_segments[k][0]), x_scaled(sub_segments[k][1]),
                   hap[0], hap[1],
                   ' ylabel %-10s' % (y[hap_index[hap]],) if custom_label else '',
                   color[0], color[1], color[2]))
            P.axhline(y=hap_ticks[hap_index[hap]],
                      xmin=x_scaled(sub_segments[k][0]), xmax=x_scaled(sub_segments[k][1]),
                      linewidth=linewidth, color=color)
    P.show()
    return sub_segments, groups
def __group_to_color(segments, samples=None, snp_range=None):
    '''Partition segments into groups of the same color (IBD sharing).

    segments is a -list- of key-value pairs ((start, stop), (v1,v2)), where the key denotes a
    segment [start,stop) of the infinite integer lattice on which v1 and v2 are equal.
    samples and snp_range are passed through to im.segment.form_intersections().

    Returns the connected components of the segment graph described below, as produced by
    nx.connected_components(); each component is a group of segment indices.

    NOTE(review): this file contains a second definition of __group_to_color further down;
    being later, that one is the binding in effect after import.
    '''
    (sub_segments, intersections, value_to_segments, _unused_a, _unused_b) = \
        im.segment.form_intersections(segments, True, samples=samples, snp_range=snp_range)

    def same_color_edges():
        # Lazily yield every edge of the segment graph G: nodes = segments, edges = (segments
        # intersect AND their sample sets are linked within the same sub-segment).
        for i in xrange(len(sub_segments)):
            # Samples are linked within sub-segment i by the pairs recorded in intersections[i]
            sample_graph = nx.Graph(intersections[i])
            for component in nx.connected_components(sample_graph):
                # All segments touching any sample of this component share a color
                sharing_segments = util.union_all(*(value_to_segments[i][x] for x in component))
                for edge in it.product(sharing_segments, sharing_segments):
                    yield edge

    # G's connected components are the groups, where a group is a set of segments of the
    # same color.
    segment_graph = nx.from_edgelist(same_color_edges())
    return nx.connected_components(segment_graph)
def __group_to_color(segments, samples=None, snp_range=None):
    '''Partition segments into groups of the same color (IBD sharing).

    segments is a -list- of key-value pairs ((start, stop), (v1,v2)), where the key denotes a
    segment [start,stop) of the infinite integer lattice on which v1 and v2 are equal.
    samples and snp_range are passed through to form_intersections().

    Returns the connected components of the segment graph described below, as produced by
    nx.connected_components(); each component is a group of segment indices.
    '''
    (sub_segments, intersections, value_to_segments, _unused_a, _unused_b) = \
        form_intersections(segments, True, samples=samples, snp_range=snp_range)

    def same_color_edges():
        # Lazily yield every edge of the segment graph G: nodes = segments, edges = (segments
        # intersect AND their sample sets are linked within the same sub-segment).
        for i in xrange(len(sub_segments)):
            # Samples are linked within sub-segment i by the pairs recorded in intersections[i]
            sample_graph = nx.Graph(intersections[i])
            for component in nx.connected_components(sample_graph):
                # All segments touching any sample of this component share a color
                sharing_segments = util.union_all(*(value_to_segments[i][x] for x in component))
                for edge in itertools.product(sharing_segments, sharing_segments):
                    yield edge

    # G's connected components are the groups, where a group is a set of segments of the
    # same color.
    segment_graph = nx.from_edgelist(same_color_edges())
    return nx.connected_components(segment_graph)
def plot_hap_coloring_from_colors(d, haps=None, title=None, xlabel=None, ylabel=None, y=None,
                                  pair_gap=0, linewidth=6, segment_gap=25, snp_range=None,
                                  colors=None):
    '''Generate a haplotype coloring plot from coloring scheme d.

    d is a pair (sub_segments, groups): sub_segments is a sequence of (start, stop) SNP ranges
    and groups is a sequence of same-color groups, each a sequence of (k, hap) local values
    where k indexes sub_segments and hap is a haplotype identifier (formatted below as a pair
    of integers).

    haps: haplotypes to place on the y-axis, in order; defaults to the sorted set of all
    haplotypes appearing in groups.
    title, xlabel, ylabel: plot title and axis labels. The labels default to 'SNP #' and
    'Sample'.
    y: custom y-tick labels, one per haplotype; defaults to repr() of each haplotype.
    pair_gap: if positive, one extra tick unit of space is inserted between consecutive
    haplotype pairs. NOTE(review): only the sign is used, not the magnitude - confirm intent.
    linewidth: width of the drawn haplotype lines.
    segment_gap, snp_range: accepted but not used by this function.
    colors: iterable of (r, g, b) colors, one per group; defaults to
    im.plot.colors.get_colors().

    Prints one line per drawn local value, shows the figure, and returns (sub_segments, groups).
    '''
    # Generate haplotype colors
    sub_segments, groups = d
    if haps is None:
        haps = sorted(util.union_all(*(map(operator.itemgetter(1), x) for x in groups)))

    # Prepare axes
    if colors is None:
        colors = im.plot.colors.get_colors()
    num_haps = len(haps)
    hap_index = dict((hap, k) for k, hap in enumerate(haps))
    custom_label = y is not None
    if y is None:
        y = [repr(hap) for hap in haps]
    xmin, xmax = sub_segments[0][0] - 1, sub_segments[-1][1] + 1

    def x_scaled(x):
        # axhline() takes xmin/xmax as fractions of the axis width, not data coordinates
        return (1.0 * (x - xmin)) / (xmax - xmin)

    P.clf()
    P.hold(True)
    if title is not None:
        P.title(title)
    P.xlim([xmin, xmax])
    P.xlabel(xlabel if xlabel else 'SNP #')
    # Honor the caller's ylabel, mirroring the xlabel handling
    P.ylabel(ylabel if ylabel else 'Sample')

    hap_ticks = np.arange(0, num_haps)
    if pair_gap > 0:
        # Shift the j-th pair up by j: offsets 0,0,1,1,2,2,... Integer division also covers an
        # odd number of haplotypes, where the last one is left unpaired.
        hap_ticks += np.arange(num_haps) // 2
    P.yticks(hap_ticks, y)
    P.ylim([hap_ticks[0] - 0.5, hap_ticks[-1] + 0.5])

    # izip stops at the shorter input, so 'colors' may be longer than 'groups'
    for group, color in it.izip(groups, colors):
        # Draw lines for all members of the group using the same color
        for k, hap in group:
            print('region %-2d [%-4d,%-4d] x %.2f:%.2f hap (%-4d, %d)%s color (%.2f, %.2f, %.2f)' %
                  (k, sub_segments[k][0], sub_segments[k][1],
                   x_scaled(sub_segments[k][0]), x_scaled(sub_segments[k][1]),
                   hap[0], hap[1],
                   ' ylabel %-10s' % (y[hap_index[hap]],) if custom_label else '',
                   color[0], color[1], color[2]))
            P.axhline(y=hap_ticks[hap_index[hap]],
                      xmin=x_scaled(sub_segments[k][0]), xmax=x_scaled(sub_segments[k][1]),
                      linewidth=linewidth, color=color)
    P.show()
    return sub_segments, groups
def group_to_color(segments, samples=None, segment_gap=25, snp_range=None):
    '''Group the local values (sub-segment index, haplotype) of a list of IBD segments by color.

    segments is a -list- of key-value pairs ((start, stop), (v1,v2)), where the key denotes a
    segment [start,stop) of the infinite integer lattice on which v1 and v2 are equal.

    samples is the set of haplotype identifiers; it is passed through to form_intersections().
    segment_gap is the # of SNPs to drop from each segment's margins for the purpose of
    defining colors (passed to __dilute_segment()). This allows a slight overlap between
    segments without making them the same color, which is usually undesirable.
    snp_range is passed through to the intersection routines.

    Returns (sub_segments, groups). groups is a numpy object array with one entry per color:
    first one list of local values per connected component of segments, then one singleton
    list per dangling local value (a local value not covered by any original segment).
    '''
    # Define colors using slightly-smaller portions of the original segments
    diluted_segments = [(__dilute_segment(x[0], segment_gap), x[1]) for x in segments]
    segment_groups = __group_to_color(diluted_segments, samples, snp_range=snp_range)

    # Translate back result to the original segments. Use sub-segments, dangling nodes of the
    # original segments (note: there may be more irrelevant dangling nodes in the diluted segments)
    sub_segments, _, _, segment_to_local_values, dangling_local_values = \
        im.segment.form_intersections(segments, True, samples=samples, snp_range=snp_range)

    # Translate each connected component from a list of segments to a list of local values
    # (i.e., haplotype parts of the same color, for each color)
    colored = [list(util.union_all(*(segment_to_local_values[x] for x in group)))
               for group in segment_groups]
    colored += [[x] for x in dangling_local_values]

    # The per-color lists generally differ in length. Request dtype=object explicitly: relying
    # on numpy to infer it from a ragged nested list is deprecated and an error in newer numpy.
    return sub_segments, np.array(colored, dtype=object)