def draw_dendrogram(self):
    """
    Draw the hierarchical-clustering dendrogram into ``self.ax_dendrogram``.

    The dendrogram is drawn with ``orientation='right'`` and coloured using
    ``self._cut_xdata`` as the colour threshold. The x tick labels are rotated
    vertically and the y axis is hidden. If ``self._sorted_index`` has not been
    set yet, it is initialised from the leaf order returned by the dendrogram call.
    """
    target_axes = self.ax_dendrogram
    plt.subplot(target_axes, rasterized=True)

    clustering = self.hierarchical_clustering_object
    dendrogram_result = clustering.dendrogram(
        orientation='right',
        get_leaves=True,
        distance_sort=True,
        color_threshold=self._cut_xdata,
        ax=target_axes,
    )
    leaf_order = dendrogram_result['leaves']

    current_axes = plt.gca()
    plt.setp(current_axes.get_xticklabels(), rotation='vertical', fontsize=7)
    current_axes.get_yaxis().set_visible(False)

    # Only the first draw fixes the item order; later redraws keep it.
    if self._sorted_index is not None:
        return
    self._sorted_index = clustering.data.items[leaf_order]
def _plot_item_in_figure(self, index):
    """
    Open a new figure comparing one item of the current cluster with the prototype.

    The figure has three stacked panels sharing the y axis: the original data of
    the item, the cluster prototype, and the item's projected ("warped") data.
    Points of interest registered for the item are marked on the first panel and
    tracked points of interest on the third. A second figure with the DTW
    mappings between the item and the prototype is created afterwards through
    ``visualise_dtw_mappings``.

    :param index: label of the item within the current cluster's data
    """

    def mark_points(ax, points_by_key):
        """Draw one rectangle per highlighted point along the bottom of ``ax``."""
        if not points_by_key:
            return
        lim_min, lim_max = ax.get_ylim()
        height = (lim_max - lim_min) / 20.0
        # How many rectangles were already drawn at each x position, so that
        # several highlights on the same point stack instead of overlapping.
        stacked = defaultdict(int)
        for key, points in points_by_key.items():
            colour = self.highlight_colours[key]
            for point in points:
                bottom = lim_min + height * stacked[point]
                ax.add_patch(Rectangle((point, bottom), width=1, height=height,
                                       facecolor=colour, edgecolor='k'))
                stacked[point] += 1

    cluster = self.current_cluster()
    data = cluster.data.ix[index]
    projected_data = cluster.projected_data.ix[index]
    prototype = cluster.prototype
    poi = cluster.points_of_interest.get(index, {})
    tracked_poi = cluster.tracked_points_of_interest.get(index, {})

    plt.figure()

    ax_original = plt.subplot(3, 1, 1)
    data.plot(ax=ax_original, legend=False)
    plt.figlegend(*ax_original.get_legend_handles_labels(), loc='lower center')
    mark_points(ax_original, poi)
    plt.title('Original')

    ax_prototype = plt.subplot(3, 1, 2, sharey=ax_original)
    prototype.plot(ax=ax_prototype, legend=False)
    plt.title('Cluster Prototype')

    ax_warped = plt.subplot(3, 1, 3, sharey=ax_original)
    projected_data.plot(ax=ax_warped, legend=False)
    mark_points(ax_warped, tracked_poi)
    plt.title('Warped')

    plt.suptitle(index)

    # Called for its side effect only: it creates the DTW mappings figure.
    visualise_dtw_mappings(data, prototype, dtw_function=self.dtw_function,
                           columns=data.columns, sequence_x_label=index,
                           sequence_y_label='Cluster Prototype')
def prototype_images_from_node_list(node_list, output_directory):
    """
    Save one PNG image per node, plotting every dimension of the node's prototype.

    Each image is written to ``<output_directory>/<node.id>.png`` and holds one
    subplot per prototype column. The number of columns is taken from the first
    node's prototype and reused for all nodes.

    :param node_list: sequence of nodes exposing ``id`` and ``prototype``
        (``prototype`` must provide ``shape`` and ``values``)
    :param output_directory: directory the images are written to
    :return: None; does nothing besides printing the header when ``node_list`` is empty
    """
    print('> Saving images to {0!r}'.format(output_directory))

    # Nothing to draw; also avoids indexing node_list[0] on an empty list.
    if len(node_list) == 0:
        return

    ndims = node_list[0].prototype.shape[1]
    figure = plt.figure(figsize=(3 * ndims, 2))
    try:
        for node in node_list:
            filename = '{0}.png'.format(node.id)
            full_filename = os.path.join(output_directory, filename)
            print('> Saving {0}'.format(full_filename))

            prototype_T = node.prototype.values.T
            for i in range(ndims):
                plt.subplot(1, ndims, i + 1)
                plt.plot(prototype_T[i])

            plt.savefig(full_filename)
            # Reuse the same figure for the next node.
            plt.clf()
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(figure)
def prototype_images_from_node_list(node_list, output_directory):
    """
    Save one PNG image per node, plotting every dimension of the node's prototype.

    Each image is written to ``<output_directory>/<node.id>.png`` and holds one
    subplot per prototype column. The number of columns is taken from the first
    node's prototype and reused for all nodes.

    :param node_list: sequence of nodes exposing ``id`` and ``prototype``
        (``prototype`` must provide ``shape`` and ``values``)
    :param output_directory: directory the images are written to
    :return: None; does nothing besides printing the header when ``node_list`` is empty
    """
    print('> Saving images to {0!r}'.format(output_directory))

    # Nothing to draw; also avoids indexing node_list[0] on an empty list.
    if len(node_list) == 0:
        return

    ndims = node_list[0].prototype.shape[1]
    figure = plt.figure(figsize=(3 * ndims, 2))
    try:
        for node in node_list:
            filename = '{0}.png'.format(node.id)
            full_filename = os.path.join(output_directory, filename)
            print('> Saving {0}'.format(full_filename))

            prototype_T = node.prototype.values.T
            for i in range(ndims):
                plt.subplot(1, ndims, i + 1)
                plt.plot(prototype_T[i])

            plt.savefig(full_filename)
            # Reuse the same figure for the next node.
            plt.clf()
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(figure)
def draw(self):
    """
    Redraw every panel of the viewer for the currently selected cluster.

    Panels are drawn in this order: the cluster prototype, the warping
    conservation vector (sharing the prototype's x axis), the heatmap of the
    cluster's data, the mean of the projected data, and the heatmap of the
    projected data. When ``self._warping_conservation_view`` is set, the
    projected heatmap is drawn with the cluster's warping conservation data
    passed as ``replace_with_dataframe``. The figure title is updated with the
    cluster number and size, and ``plt.draw()`` is issued at the end.
    """
    # --- Prototype -------------------------------------------------------
    ax_prototype = plt.subplot(self.gs_prototype())
    plt.cla()
    plt.title('Prototype')
    cluster = self.current_cluster()
    cluster.prototype.plot(ax=ax_prototype, legend=False)
    handles, labels = ax_prototype.get_legend_handles_labels()
    plt.figlegend(handles, labels, loc='lower center')

    # --- Warping conservation vector --------------------------------------
    plt.subplot(self.gs_warping_preservation(), sharex=ax_prototype)
    plt.cla()
    wpc_vector = cluster.warping_conservation_vector()
    # Values are plotted at half-integer x positions: 0.5, 1.5, ...
    x_positions = np.arange(0.5, len(wpc_vector), 1)
    plt.plot(x_positions, wpc_vector)

    title_text = 'Cluster #{0}/{2} ({1} elements)'.format(self._current_cluster_id + 1,
                                                          cluster.n_items,
                                                          len(self._clusters))
    self.title.set_text(title_text)

    # --- Heatmap of the original data -------------------------------------
    plt.subplot(self.gs_heatmap())
    plt.cla()
    self._ax_heatmap = plt.gca()
    shared_axis = cluster.data.plot_heatmap(
        horizontal_grid=True,
        subplot_spec=self.gs_heatmap(),
        sort_by=None,
        highlighted_points=cluster.points_of_interest,
        highlight_colours=self.highlight_colours,
    )

    # --- Projections -------------------------------------------------------
    projections = cluster.projected_data
    ax_projections = plt.subplot(self.gs_projected_mean())
    plt.cla()
    plt.title('Average DTW projection onto prototype')
    projections.mean().plot(ax=ax_projections, legend=False)

    plt.subplot(self.gs_projected_heatmap())
    plt.cla()
    self._ax_projected_heatmap = plt.gca()

    # The overlay keyword is only passed when the conservation view is active.
    overlay_kwargs = {}
    if self._warping_conservation_view:
        overlay_kwargs['replace_with_dataframe'] = cluster.warping_conservation_data

    cluster.projected_data.plot_heatmap(
        horizontal_grid=True,
        subplot_spec=self.gs_projected_heatmap(),
        share_y_axis=shared_axis,
        sort_by=None,
        highlighted_points=cluster.tracked_points_of_interest,
        highlight_colours=self.highlight_colours,
        **overlay_kwargs
    )

    # Finally issue a draw command for the plot
    plt.draw()
def create_figure(self, figsize=(12, 10), interactive=True):
    """
    Create the viewer's figure, its main grid and the dendrogram axes.

    :param figsize: figure size forwarded to ``plt.figure``
    :param interactive: when true, the click listener is connected to the
        figure canvas and the buttons are drawn
    """
    figure = plt.figure(num=None, figsize=figsize, facecolor='w', edgecolor='k')

    # 2x2 grid with a thin top row (height ratio 1:15) and no horizontal gap.
    self._gs_main = gridspec.GridSpec(2, 2, wspace=0, height_ratios=[1, 15])
    self._figure = figure
    self._ax_dendrogram = plt.subplot(self.gs_dendrogram, rasterized=True)

    if not interactive:
        return

    # NOTE(review): draw_buttons() is assumed to belong to the interactive
    # branch together with the click listener - confirm against the original layout.
    self._figure.canvas.mpl_connect('button_press_event', self._onclick_listener)
    self.draw_buttons()
def visualise_dtw_mappings(sequence_x, sequence_y, dtw_function=dtw_std, columns=None, title=None,
                           sequence_x_label=None, sequence_y_label=None):
    """
    Plot two sequences on top of each other and connect the points DTW maps together.

    ``sequence_x`` is drawn in the top row (blue) and ``sequence_y`` in the bottom
    row (green), with one column of subplots per dimension. A faint line is drawn
    between every pair of points on the DTW path. If ``dtw_path_is_reversed``
    reports a reversed path, ``sequence_x`` is reversed before plotting and the
    top axes are annotated with ``(reversed)``.

    :param sequence_x: first sequence (converted with ``np.asarray``)
    :param sequence_y: second sequence (converted with ``np.asarray``)
    :param dtw_function: callable invoked as ``dtw_function(x, y, dist_only=False)``;
        the third element of its result is used as the path (a pair of index arrays)
    :param columns: titles of the dimensions; generated when ``None`` and the data
        has more than one dimension
    :param title: optional figure title
    :param sequence_x_label: legend label for ``sequence_x``
    :param sequence_y_label: legend label for ``sequence_y``; the legend is only
        drawn when both labels are given
    :return: the created figure
    :raises ValueError: if ``columns`` is given for multi-dimensional data and its
        length differs from the number of dimensions
    """

    def major_tick_step(ax, axis):
        """Return the distance between the first two ticks, or 0.2 if there are fewer than two."""
        ticks = ax.get_xticks() if axis == 'x' else ax.get_yticks()
        try:
            return ticks[1] - ticks[0]
        except IndexError:
            return 0.2

    def expand_axes(ax):
        """Pad both axis limits by an eighth of the tick step."""
        x_increment = major_tick_step(ax, 'x') / 8.0
        min_x, max_x = ax.get_xlim()
        ax.set_xlim(min_x - x_increment, max_x + x_increment)

        y_increment = major_tick_step(ax, 'y') / 8.0
        min_y, max_y = ax.get_ylim()
        ax.set_ylim(min_y - y_increment, max_y + y_increment)

    def add_reversed_annotation(ax):
        """Write '(reversed)' near the top-left corner of ``ax``."""
        min_x, _ = ax.get_xlim()
        _, max_y = ax.get_ylim()
        offset_x = major_tick_step(ax, 'x')
        offset_y = major_tick_step(ax, 'y')
        ax.text(min_x + offset_x / 8.0,
                max_y - offset_y / 2.0 - offset_y / 8.0,
                '(reversed)')

    # Only the path is needed; distance and cost matrix are discarded.
    _, _, path = dtw_function(sequence_x, sequence_y, dist_only=False)

    sequence_x = np.asarray(sequence_x)
    sequence_y = np.asarray(sequence_y)

    try:
        ndim = sequence_x.shape[1]
    except IndexError:
        # One-dimensional input has no second axis.
        ndim = 1

    is_reversed = dtw_path_is_reversed(path)
    if is_reversed:
        sequence_x = reverse_sequence(sequence_x)
        # Mirror the x indices so the path refers to the reversed sequence.
        path = (np.max(path[0]) - path[0], path[1])

    sequence_y_T = np.atleast_2d(sequence_y.T)
    sequence_x_T = np.atleast_2d(sequence_x.T)

    if ndim > 1:
        if columns is None:
            columns = ['Dimension #{0}'.format(i) for i in range(1, ndim + 1)]
        elif len(columns) != ndim:
            raise ValueError('Number of column titles does not match the number of dimensions')

    main_y_axis = None
    xaxes_regular = [None] * ndim
    xaxes_warped = [None] * ndim

    figure = plt.figure()
    figure.subplots_adjust(wspace=0.01, hspace=0.1)

    for i in range(ndim):
        x = sequence_x_T[i]
        y = sequence_y_T[i]

        # Bottom row: sequence_y
        ax2 = plt.subplot(2, ndim, ndim + i + 1, sharey=main_y_axis, sharex=xaxes_warped[i])
        ax2.plot(y, color='g')
        if i > 0:
            ax2.yaxis.set_visible(False)
        expand_axes(ax2)

        # The first axes created becomes the shared y axis for all others.
        if main_y_axis is None:
            main_y_axis = ax2
        if xaxes_warped[i] is None:
            xaxes_warped[i] = ax2

        # Top row: sequence_x
        ax1 = plt.subplot(2, ndim, i + 1, sharey=main_y_axis, sharex=xaxes_regular[i])
        ax1.plot(x, color='b')
        expand_axes(ax1)
        if ndim > 1:
            ax1.set_title(columns[i])
        if i > 0:
            ax1.yaxis.set_visible(False)
        if xaxes_regular[i] is None:
            xaxes_regular[i] = ax1

        if is_reversed:
            add_reversed_annotation(ax1)

        # One connector per (x index, y index) pair on the DTW path.
        for p_i, p_j in zip(path[0], path[1]):
            con = ConnectionPatch(xyA=(p_i, x[p_i]), xyB=(p_j, y[p_j]),
                                  coordsA="data", coordsB="data",
                                  axesA=ax1, axesB=ax2,
                                  arrowstyle="-", shrinkB=2, shrinkA=2, alpha=0.2)
            ax1.add_artist(con)

    if title is not None:
        plt.suptitle(title)

    # Copy before extending so the list returned by the axes is not modified.
    lines = list(xaxes_regular[0].get_lines())
    lines.extend(xaxes_warped[0].get_lines())

    if sequence_x_label and sequence_y_label:
        plt.figlegend(lines, (sequence_x_label, sequence_y_label), loc='lower center')

    return figure
def visualise_dtw_mappings(sequence_x, sequence_y, dtw_function=dtw_std, columns=None, title=None,
                           sequence_x_label=None, sequence_y_label=None):
    """
    Plot two sequences on top of each other and connect the points DTW maps together.

    ``sequence_x`` is drawn in the top row (blue) and ``sequence_y`` in the bottom
    row (green), with one column of subplots per dimension. A faint line is drawn
    between every pair of points on the DTW path. If ``dtw_path_is_reversed``
    reports a reversed path, ``sequence_x`` is reversed before plotting and the
    top axes are annotated with ``(reversed)``.

    :param sequence_x: first sequence (converted with ``np.asarray``)
    :param sequence_y: second sequence (converted with ``np.asarray``)
    :param dtw_function: callable invoked as ``dtw_function(x, y, dist_only=False)``;
        the third element of its result is used as the path (a pair of index arrays)
    :param columns: titles of the dimensions; generated when ``None`` and the data
        has more than one dimension
    :param title: optional figure title
    :param sequence_x_label: legend label for ``sequence_x``
    :param sequence_y_label: legend label for ``sequence_y``; the legend is only
        drawn when both labels are given
    :return: the created figure
    :raises ValueError: if ``columns`` is given for multi-dimensional data and its
        length differs from the number of dimensions
    """

    def major_tick_step(ax, axis):
        """Return the distance between the first two ticks, or 0.2 if there are fewer than two."""
        ticks = ax.get_xticks() if axis == 'x' else ax.get_yticks()
        try:
            return ticks[1] - ticks[0]
        except IndexError:
            return 0.2

    def expand_axes(ax):
        """Pad both axis limits by an eighth of the tick step."""
        x_increment = major_tick_step(ax, 'x') / 8.0
        min_x, max_x = ax.get_xlim()
        ax.set_xlim(min_x - x_increment, max_x + x_increment)

        y_increment = major_tick_step(ax, 'y') / 8.0
        min_y, max_y = ax.get_ylim()
        ax.set_ylim(min_y - y_increment, max_y + y_increment)

    def add_reversed_annotation(ax):
        """Write '(reversed)' near the top-left corner of ``ax``."""
        min_x, _ = ax.get_xlim()
        _, max_y = ax.get_ylim()
        offset_x = major_tick_step(ax, 'x')
        offset_y = major_tick_step(ax, 'y')
        ax.text(min_x + offset_x / 8.0,
                max_y - offset_y / 2.0 - offset_y / 8.0,
                '(reversed)')

    # Only the path is needed; distance and cost matrix are discarded.
    _, _, path = dtw_function(sequence_x, sequence_y, dist_only=False)

    sequence_x = np.asarray(sequence_x)
    sequence_y = np.asarray(sequence_y)

    try:
        ndim = sequence_x.shape[1]
    except IndexError:
        # One-dimensional input has no second axis.
        ndim = 1

    is_reversed = dtw_path_is_reversed(path)
    if is_reversed:
        sequence_x = reverse_sequence(sequence_x)
        # Mirror the x indices so the path refers to the reversed sequence.
        path = (np.max(path[0]) - path[0], path[1])

    sequence_y_T = np.atleast_2d(sequence_y.T)
    sequence_x_T = np.atleast_2d(sequence_x.T)

    if ndim > 1:
        if columns is None:
            columns = ['Dimension #{0}'.format(i) for i in range(1, ndim + 1)]
        elif len(columns) != ndim:
            raise ValueError('Number of column titles does not match the number of dimensions')

    main_y_axis = None
    xaxes_regular = [None] * ndim
    xaxes_warped = [None] * ndim

    figure = plt.figure()
    figure.subplots_adjust(wspace=0.01, hspace=0.1)

    for i in range(ndim):
        x = sequence_x_T[i]
        y = sequence_y_T[i]

        # Bottom row: sequence_y
        ax2 = plt.subplot(2, ndim, ndim + i + 1, sharey=main_y_axis, sharex=xaxes_warped[i])
        ax2.plot(y, color='g')
        if i > 0:
            ax2.yaxis.set_visible(False)
        expand_axes(ax2)

        # The first axes created becomes the shared y axis for all others.
        if main_y_axis is None:
            main_y_axis = ax2
        if xaxes_warped[i] is None:
            xaxes_warped[i] = ax2

        # Top row: sequence_x
        ax1 = plt.subplot(2, ndim, i + 1, sharey=main_y_axis, sharex=xaxes_regular[i])
        ax1.plot(x, color='b')
        expand_axes(ax1)
        if ndim > 1:
            ax1.set_title(columns[i])
        if i > 0:
            ax1.yaxis.set_visible(False)
        if xaxes_regular[i] is None:
            xaxes_regular[i] = ax1

        if is_reversed:
            add_reversed_annotation(ax1)

        # One connector per (x index, y index) pair on the DTW path.
        for p_i, p_j in zip(path[0], path[1]):
            con = ConnectionPatch(xyA=(p_i, x[p_i]), xyB=(p_j, y[p_j]),
                                  coordsA="data", coordsB="data",
                                  axesA=ax1, axesB=ax2,
                                  arrowstyle="-", shrinkB=2, shrinkA=2, alpha=0.2)
            ax1.add_artist(con)

    if title is not None:
        plt.suptitle(title)

    # Copy before extending so the list returned by the axes is not modified.
    lines = list(xaxes_regular[0].get_lines())
    lines.extend(xaxes_warped[0].get_lines())

    if sequence_x_label and sequence_y_label:
        plt.figlegend(lines, (sequence_x_label, sequence_y_label), loc='lower center')

    return figure
def plot(alignments, titles=None, horizontal_grid=True, no_y_axis=False, sort_by=None, subplot_spec=None,
         share_y_axis=None, scale_y_axis=None, highlighted_points=None, highlight_colours=None,
         rasterized=True, replace_with_dataframe=None):
    """
    Plot every dataset of ``alignments`` as a heatmap, followed by a shared colorbar.

    :param alignments: `AlignmentsData` object
    :param titles: Titles of the heatmaps. If set to None, `alignments.dataset_axis` will be used
    :param horizontal_grid: Whether to plot the heatmaps on a horizontal grid (side by side).
        Otherwise they are stacked vertically with the colorbar in a second column.
    :param no_y_axis: Will not plot the major (items) axis.
    :param sort_by: Sort values by 'length' or by the supplied `pd.Index`. None keeps the order of
        `alignments.items`.
    :param subplot_spec: SubplotSpec of the subplot to plot the heatmaps in.
        See `matplotlib.gridspec` package.
    :param share_y_axis: if not None, the plot will share the major (items) axis with the specified axis
    :param scale_y_axis: plot will scale the y axis by the specified number (linearly) if set.
    :type scale_y_axis: int
    :param highlighted_points: a (index, array) dictionary of points that should be highlighted in the heatmap.
        None (the default) or an empty dictionary disables highlighting.
    :param highlight_colours: passed through to `raw_plot_data_as_heatmap` together with the highlight masks
    :param rasterized: whether to rasterize the plot or not (faster rendering for rasterized)
    :param replace_with_dataframe: replace the data with the dataframe provided. Use for toggleable overlays
    :return: returns the shared y axis
    :raises ValueError: if `sort_by` is not None, 'length' or a `pd.Index`
    """
    from dgw.util.plotting import pyplot as plt
    from matplotlib import gridspec

    number_of_datasets = alignments.number_of_datasets
    if titles is None:
        titles = alignments.dataset_axis

    # Should be OK just to sort the first dataset as all others should have the same len
    sample_data_frame = alignments.dataset_xs(alignments.dataset_axis[0], copy=False).T
    lengths = sample_data_frame.apply(no_nans_len, axis=1)
    max_len = lengths.max()

    # Sorting
    if isinstance(sort_by, pd.Index):
        sorted_index = sort_by
    elif sort_by == 'length':
        debug('Sorting by length')
        lengths.sort()  # Expected to sort in place
        sorted_index = lengths.index
    elif sort_by is None:
        sorted_index = alignments.items
    else:
        raise ValueError('Unsupported sort_by value provided: {0!r}. '
                         'Only None or \'length\' supported'.format(sort_by))

    # Apply sorting
    alignments = alignments.ix[sorted_index]

    # Create the instance of axis formatter
    tick_formatter = dataset_ticks(alignments, scale_y_axis)

    # Subplot creation
    if horizontal_grid:
        # One wide column per dataset plus a narrow trailing column for the colorbar
        grid = (1, number_of_datasets + 1)
        width_ratios = [5] * number_of_datasets
        width_ratios.append(1)
        spacing_kwargs = {'wspace': 0.01, 'width_ratios': width_ratios}  # Almost no space between plots
    else:
        # One row per dataset; column 0 holds the heatmaps, column 1 the colorbar
        grid = (number_of_datasets, 2)
        spacing_kwargs = {'hspace': 0.15, 'width_ratios': [5, 1]}  # Allow a bit of a space for title

    if not subplot_spec:
        gs = gridspec.GridSpec(*grid, **spacing_kwargs)
    else:
        gs = gridspec.GridSpecFromSubplotSpec(*grid, subplot_spec=subplot_spec, **spacing_kwargs)

    # Main drawing loop
    first_axis = None
    extent = None
    if scale_y_axis:
        # Stretch the items axis linearly by scale_y_axis
        # (original note: 10 is what matplotlib's dendrogram returns)
        extent = [0, max_len, 0, alignments.number_of_items * scale_y_axis]

    if highlighted_points:
        # One boolean (items x positions) mask per highlight key, created on first use
        highlight_masks = defaultdict(lambda: np.zeros((alignments.number_of_items, max_len), dtype=bool))
        for i, ix in enumerate(sorted_index):
            try:
                points_of_interest = highlighted_points[ix]
            except KeyError:
                # No highlights registered for this item
                continue

            for j, points in points_of_interest.items():
                highlight_masks[j][i][points] = True

        # Mask out everything that is not highlighted
        for j in list(highlight_masks):
            highlight_masks[j] = np.ma.masked_where(highlight_masks[j] <= 0, highlight_masks[j])
    else:
        highlight_masks = None

    for i, (ix, title) in enumerate(zip(alignments.dataset_axis, titles)):
        # Heatmaps live in the wide column(s); in the vertical layout that is column 0
        t_gs = gs[:, i] if horizontal_grid else gs[i, 0]

        if i == 0:
            if not share_y_axis:
                first_axis = plt.subplot(t_gs)
                share_y_axis = first_axis
            else:
                first_axis = plt.subplot(t_gs, sharey=share_y_axis)
        else:
            # Remember to share axes
            plt.subplot(t_gs, sharex=first_axis, sharey=share_y_axis)

        current_axis = plt.gca()
        current_axis.get_yaxis().set_major_formatter(tick_formatter)

        # Remove redundant axes
        if horizontal_grid:
            if i > 0 or no_y_axis:
                current_axis.get_yaxis().set_visible(False)
        else:
            # Leave only last axis
            if i + 1 < number_of_datasets:
                current_axis.get_xaxis().set_visible(False)
            if no_y_axis:
                current_axis.get_yaxis().set_visible(False)

        data_to_plot = alignments.dataset_xs(ix, copy=False).T
        # Cut most of the columns that are NaNs out of the plot
        data_to_plot = data_to_plot[data_to_plot.columns[:max_len]]

        if replace_with_dataframe is not None:
            result = raw_plot_data_as_heatmap(replace_with_dataframe, extent=extent, rasterized=rasterized)
        else:
            result = raw_plot_data_as_heatmap(data_to_plot, extent=extent, highlight_masks=highlight_masks,
                                              rasterized=rasterized, highlight_colours=highlight_colours)

        debug(plt.gca().get_ylim())
        debug(plt.gca().get_xlim())

        plt.title(title)
        plt.gca().title.set_fontsize(7)
        plt.gca().grid(False)
        plt.setp(plt.gca().get_xticklabels(), rotation='vertical', fontsize=7)

    # Colorbar goes into the narrow last column of the grid
    colorbar_spec = gs[:, number_of_datasets] if horizontal_grid else gs[:, 1]
    colorbar_axis = plt.subplot(colorbar_spec)
    plt.colorbar(result, cax=colorbar_axis)

    return first_axis
def ax_projected_heatmap(self):
    """
    Return the axes of the projected heatmap, creating them on first access.

    The axes are created with ``plt.subplot(self.gs_projected_heatmap())`` and
    stored in ``self._ax_projected_heatmap`` so later calls reuse them.
    """
    if self._ax_projected_heatmap is not None:
        return self._ax_projected_heatmap

    self._ax_projected_heatmap = plt.subplot(self.gs_projected_heatmap())
    return self._ax_projected_heatmap