Beispiel #1
0
    def draw_dendrogram(self):
        """Draw the clustering dendrogram and remember the resulting leaf order.

        The dendrogram is requested with ``orientation='right'`` on
        ``self.ax_dendrogram`` and uses ``self._cut_xdata`` as its colour
        threshold.  If ``self._sorted_index`` has not been set yet, it is
        filled with the clustered items reordered by the dendrogram leaves.
        """
        dendrogram_axes = self.ax_dendrogram
        # NOTE(review): passing an existing axes to ``plt.subplot`` presumably
        # selects it as the current axes -- confirm for the matplotlib in use.
        plt.subplot(dendrogram_axes, rasterized=True)
        clustering = self.hierarchical_clustering_object

        leaf_order = clustering.dendrogram(orientation='right',
                                           get_leaves=True,
                                           distance_sort=True,
                                           color_threshold=self._cut_xdata,
                                           ax=dendrogram_axes)['leaves']

        active_axes = plt.gca()
        plt.setp(active_axes.get_xticklabels(), rotation='vertical', fontsize=7)
        # The item axis is hidden on the dendrogram panel.
        active_axes.get_yaxis().set_visible(False)

        if self._sorted_index is not None:
            return
        self._sorted_index = clustering.data.items[leaf_order]
Beispiel #2
0
    def _plot_item_in_figure(self, index):
        """Plot a single item of the current cluster in a new figure.

        A three-row figure is created showing, top to bottom, the item's
        original data ('Original'), the cluster prototype ('Cluster
        Prototype') and the item's projected data ('Warped').  Points of
        interest are marked on the first row and tracked points of interest
        on the third row.  Afterwards ``visualise_dtw_mappings`` is called
        for the item against the prototype, which creates a second figure.

        :param index: key used to look the item up via ``.ix[index]`` and in
            the points-of-interest mappings; also used as the figure
            suptitle and as the sequence label of the DTW mapping plot
        """
        cluster = self.current_cluster()
        data = cluster.data.ix[index]
        projected_data = cluster.projected_data.ix[index]
        prototype = cluster.prototype

        poi = cluster.points_of_interest.get(index, {})
        tracked_poi = cluster.tracked_points_of_interest.get(index, {})

        def highlight_points(ax, points_by_key):
            # Draw one unit-wide rectangle per highlighted point along the
            # bottom of ``ax``; rectangles sharing an x position are stacked.
            if not points_by_key:
                return
            lim_min, lim_max = ax.get_ylim()
            height = (lim_max - lim_min) / 20
            stacked = defaultdict(int)
            for key, points in points_by_key.items():
                colour = self.highlight_colours[key]
                for point in points:
                    bottom = lim_min + (height * stacked[point])
                    ax.add_patch(Rectangle((point, bottom), width=1, height=height,
                                           facecolor=colour, edgecolor='k'))
                    stacked[point] += 1

        plt.figure()
        ax1 = plt.subplot(3, 1, 1)
        data.plot(ax=ax1, legend=False)
        plt.figlegend(*ax1.get_legend_handles_labels(), loc='lower center')
        highlight_points(ax1, poi)
        plt.title('Original')

        ax2 = plt.subplot(3, 1, 2, sharey=ax1)
        prototype.plot(ax=ax2, legend=False)
        plt.title('Cluster Prototype')

        ax3 = plt.subplot(3, 1, 3, sharey=ax1)
        projected_data.plot(ax=ax3, legend=False)
        highlight_points(ax3, tracked_poi)

        plt.title('Warped')
        plt.suptitle(index)

        # Called for its side effect of building the DTW mapping figure; the
        # returned figure object is not needed here.
        visualise_dtw_mappings(data, prototype, dtw_function=self.dtw_function,
                               columns=data.columns, sequence_x_label=index,
                               sequence_y_label='Cluster Prototype')
Beispiel #3
0
def prototype_images_from_node_list(node_list, output_directory):
    """Save one PNG per node, with a subplot for each prototype dimension.

    Each node is written to ``<node.id>.png`` inside ``output_directory``.
    The number of subplots is taken from the second axis of the first node's
    prototype and reused for every node.

    :param node_list: sequence of nodes exposing ``id`` and ``prototype``
        (``prototype`` must provide ``shape`` and ``values``); an empty
        sequence produces no images
    :param output_directory: directory the images are written into.
        NOTE(review): it is not created here -- presumably it must exist.
    """
    print('> Saving images to {0!r}'.format(output_directory))
    if len(node_list) == 0:
        # Nothing to draw; also avoids indexing into an empty sequence below.
        return

    ndims = node_list[0].prototype.shape[1]
    figure = plt.figure(figsize=(3 * ndims, 2))
    try:
        for node in node_list:
            filename = '{0}.png'.format(node.id)
            full_filename = os.path.join(output_directory, filename)
            print('> Saving {0}'.format(full_filename))
            prototype_T = node.prototype.values.T
            for i in range(ndims):
                plt.subplot(1, ndims, i + 1)
                plt.plot(prototype_T[i])

            plt.savefig(full_filename)
            # Clear the figure so it can be reused for the next node.
            plt.clf()
    finally:
        # Release the figure even when plotting or saving fails, so repeated
        # calls do not accumulate open figures.
        plt.close(figure)
Beispiel #4
0
def prototype_images_from_node_list(node_list, output_directory):
    """Save one PNG per node, with a subplot for each prototype dimension.

    Each node is written to ``<node.id>.png`` inside ``output_directory``.
    The number of subplots is taken from the second axis of the first node's
    prototype and reused for every node.

    :param node_list: sequence of nodes exposing ``id`` and ``prototype``
        (``prototype`` must provide ``shape`` and ``values``); an empty
        sequence produces no images
    :param output_directory: directory the images are written into.
        NOTE(review): it is not created here -- presumably it must exist.
    """
    print('> Saving images to {0!r}'.format(output_directory))
    if len(node_list) == 0:
        # Nothing to draw; also avoids indexing into an empty sequence below.
        return

    ndims = node_list[0].prototype.shape[1]
    figure = plt.figure(figsize=(3 * ndims, 2))
    try:
        for node in node_list:
            filename = '{0}.png'.format(node.id)
            full_filename = os.path.join(output_directory, filename)
            print('> Saving {0}'.format(full_filename))
            prototype_T = node.prototype.values.T
            for i in range(ndims):
                plt.subplot(1, ndims, i + 1)
                plt.plot(prototype_T[i])

            plt.savefig(full_filename)
            # Clear the figure so it can be reused for the next node.
            plt.clf()
    finally:
        # Release the figure even when plotting or saving fails, so repeated
        # calls do not accumulate open figures.
        plt.close(figure)
Beispiel #5
0
    def draw(self):
        """Redraw every panel of the view for the current cluster.

        Panels are refreshed in this order: prototype, warping conservation
        vector, data heatmap, mean projection and projected heatmap.  The
        title text is updated with the cluster number, cluster count and
        number of items, and the two heatmap axes are stored on
        ``self._ax_heatmap`` and ``self._ax_projected_heatmap``.
        """
        prototype_axes = plt.subplot(self.gs_prototype())
        plt.cla()
        plt.title('Prototype')
        cluster = self.current_cluster()
        cluster.prototype.plot(ax=prototype_axes, legend=False)

        handles, labels = prototype_axes.get_legend_handles_labels()
        plt.figlegend(handles, labels, loc='lower center')

        # Warping conservation panel shares its x axis with the prototype.
        plt.subplot(self.gs_warping_preservation(), sharex=prototype_axes)
        plt.cla()
        conservation = cluster.warping_conservation_vector()
        x_positions = np.arange(0.5, len(conservation), 1)
        plt.plot(x_positions, conservation)

        title_text = 'Cluster #{0}/{2} ({1} elements)'.format(
            self._current_cluster_id + 1, cluster.n_items, len(self._clusters))
        self.title.set_text(title_text)

        # Heatmap of the cluster data
        plt.subplot(self.gs_heatmap())
        plt.cla()
        self._ax_heatmap = plt.gca()

        items_axis = cluster.data.plot_heatmap(
            horizontal_grid=True,
            subplot_spec=self.gs_heatmap(),
            sort_by=None,
            highlighted_points=cluster.points_of_interest,
            highlight_colours=self.highlight_colours)

        # Projections
        projected = cluster.projected_data

        mean_axes = plt.subplot(self.gs_projected_mean())
        plt.cla()
        plt.title('Average DTW projection onto prototype')
        projected.mean().plot(ax=mean_axes, legend=False)

        plt.subplot(self.gs_projected_heatmap())
        plt.cla()
        self._ax_projected_heatmap = plt.gca()

        # Both view modes draw the projected heatmap with the same options;
        # the warping conservation view only adds the replacement dataframe.
        draw_projected_heatmap = cluster.projected_data.plot_heatmap
        heatmap_options = {
            'horizontal_grid': True,
            'subplot_spec': self.gs_projected_heatmap(),
            'share_y_axis': items_axis,
            'sort_by': None,
            'highlighted_points': cluster.tracked_points_of_interest,
            'highlight_colours': self.highlight_colours,
        }
        if self._warping_conservation_view:
            heatmap_options['replace_with_dataframe'] = cluster.warping_conservation_data
        draw_projected_heatmap(**heatmap_options)

        # Finally issue a draw command for the plot
        plt.draw()
Beispiel #6
0
    def create_figure(self, figsize=(12, 10), interactive=True):
        """Create the figure, its main grid and the dendrogram axes.

        :param figsize: figure size passed straight to ``plt.figure``
        :param interactive: when true, the click listener is connected to the
            figure canvas and ``draw_buttons`` is called
        """
        new_figure = plt.figure(num=None, figsize=figsize, facecolor='w', edgecolor='k')
        self._gs_main = gridspec.GridSpec(2, 2, wspace=0, height_ratios=[1, 15])
        self._figure = new_figure
        self._ax_dendrogram = plt.subplot(self.gs_dendrogram, rasterized=True)

        if not interactive:
            return

        canvas = self._figure.canvas
        canvas.mpl_connect('button_press_event', self._onclick_listener)
        self.draw_buttons()
Beispiel #7
0
def visualise_dtw_mappings(sequence_x,
                           sequence_y,
                           dtw_function=dtw_std,
                           columns=None,
                           title=None,
                           sequence_x_label=None,
                           sequence_y_label=None):
    """Plot two sequences and connect the points matched by their DTW path.

    ``dtw_function(sequence_x, sequence_y, dist_only=False)`` is called and
    must return ``(dist, cost, path)``.  A new figure with two rows is
    created: ``sequence_x`` is drawn in blue on the top row and
    ``sequence_y`` in green on the bottom row, one column of subplots per
    dimension.  Every pair of indices on the path is joined by a faint line.

    If ``dtw_path_is_reversed(path)`` is true, ``sequence_x`` is reversed
    and its path indices are mirrored before plotting, and the top subplots
    are annotated with '(reversed)'.

    :param sequence_x: first sequence; converted with ``np.asarray``
    :param sequence_y: second sequence; converted with ``np.asarray``
    :param dtw_function: callable computing the DTW alignment
    :param columns: per-dimension subplot titles; defaults to
        'Dimension #i' when there is more than one dimension
    :param title: optional figure suptitle
    :param sequence_x_label: legend label for ``sequence_x``
    :param sequence_y_label: legend label for ``sequence_y``; the legend is
        only drawn when both labels are given
    :return: the created figure
    :raises ValueError: if ``columns`` is given for multi-dimensional input
        and its length differs from the number of dimensions
    """

    def major_tick_step(ax, axis):
        # Distance between the first two ticks of the requested axis, with a
        # fixed fallback of 0.2 when fewer than two ticks exist.
        if axis == 'x':
            ticks = ax.get_xticks()
        else:
            ticks = ax.get_yticks()

        try:
            step = ticks[1] - ticks[0]
        except IndexError:
            step = 0.2

        return step

    def expand_axes(ax):
        # Pad both axis ranges by an eighth of a tick step on each side.
        x_increment = major_tick_step(ax, 'x') / 8.0

        min_x, max_x = ax.get_xlim()
        ax.set_xlim(min_x - x_increment, max_x + x_increment)

        y_increment = major_tick_step(ax, 'y') / 8.0
        min_y, max_y = ax.get_ylim()
        ax.set_ylim(min_y - y_increment, max_y + y_increment)

    def add_reversed_annotation(ax):
        # Place the '(reversed)' marker near the top-left corner of ``ax``.
        min_x, _max_x = ax.get_xlim()
        _min_y, max_y = ax.get_ylim()

        offset_x = major_tick_step(ax, 'x')
        offset_y = major_tick_step(ax, 'y')
        ax.text(min_x + offset_x / 8, max_y - offset_y / 2 - offset_y / 8,
                '(reversed)')

    _dist, _cost, path = dtw_function(sequence_x, sequence_y, dist_only=False)

    sequence_x = np.asarray(sequence_x)
    sequence_y = np.asarray(sequence_y)

    try:
        ndim = sequence_x.shape[1]
    except IndexError:
        # One-dimensional input has no second axis.
        ndim = 1

    # Named ``path_reversed`` so the builtin ``reversed`` is not shadowed.
    path_reversed = dtw_path_is_reversed(path)
    if path_reversed:
        sequence_x = reverse_sequence(sequence_x)
        # Mirror the x indices so they address the reversed sequence.
        path_x = np.max(path[0]) - path[0]
        path_y = path[1]
        path = (path_x, path_y)

    sequence_y_T = np.atleast_2d(sequence_y.T)
    sequence_x_T = np.atleast_2d(sequence_x.T)

    if columns is None and ndim > 1:
        columns = ['Dimension #{0}'.format(i) for i in range(1, ndim + 1)]
    elif ndim > 1:
        if len(columns) != ndim:
            raise ValueError(
                'Number of column titles does not match the number of dimensions'
            )

    main_y_axis = None
    xaxes_regular = [None] * ndim
    xaxes_warped = [None] * ndim
    figure = plt.figure()
    figure.subplots_adjust(wspace=0.01, hspace=0.1)
    for i in range(ndim):
        x = sequence_x_T[i]
        y = sequence_y_T[i]

        # Bottom row: sequence_y
        ax2 = plt.subplot(2,
                          ndim,
                          ndim + i + 1,
                          sharey=main_y_axis,
                          sharex=xaxes_warped[i])
        ax2.plot(y, color='g')

        if i > 0:
            ax2.yaxis.set_visible(False)

        expand_axes(ax2)

        # The first axes created becomes the y axis shared by all the others.
        if main_y_axis is None:
            main_y_axis = ax2

        if xaxes_warped[i] is None:
            xaxes_warped[i] = ax2

        # Top row: sequence_x
        ax1 = plt.subplot(2,
                          ndim,
                          i + 1,
                          sharey=main_y_axis,
                          sharex=xaxes_regular[i])
        ax1.plot(x, color='b')
        expand_axes(ax1)

        if ndim > 1:
            ax1.set_title(columns[i])

        if i > 0:
            ax1.yaxis.set_visible(False)

        if xaxes_regular[i] is None:
            xaxes_regular[i] = ax1
        if path_reversed:
            add_reversed_annotation(ax1)

        # Join every matched pair of points across the two rows.
        for p_i, p_j in zip(path[0], path[1]):
            xy_a = (p_i, x[p_i])
            xy_b = (p_j, y[p_j])
            con = ConnectionPatch(xyA=xy_a,
                                  xyB=xy_b,
                                  coordsA="data",
                                  coordsB="data",
                                  axesA=ax1,
                                  axesB=ax2,
                                  arrowstyle="-",
                                  shrinkB=2,
                                  shrinkA=2,
                                  alpha=0.2)

            ax1.add_artist(con)

    if title is not None:
        plt.suptitle(title)

    if sequence_x_label and sequence_y_label:
        # Legend handles come from the first column only: one line per row.
        lines = list(xaxes_regular[0].get_lines())
        lines.extend(xaxes_warped[0].get_lines())
        plt.figlegend(lines, (sequence_x_label, sequence_y_label),
                      loc='lower center')

    return figure
Beispiel #8
0
def visualise_dtw_mappings(sequence_x, sequence_y, dtw_function=dtw_std, columns=None, title=None, sequence_x_label=None,
                           sequence_y_label=None):
    """Plot two sequences and connect the points matched by their DTW path.

    ``dtw_function(sequence_x, sequence_y, dist_only=False)`` is called and
    must return ``(dist, cost, path)``.  A new figure with two rows is
    created: ``sequence_x`` is drawn in blue on the top row and
    ``sequence_y`` in green on the bottom row, one column of subplots per
    dimension.  Every pair of indices on the path is joined by a faint line.

    If ``dtw_path_is_reversed(path)`` is true, ``sequence_x`` is reversed
    and its path indices are mirrored before plotting, and the top subplots
    are annotated with '(reversed)'.

    :param sequence_x: first sequence; converted with ``np.asarray``
    :param sequence_y: second sequence; converted with ``np.asarray``
    :param dtw_function: callable computing the DTW alignment
    :param columns: per-dimension subplot titles; defaults to
        'Dimension #i' when there is more than one dimension
    :param title: optional figure suptitle
    :param sequence_x_label: legend label for ``sequence_x``
    :param sequence_y_label: legend label for ``sequence_y``; the legend is
        only drawn when both labels are given
    :return: the created figure
    :raises ValueError: if ``columns`` is given for multi-dimensional input
        and its length differs from the number of dimensions
    """

    def major_tick_step(ax, axis):
        # Distance between the first two ticks of the requested axis, with a
        # fixed fallback of 0.2 when fewer than two ticks exist.
        if axis == 'x':
            ticks = ax.get_xticks()
        else:
            ticks = ax.get_yticks()

        try:
            step = ticks[1] - ticks[0]
        except IndexError:
            step = 0.2

        return step

    def expand_axes(ax):
        # Pad both axis ranges by an eighth of a tick step on each side.
        x_increment = major_tick_step(ax, 'x') / 8.0

        min_x, max_x = ax.get_xlim()
        ax.set_xlim(min_x - x_increment, max_x + x_increment)

        y_increment = major_tick_step(ax, 'y') / 8.0
        min_y, max_y = ax.get_ylim()
        ax.set_ylim(min_y - y_increment, max_y + y_increment)

    def add_reversed_annotation(ax):
        # Place the '(reversed)' marker near the top-left corner of ``ax``.
        min_x, _max_x = ax.get_xlim()
        _min_y, max_y = ax.get_ylim()

        offset_x = major_tick_step(ax, 'x')
        offset_y = major_tick_step(ax, 'y')
        ax.text(min_x + offset_x / 8, max_y - offset_y / 2 - offset_y / 8, '(reversed)')

    _dist, _cost, path = dtw_function(sequence_x, sequence_y, dist_only=False)

    sequence_x = np.asarray(sequence_x)
    sequence_y = np.asarray(sequence_y)

    try:
        ndim = sequence_x.shape[1]
    except IndexError:
        # One-dimensional input has no second axis.
        ndim = 1

    # Named ``path_reversed`` so the builtin ``reversed`` is not shadowed.
    path_reversed = dtw_path_is_reversed(path)
    if path_reversed:
        sequence_x = reverse_sequence(sequence_x)
        # Mirror the x indices so they address the reversed sequence.
        path_x = np.max(path[0]) - path[0]
        path_y = path[1]
        path = (path_x, path_y)

    sequence_y_T = np.atleast_2d(sequence_y.T)
    sequence_x_T = np.atleast_2d(sequence_x.T)

    if columns is None and ndim > 1:
        columns = ['Dimension #{0}'.format(i) for i in range(1, ndim + 1)]
    elif ndim > 1:
        if len(columns) != ndim:
            raise ValueError('Number of column titles does not match the number of dimensions')

    main_y_axis = None
    xaxes_regular = [None] * ndim
    xaxes_warped = [None] * ndim
    figure = plt.figure()
    figure.subplots_adjust(wspace=0.01, hspace=0.1)
    for i in range(ndim):
        x = sequence_x_T[i]
        y = sequence_y_T[i]

        # Bottom row: sequence_y
        ax2 = plt.subplot(2, ndim, ndim + i + 1, sharey=main_y_axis, sharex=xaxes_warped[i])
        ax2.plot(y, color='g')

        if i > 0:
            ax2.yaxis.set_visible(False)

        expand_axes(ax2)

        # The first axes created becomes the y axis shared by all the others.
        if main_y_axis is None:
            main_y_axis = ax2

        if xaxes_warped[i] is None:
            xaxes_warped[i] = ax2

        # Top row: sequence_x
        ax1 = plt.subplot(2, ndim, i + 1, sharey=main_y_axis, sharex=xaxes_regular[i])
        ax1.plot(x, color='b')
        expand_axes(ax1)

        if ndim > 1:
            ax1.set_title(columns[i])

        if i > 0:
            ax1.yaxis.set_visible(False)

        if xaxes_regular[i] is None:
            xaxes_regular[i] = ax1
        if path_reversed:
            add_reversed_annotation(ax1)

        # Join every matched pair of points across the two rows.
        for p_i, p_j in zip(path[0], path[1]):
            xy_a = (p_i, x[p_i])
            xy_b = (p_j, y[p_j])
            con = ConnectionPatch(xyA=xy_a, xyB=xy_b, coordsA="data", coordsB="data",
                                  axesA=ax1, axesB=ax2, arrowstyle="-", shrinkB=2, shrinkA=2, alpha=0.2)

            ax1.add_artist(con)

    if title is not None:
        plt.suptitle(title)

    if sequence_x_label and sequence_y_label:
        # Legend handles come from the first column only: one line per row.
        lines = list(xaxes_regular[0].get_lines())
        lines.extend(xaxes_warped[0].get_lines())
        plt.figlegend(lines, (sequence_x_label, sequence_y_label), loc='lower center')

    return figure
Beispiel #9
0
def plot(alignments, titles=None, horizontal_grid=True,
         no_y_axis=False, sort_by=None, subplot_spec=None, share_y_axis=None, scale_y_axis=None,
         highlighted_points=None,
         highlight_colours=None,
         rasterized=True, replace_with_dataframe=None):
    """
    Plot one heatmap per dataset of `alignments`, followed by a colorbar.

    :param alignments: `AlignmentsData` object
    :param titles: Titles of the heatmaps. If set to None, `alignments.dataset_axis` will be used
    :param horizontal_grid: Whether to lay the heatmaps out side by side (True) or stacked vertically (False)
    :param no_y_axis: Will not plot the major (items) axis.
    :param sort_by: Sort items by 'length', by a supplied `pd.Index`, or keep the existing order (None).
    :param subplot_spec: SubplotSpec of the subplot to plot the heatmaps in. See `matplotlib.gridspec` package.
    :param share_y_axis: if not None, the plot will share the major (items) axis with the specified axis
    :param scale_y_axis: plot will scale the y axis by the specified number (linearly) if set.
    :type scale_y_axis: int
    :param highlighted_points: a (index, array) dictionary of points that should be highlighted in the heatmap.
                               None (the default) or an empty dictionary disables highlighting.
    :param highlight_colours: passed through to `raw_plot_data_as_heatmap` together with the highlight masks
    :param rasterized: whether to rasterize the plot or not (faster rendering for rasterized)
    :param replace_with_dataframe: replace the data with the dataframe provided. Use for toggleable overlays
    :return: the axes of the first heatmap (which the other heatmaps share their x axis with)
    :raises ValueError: if `sort_by` is not None, 'length' or a `pd.Index`
    """
    from dgw.util.plotting import pyplot as plt
    from matplotlib import gridspec

    number_of_datasets = alignments.number_of_datasets

    if titles is None:
        titles = alignments.dataset_axis

    # Should be OK just to just sort the first dataset as all others should have the same len
    sample_data_frame = alignments.dataset_xs(alignments.dataset_axis[0], copy=False).T
    lengths = sample_data_frame.apply(no_nans_len, axis=1)
    max_len = lengths.max()

    # Sorting
    if isinstance(sort_by, pd.Index):
        sorted_index = sort_by
    elif sort_by == 'length':
        debug('Sorting by length')
        # NOTE(review): relies on the in-place Series.sort() of legacy pandas -- confirm the pandas version in use
        lengths.sort()  # Should do in-place
        sorted_index = lengths.index
    elif sort_by is None:
        sorted_index = alignments.items
    else:
        raise ValueError('Unsupported sort_by value provided: {0!r}. Only None or \'length\' supported'.format(sort_by))
    # Apply sorting
    alignments = alignments.ix[sorted_index]
    # Create the instance of axis formatter
    tick_formatter = dataset_ticks(alignments, scale_y_axis)

    # Subplot creation: the last column is always reserved for the colorbar
    if horizontal_grid:
        grid = (1, number_of_datasets + 1)
        width_ratios = [5] * number_of_datasets
        width_ratios.append(1)
        spacing_kwargs = {'wspace': 0.01, 'width_ratios': width_ratios}  # Almost no space between plots
    else:
        grid = (number_of_datasets, 2)
        spacing_kwargs = {'hspace': 0.15, 'width_ratios': [5, 1]}  # Allow a bit of a space for title

    if not subplot_spec:
        gs = gridspec.GridSpec(*grid, **spacing_kwargs)
    else:
        gs = gridspec.GridSpecFromSubplotSpec(*grid, subplot_spec=subplot_spec, **spacing_kwargs)

    # Main drawing loop
    first_axis = None
    extent = None
    if scale_y_axis:
        # Multiply by 10 as that is what matplotlib's dendrogram returns
        extent = [0, max_len, 0, alignments.number_of_items * scale_y_axis]

    if highlighted_points:
        # One boolean (items x positions) mask per highlight key, created on first use
        highlight_masks = defaultdict(lambda: np.zeros((alignments.number_of_items, max_len), dtype=bool))
        for i, ix in enumerate(sorted_index):
            try:
                points_of_interest = highlighted_points[ix]
            except KeyError:
                continue

            for j, points in points_of_interest.items():
                highlight_masks[j][i][points] = 1

        # Mask out everything that is not highlighted. Iterate over a snapshot of the keys
        # because the values are replaced inside the loop.
        for j in list(highlight_masks):
            highlight_masks[j] = np.ma.masked_where(highlight_masks[j] <= 0, highlight_masks[j])

    else:
        highlight_masks = None

    for i, (ix, title) in enumerate(zip(alignments.dataset_axis, titles)):
        # Heatmaps occupy the wide column(s); in the vertical layout that is column 0 of the 2-column grid
        t_gs = gs[:, i] if horizontal_grid else gs[i, 0]
        if i == 0:
            if not share_y_axis:
                first_axis = plt.subplot(t_gs)
                share_y_axis = first_axis
            else:
                first_axis = plt.subplot(t_gs, sharey=share_y_axis)
        else:
            # Remember to share axes
            plt.subplot(t_gs, sharex=first_axis, sharey=share_y_axis)

        plt.gca().get_yaxis().set_major_formatter(tick_formatter)
        #plt.gca().get_yaxis().set_major_locator(IndexLocator(1000, 0))
        # Remove redundant axes
        if horizontal_grid:
            if i > 0 or no_y_axis:
                plt.gca().get_yaxis().set_visible(False)
        else:
            # Leave only last axis
            if i + 1 < number_of_datasets:
                plt.gca().get_xaxis().set_visible(False)

            if no_y_axis:
                plt.gca().get_yaxis().set_visible(False)

        data_to_plot = alignments.dataset_xs(ix, copy=False).T
        # Cut most of the columns that are NaNs out of the plot
        data_to_plot = data_to_plot[data_to_plot.columns[:max_len]]

        if replace_with_dataframe is not None:
            result = raw_plot_data_as_heatmap(replace_with_dataframe, extent=extent,
                                              rasterized=rasterized)
        else:
            result = raw_plot_data_as_heatmap(data_to_plot, extent=extent,
                                              highlight_masks=highlight_masks, rasterized=rasterized,
                                              highlight_colours=highlight_colours)
        debug(plt.gca().get_ylim())
        debug(plt.gca().get_xlim())

        plt.title(title)
        plt.gca().title.set_fontsize(7)
        plt.gca().grid(False)
        plt.setp(plt.gca().get_xticklabels(), rotation='vertical', fontsize=7)

    # Colorbar goes into the narrow last column: index number_of_datasets horizontally,
    # index 1 in the 2-column vertical grid
    colorbar_axis = plt.subplot(gs[:, number_of_datasets] if horizontal_grid else gs[:, 1])
    plt.colorbar(result, cax=colorbar_axis)

    return first_axis
Beispiel #10
0
 def ax_projected_heatmap(self):
     """Return the projected-heatmap axes, creating and caching it on first use."""
     cached_axes = self._ax_projected_heatmap
     if cached_axes is not None:
         return cached_axes

     # Not created yet: build it from the projected heatmap subplot spec.
     self._ax_projected_heatmap = plt.subplot(self.gs_projected_heatmap())
     return self._ax_projected_heatmap