def plot_dendrogram(data, labels=None, ax=None):
    """Draw a hierarchical clustering dendrogram for the given data.

    Parameters
    ----------
    data : 2d array
        Data to cluster and plot as a dendrogram.
    labels : list of str, optional
        Labels for the dendrogram.
    ax : matplotlib.Axes, optional
        Figure axes upon which to plot.

    Notes
    -----
    The data is clustered with `hier.linkage`, using complete linkage and the
    cosine metric, and the result is drawn with `hier.dendrogram`, oriented
    to the left.

    Examples
    --------
    See the example for the :meth:`~.compute_score` method of the :class:`~.Counts` class.
    """

    # Cluster first, so that no axes are created if the linkage step fails
    linkage_matrix = hier.linkage(data, method='complete', metric='cosine')

    plot_ax = check_ax(ax)
    label_kwargs = check_args(['labels'], labels)

    hier.dendrogram(
        linkage_matrix,
        orientation='left',
        color_threshold=0.25,
        leaf_font_size=12,
        ax=plot_ax,
        **label_kwargs,
    )

    plt.tight_layout()
def plot_years(years, year_range=None, ax=None):
    """Plot the number of publications across years, as a line with point markers.

    Parameters
    ----------
    years : collections.Counter
        Data on the number of publications per year.
    year_range : list of [int, int], optional
        The range of years to plot on the x-axis.
        If not provided, the x-axis limits are not set explicitly.
    ax : matplotlib.Axes, optional
        Figure axes upon which to plot.

    Notes
    -----
    - All drawing is done on the axes returned by `check_ax`, so a provided `ax`
      is used even if it is not the current matplotlib axes.
    - Data points are plotted in the iteration order of `years`.
    - The y-axis runs from 0 to 3 above the largest count, or from 0 to 3 if
      `years` is empty.
    """

    ax = check_ax(ax, (10, 5))

    # Extract x & y data to plot
    x_dat = list(years.keys())
    y_dat = list(years.values())

    # Add line and points to plot, drawing on the selected axes
    ax.plot(x_dat, y_dat)
    ax.plot(x_dat, y_dat, '.', markersize=16)

    # Set plot limits, using `default` so that empty data does not raise an error
    if year_range:
        ax.set_xlim([year_range[0], year_range[1]])
    ax.set_ylim([0, max(y_dat, default=0) + 3])

    # Add title & labels
    ax.set_title('Publication History', fontsize=24, fontweight='bold')
    ax.set_xlabel('Year of Publication', fontsize=18)
    ax.set_ylabel('Number of Articles', fontsize=18)
def plot_matrix(data, x_labels=None, y_labels=None, attribute='score', transpose=False,
                cmap='purple', square=False, ax=None, **kwargs):
    """Draw a heatmap of a data matrix.

    Parameters
    ----------
    data : Counts or 2d array
        Data to plot in matrix format.
    x_labels : list of str, optional
        Labels for the x-axis.
    y_labels : list of str, optional
        Labels for the y-axis.
    attribute : {'score', 'counts'}, optional
        Which data attribute from the counts object to plot the data for.
        Only used if the `data` input is a Counts object.
    transpose : bool, optional, default: False
        Whether to transpose the data before plotting.
    cmap : {'purple', 'blue'} or matplotlib.cmap
        Colormap to use for the plot.
        A string input is converted to a colormap with `get_cmap`.
    square : bool
        Whether to plot all the cells as equally sized squares.
    ax : matplotlib.Axes, optional
        Figure axes upon which to plot.
    **kwargs
        Additional keyword arguments to pass through to seaborn.heatmap.
        The keys 'font_scale' (default: 1.0) and 'figsize' (default: None) are
        removed and used to set up the plotting context and the axes.

    Notes
    -----
    This function is a wrapper of the seaborn `heatmap` plot function.

    Examples
    --------
    See the example for the :meth:`~.compute_score` method of the :class:`~.Counts` class.
    """

    if isinstance(cmap, str):
        cmap = get_cmap(cmap)

    data, x_labels, y_labels = counts_data_helper(
        data, x_labels, y_labels, attribute, transpose)

    # Take out the settings handled here, so they are not forwarded to the heatmap
    font_scale = kwargs.pop('font_scale', 1.0)

    with sns.plotting_context("notebook", font_scale=font_scale):

        # The axes are resolved inside the context, as in the heatmap call itself
        plot_ax = check_ax(ax, kwargs.pop('figsize', None))
        tick_kwargs = check_args(['xticklabels', 'yticklabels'], x_labels, y_labels)

        sns.heatmap(data, square=square, ax=plot_ax, cmap=cmap, **tick_kwargs, **kwargs)

    plt.tight_layout()
def plot_dendrogram(data, labels=None, attribute='score', transpose=False,
                    method='complete', metric='cosine', ax=None, **kwargs):
    """Draw a dendrogram of the given data, based on hierarchical clustering.

    Parameters
    ----------
    data : Counts or 2d array
        Data to plot in a dendrogram.
    labels : list of str, optional
        Labels for the dendrogram.
        If `data` is a Counts object, this is replaced by the labels of its terms.
    attribute : {'score', 'counts'}, optional
        Which data attribute from the counts object to plot the data for.
        Only used if the `data` input is a Counts object.
    transpose : bool, optional, default: False
        Whether to transpose the data before plotting.
        Only applied if the `data` input is a Counts object, in which case
        it also selects the 'B' terms, rather than the 'A' terms, for labels.
    method : str, optional, default: 'complete'
        The linkage algorithm to use.
        See `scipy.cluster.hierarchy.linkage` for options.
    metric : str or function, optional, default: 'cosine'
        The distance metric to use.
        See `scipy.spatial.distance.pdist` for options.
    ax : matplotlib.Axes, optional
        Figure axes upon which to plot.
    **kwargs
        Additional keyword arguments to pass through to scipy.cluster.hierarchy.dendrogram.
        The keys 'font_scale' (default: 1.0) and 'figsize' (default: None) are
        removed and used to set up the plotting context and the axes.
        Defaults are applied for 'orientation' ('left'), 'color_threshold' (0.25),
        and 'leaf_font_size' (12), if not provided.

    Notes
    -----
    This function is a wrapper of the `scipy.cluster.hierarchy.dendrogram` plot function.

    Examples
    --------
    See the example for the :meth:`~.compute_score` method of the :class:`~.Counts` class.
    """

    if isinstance(data, Counts):
        term_set = 'B' if transpose else 'A'
        labels = data.terms[term_set].labels
        values = getattr(data, attribute)
        data = values.T if transpose else values

    linkage_matrix = hier.linkage(data, method=method, metric=metric)

    font_scale = kwargs.pop('font_scale', 1.0)

    with sns.plotting_context("notebook", font_scale=font_scale):

        # Resolve settings that have defaults, removing them from the pass-through kwargs
        orientation = kwargs.pop('orientation', 'left')
        color_threshold = kwargs.pop('color_threshold', 0.25)
        leaf_font_size = kwargs.pop('leaf_font_size', 12)
        plot_ax = check_ax(ax, kwargs.pop('figsize', None))
        label_kwargs = check_args(['labels'], labels)

        hier.dendrogram(
            linkage_matrix,
            orientation=orientation,
            color_threshold=color_threshold,
            leaf_font_size=leaf_font_size,
            ax=plot_ax,
            **label_kwargs,
            **kwargs,
        )

    plt.tight_layout()
def plot_vector(data, dim='A', transpose=False, cmap='purple', ax=None, **kwargs):
    """Draw a vector of values as an annotated heatmap.

    Parameters
    ----------
    data : Counts or 1d array
        Data to plot as a heatmap.
        If a Counts object, the counts of the selected set of terms are plotted.
    dim : {'A', 'B'}, optional
        Which set of terms to plot.
        Only used if `data` is a `Counts` object.
    transpose : bool, optional, default: False
        Whether to transpose the data before plotting.
    cmap : {'purple', 'blue'} or matplotlib.cmap
        Colormap to use for the plot.
        A string input is converted to a colormap with `get_cmap`.
    ax : matplotlib.Axes, optional
        Figure axes upon which to plot.
    **kwargs
        Additional keyword arguments to pass through to seaborn.heatmap.
        The key 'figsize' (default: None) is removed and used for the axes.
        Defaults are applied for 'square' (True), 'annot' (True), 'fmt' ('d'),
        'cbar' (False), 'xticklabels' ([]), and 'yticklabels' ([]), if not provided.
    """

    if isinstance(cmap, str):
        cmap = get_cmap(cmap)

    if isinstance(data, Counts):
        data = data.terms[dim].counts

    # One dimensional data is given a second axis, making it a column
    if data.ndim == 1:
        data = np.expand_dims(data, 1)

    if transpose:
        data = data.T

    # Resolve settings that have defaults, removing them from the pass-through kwargs
    square = kwargs.pop('square', True)
    annot = kwargs.pop('annot', True)
    fmt = kwargs.pop('fmt', 'd')
    cbar = kwargs.pop('cbar', False)
    xticklabels = kwargs.pop('xticklabels', [])
    yticklabels = kwargs.pop('yticklabels', [])
    plot_ax = check_ax(ax, kwargs.pop('figsize', None))

    sns.heatmap(
        data,
        cmap=cmap,
        square=square,
        annot=annot,
        fmt=fmt,
        annot_kws={"size": 18},
        cbar=cbar,
        xticklabels=xticklabels,
        yticklabels=yticklabels,
        ax=plot_ax,
        **kwargs,
    )
def plot_wordcloud(freq_dist, n_words, ax=None):
    """Draw a wordcloud from a frequency distribution of words.

    Parameters
    ----------
    freq_dist : nltk.FreqDist
        Frequency distribution of words to plot.
    n_words : int
        Number of top words to include in the wordcloud.
    ax : matplotlib.Axes, optional
        Figure axes upon which to plot.
    """

    # Build the wordcloud first, so that no axes are created if this step fails
    word_freqs = conv_freqs(freq_dist, n_words)
    wordcloud = create_wordcloud(word_freqs)

    # Show the wordcloud as an image, hiding the axis
    plot_ax = check_ax(ax, (8, 8))
    plot_ax.imshow(wordcloud)
    plot_ax.axis("off")
def plot_matrix(data, x_labels=None, y_labels=None, cmap='purple', square=False, ax=None):
    """Draw a heatmap of a data matrix.

    Parameters
    ----------
    data : 2d array
        Data to plot in matrix format.
    x_labels : list of str
        Labels for the x-axis.
    y_labels : list of str
        Labels for the y-axis.
    cmap : {'purple', 'blue'} or matplotlib.cmap
        Colormap to use for the plot.
        A string input is converted to a colormap with `get_cmap`.
    square : bool
        Whether to plot all the cells as equally sized squares.
    ax : matplotlib.Axes, optional
        Figure axes upon which to plot.

    Notes
    -----
    This function is a wrapper of the seaborn `heatmap` plot function.

    Examples
    --------
    See the example for the :meth:`~.compute_score` method of the :class:`~.Counts` class.
    """

    if isinstance(cmap, str):
        cmap = get_cmap(cmap)

    plot_ax = check_ax(ax)
    tick_kwargs = check_args(['xticklabels', 'yticklabels'], x_labels, y_labels)

    sns.heatmap(data, square=square, ax=plot_ax, cmap=cmap, **tick_kwargs)

    plt.tight_layout()
def plot_wordcloud(freq_dist, n_words, ax=None, **plt_kwargs):
    """Draw a wordcloud from a frequency distribution of words.

    Parameters
    ----------
    freq_dist : collections.Counter
        Frequency distribution of words to plot.
    n_words : int
        Number of top words to include in the wordcloud.
    ax : matplotlib.Axes, optional
        Figure axes upon which to plot.
    plt_kwargs
        Additional keyword arguments for the plot.
        The key 'figsize' (default: (8, 8)) is removed and used for the axes,
        with the rest passed through to the `imshow` call.

    Examples
    --------
    See the :meth:`~.create_freq_dist` method of the :class:`~.ArticlesAll` object.
    """

    # Build the wordcloud first, so that no axes are created if this step fails
    word_freqs = conv_freqs(freq_dist, n_words)
    wordcloud = create_wordcloud(word_freqs)

    # Show the wordcloud as an image, hiding the axis
    figsize = plt_kwargs.pop('figsize', (8, 8))
    plot_ax = check_ax(ax, figsize)
    plot_ax.imshow(wordcloud, **plt_kwargs)
    plot_ax.axis("off")
def plot_years(years, year_range=None, ax=None, **plt_kwargs):
    """Plot the number of publications across years, as a line with point markers.

    Parameters
    ----------
    years : collections.Counter
        Data on the number of publications per year.
    year_range : list of [int, int], optional
        The range of years to plot on the x-axis, inclusive.
        A falsy entry (such as None) leaves that side of the range unbounded.
    ax : matplotlib.Axes, optional
        Figure axes upon which to plot.
    plt_kwargs
        Additional keyword arguments for the plot.
        The keys 'figsize' (default: (10, 5)), 'fontsize' (default: 18),
        'xlabel', and 'ylabel' are removed and used for the axes and labels.
        The line settings 'linewidth' / 'lw' (default: 3), 'marker' (default: '.'),
        'markersize' / 'ms' (default: 10), and 'markerfacecolor' (default: 'white')
        are resolved here, with the rest passed through to the `plot` call.

    Notes
    -----
    - Publication years are collected together in the :class:`~.ArticlesAll` class.
    - If there is no data to plot, for example if `year_range` excludes all the
      available years, the y-axis limits are left unset, rather than raising an error.

    Examples
    --------
    Plot a histogram of publication years:

    >>> from collections import Counter
    >>> plot_years(years=Counter({'2018': 25, '2019': 50, '2020':75}))
    """

    ax = check_ax(ax, plt_kwargs.pop('figsize', (10, 5)))

    # Get the plot data, making sure it is sorted
    sort_inds = np.argsort(list(years.keys()))
    x_data = np.array(list(years.keys()))[sort_inds]
    y_data = np.array(list(years.values()))[sort_inds]

    # Restrict the data to the desired plot range
    if year_range:
        range_inds = np.logical_and(
            x_data >= (year_range[0] if year_range[0] else -np.inf),
            x_data <= (year_range[1] if year_range[1] else np.inf))
        x_data = x_data[range_inds]
        y_data = y_data[range_inds]

    # Grab any plot inputs for labels
    fontsize = plt_kwargs.pop('fontsize', 18)
    xlabel = plt_kwargs.pop('xlabel', 'Year of Publication')
    ylabel = plt_kwargs.pop('ylabel', 'Number of Articles')

    # Add line and points to plot
    ax.plot(x_data, y_data,
            linewidth=check_aliases(plt_kwargs, ['linewidth', 'lw'], 3),
            marker=plt_kwargs.pop('marker', '.'),
            markersize=check_aliases(plt_kwargs, ['markersize', 'ms'], 10),
            markerfacecolor=plt_kwargs.pop('markerfacecolor', 'white'),
            **plt_kwargs)

    # Set plot limits, adding headroom of ~3% of the data range above the maximum
    #   This is skipped if there is no data, as max / min of empty data raises an error
    if len(y_data) > 0:
        y_max, y_min = max(y_data), min(y_data)
        ax.set_ylim([0, y_max + int(np.ceil(0.03 * (y_max - y_min)))])

    # Add title & labels
    ax.set_xlabel(xlabel, fontsize=fontsize)
    ax.set_ylabel(ylabel, fontsize=fontsize)