Esempio n. 1
0
def plot_dendrogram(data, labels=None, ax=None):
    """Draw a hierarchical clustering dendrogram for the given data.

    Parameters
    ----------
    data : 2d array
        Data to cluster and display in a dendrogram.
    labels : list of str, optional
        Labels for the dendrogram.
    ax : matplotlib.Axes, optional
        Figure axes upon which to plot.

    Notes
    -----
    The linkage is computed with the 'complete' method and the 'cosine' metric.
    The plot itself is created by the scipy `dendrogram` plot function.

    Examples
    --------
    See the example for the :meth:`~.compute_score` method of the :class:`~.Counts` class.
    """

    # Cluster the data
    clustering = hier.linkage(data, method='complete', metric='cosine')

    # Resolve the axes to draw on, and the optional label arguments
    plot_ax = check_ax(ax)
    label_args = check_args(['labels'], labels)

    # Fixed display settings for the dendrogram
    display_settings = {
        'orientation': 'left',
        'color_threshold': 0.25,
        'leaf_font_size': 12,
    }

    hier.dendrogram(clustering, ax=plot_ax, **display_settings, **label_args)
    plt.tight_layout()
Esempio n. 2
0
def plot_years(years, year_range=None, ax=None):
    """Plot a histogram of the number publications across years.

    Parameters
    ----------
    years : collections.Counter
        Data on the number of publications per year.
    year_range : list of [int, int], optional
        The range of years to plot on the x-axis.
    ax : matplotlib.Axes, optional
        Figure axes upon which to plot.
        If not provided, `check_ax` is asked for axes, with a figure size of (10, 5).

    Notes
    -----
    All drawing is done through the resolved axes object, rather than through the
    `pyplot` state machine, so that a provided `ax` is the one that gets drawn on,
    even if it is not the currently active axes.
    """

    ax = check_ax(ax, (10, 5))

    # Extract x & y data to plot
    x_dat = list(years.keys())
    y_dat = list(years.values())

    # Add line and points to plot
    ax.plot(x_dat, y_dat)
    ax.plot(x_dat, y_dat, '.', markersize=16)

    # Set plot limits
    if year_range:
        ax.set_xlim([year_range[0], year_range[1]])
    # The `default` guards against empty input, for which `max` would otherwise raise
    ax.set_ylim([0, max(y_dat, default=0) + 3])

    # Add title & labels
    ax.set_title('Publication History', fontsize=24, fontweight='bold')
    ax.set_xlabel('Year of Publication', fontsize=18)
    ax.set_ylabel('Number of Articles', fontsize=18)
Esempio n. 3
0
def plot_matrix(data,
                x_labels=None,
                y_labels=None,
                attribute='score',
                transpose=False,
                cmap='purple',
                square=False,
                ax=None,
                **kwargs):
    """Draw a heatmap of a data matrix.

    Parameters
    ----------
    data : Counts or 2d array
        Data to plot in matrix format.
    x_labels : list of str, optional
        Labels for the x-axis.
    y_labels : list of str, optional
        Labels for the y-axis.
    attribute : {'score', 'counts'}, optional
        Which data attribute from the counts object to plot the data for.
        Only used if the `data` input is a Counts object.
    transpose : bool, optional, default: False
        Whether to transpose the data before plotting.
    cmap : {'purple', 'blue'} or matplotlib.cmap
        Colormap to use for the plot.
        If string, uses a sequential palette of the specified color.
    square : bool
        Whether to plot all the cells as equally sized squares.
    ax : matplotlib.Axes, optional
        Figure axes upon which to plot.
    **kwargs
        Additional keyword arguments to pass through to seaborn.heatmap.
        The entries 'font_scale' (default: 1.0) and 'figsize' (default: None) are
        consumed by this function, and are not passed through.

    Notes
    -----
    This function is a wrapper of the seaborn `heatmap` plot function.

    Examples
    --------
    See the example for the :meth:`~.compute_score` method of the :class:`~.Counts` class.
    """

    # Take out the settings that are used here, rather than by seaborn
    font_scale = kwargs.pop('font_scale', 1.0)
    figsize = kwargs.pop('figsize', None)

    # Resolve a named colormap into a colormap object
    if isinstance(cmap, str):
        cmap = get_cmap(cmap)

    # Get the data and labels to plot from the given input
    data, x_labels, y_labels = counts_data_helper(
        data, x_labels, y_labels, attribute, transpose)

    # Resolve the axes inside the context, so that it also applies to any new figure
    with sns.plotting_context("notebook", font_scale=font_scale):
        plot_ax = check_ax(ax, figsize)
        tick_args = check_args(['xticklabels', 'yticklabels'], x_labels, y_labels)
        sns.heatmap(data, square=square, ax=plot_ax, cmap=cmap, **tick_args, **kwargs)

    plt.tight_layout()
Esempio n. 4
0
def plot_dendrogram(data,
                    labels=None,
                    attribute='score',
                    transpose=False,
                    method='complete',
                    metric='cosine',
                    ax=None,
                    **kwargs):
    """Plot a dendrogram of the given data based on hierarchical clustering.

    Parameters
    ----------
    data : Counts or 2d array
        Data to plot in a dendrogram.
    labels : list of str, optional
        Labels for the dendrogram.
        If not provided and `data` is a Counts object, the labels are taken from
        the terms of the Counts object ('A' terms, or 'B' terms if `transpose`).
    attribute : {'score', 'counts'}, optional
        Which data attribute from the counts object to plot the data for.
        Only used if the `data` input is a Counts object.
    transpose : bool, optional, default: False
        Whether to transpose the data before plotting.
        Only used if the `data` input is a Counts object.
    method : str, optional, default: 'complete'
        The linkage algorithm to use. See `scipy.cluster.hierarchy.linkage` for options.
    metric : str or function, optional, default: 'cosine'
        The distance metric to use.  See `scipy.spatial.distance.pdist` for options.
    ax : matplotlib.Axes, optional
        Figure axes upon which to plot.
    **kwargs
        Additional keyword arguments to pass through to scipy.cluster.hierarchy.dendrogram.
        The entries 'font_scale' (default: 1.0) and 'figsize' (default: None) are
        consumed by this function, and are not passed through.
        If not provided, 'orientation' defaults to 'left', 'color_threshold' to 0.25,
        and 'leaf_font_size' to 12.

    Notes
    -----
    This function is a wrapper of the `scipy.cluster.hierarchy.dendrogram` plot function.

    Examples
    --------
    See the example for the :meth:`~.compute_score` method of the :class:`~.Counts` class.
    """

    if isinstance(data, Counts):
        # Only fall back on the labels of the Counts object if none were given,
        #   so that explicitly provided labels are not silently discarded
        if labels is None:
            labels = data.terms['B' if transpose else 'A'].labels
        values = getattr(data, attribute)
        data = values.T if transpose else values

    linkage_data = hier.linkage(data, method=method, metric=metric)

    # Settings are popped from kwargs so they are not passed twice to `dendrogram`
    with sns.plotting_context("notebook",
                              font_scale=kwargs.pop('font_scale', 1.0)):
        hier.dendrogram(linkage_data,
                        orientation=kwargs.pop('orientation', 'left'),
                        color_threshold=kwargs.pop('color_threshold', 0.25),
                        leaf_font_size=kwargs.pop('leaf_font_size', 12),
                        ax=check_ax(ax, kwargs.pop('figsize', None)),
                        **check_args(['labels'], labels),
                        **kwargs)
    plt.tight_layout()
Esempio n. 5
0
def plot_vector(data,
                dim='A',
                transpose=False,
                cmap='purple',
                ax=None,
                **kwargs):
    """Plot a vector as an annotated heatmap.

    Parameters
    ----------
    data : Counts or 1d array
        Data to plot as a heatmap.
    dim : {'A', 'B'}, optional
        Which set of terms to plot.
        Only used if `data` is a `Counts` object.
    transpose : bool, optional, default: False
        Whether to transpose the data before plotting.
    cmap : {'purple', 'blue'} or matplotlib.cmap
        Colormap to use for the plot.
        If string, uses a sequential palette of the specified color.
    ax : matplotlib.Axes, optional
        Figure axes upon which to plot.
    **kwargs
        Additional keyword arguments to pass through to seaborn.heatmap.
        The entry 'figsize' (default: None) is consumed by this function.
        If not provided, the following defaults are used: square=True, annot=True,
        fmt='d', annot_kws={"size": 18}, cbar=False, xticklabels=[], yticklabels=[].
    """

    if isinstance(cmap, str):
        cmap = get_cmap(cmap)

    if isinstance(data, Counts):
        data = data.terms[dim].counts
    # Heatmaps need 2d input, so a 1d vector is turned into a single column
    if data.ndim == 1:
        data = np.expand_dims(data, 1)
    if transpose:
        data = data.T

    # Every default is popped from kwargs, so that a user-provided value replaces
    #   the default, instead of being passed as a duplicate keyword argument
    sns.heatmap(data,
                cmap=cmap,
                square=kwargs.pop('square', True),
                annot=kwargs.pop('annot', True),
                fmt=kwargs.pop('fmt', 'd'),
                annot_kws=kwargs.pop('annot_kws', {"size": 18}),
                cbar=kwargs.pop('cbar', False),
                xticklabels=kwargs.pop('xticklabels', []),
                yticklabels=kwargs.pop('yticklabels', []),
                ax=check_ax(ax, kwargs.pop('figsize', None)),
                **kwargs)
Esempio n. 6
0
def plot_wordcloud(freq_dist, n_words, ax=None):
    """Draw a wordcloud from a frequency distribution of words.

    Parameters
    ----------
    freq_dist : nltk.FreqDist
        Frequency distribution of words to plot.
    n_words : int
        Number of top words to include in the wordcloud.
    ax : matplotlib.Axes, optional
        Figure axes upon which to plot.
        If not provided, `check_ax` is asked for axes, with a figure size of (8, 8).
    """

    # Convert the frequencies, and build the wordcloud from them
    frequencies = conv_freqs(freq_dist, n_words)
    wordcloud = create_wordcloud(frequencies)

    # Display the wordcloud as an image, with the axis turned off
    plot_ax = check_ax(ax, (8, 8))
    plot_ax.imshow(wordcloud)
    plot_ax.axis("off")
Esempio n. 7
0
def plot_matrix(data,
                x_labels=None,
                y_labels=None,
                cmap='purple',
                square=False,
                ax=None):
    """Draw a heatmap of a data matrix.

    Parameters
    ----------
    data : 2d array
        Data to plot in matrix format.
    x_labels : list of str
        Labels for the x-axis.
    y_labels : list of str
        Labels for the y-axis.
    cmap : {'purple', 'blue'} or matplotlib.cmap
        Colormap to use for the plot.
        If string, uses a sequential palette of the specified color.
    square : bool
        Whether to plot all the cells as equally sized squares.
    ax : matplotlib.Axes, optional
        Figure axes upon which to plot.

    Notes
    -----
    This function is a wrapper of the seaborn `heatmap` plot function.

    Examples
    --------
    See the example for the :meth:`~.compute_score` method of the :class:`~.Counts` class.
    """

    # Resolve a named colormap into a colormap object
    colormap = get_cmap(cmap) if isinstance(cmap, str) else cmap

    # Resolve the axes to draw on, and the tick label arguments
    plot_ax = check_ax(ax)
    tick_args = check_args(['xticklabels', 'yticklabels'], x_labels, y_labels)

    sns.heatmap(data, square=square, ax=plot_ax, cmap=colormap, **tick_args)
    plt.tight_layout()
Esempio n. 8
0
def plot_wordcloud(freq_dist, n_words, ax=None, **plt_kwargs):
    """Draw a wordcloud from a frequency distribution of words.

    Parameters
    ----------
    freq_dist : collections.Counter
        Frequency distribution of words to plot.
    n_words : int
        Number of top words to include in the wordcloud.
    ax : matplotlib.Axes, optional
        Figure axes upon which to plot.
    plt_kwargs
        Additional keyword arguments for the plot.
        The entry 'figsize' (default: (8, 8)) is used for resolving the axes,
        and all other entries are passed through to the `imshow` call.

    Examples
    --------
    See the :meth:`~.create_freq_dist` method of the :class:`~.ArticlesAll` object.
    """

    # Convert the frequencies, and build the wordcloud from them
    frequencies = conv_freqs(freq_dist, n_words)
    wordcloud = create_wordcloud(frequencies)

    # Take out the figure size, so that it is not passed on to the image call
    figsize = plt_kwargs.pop('figsize', (8, 8))
    plot_ax = check_ax(ax, figsize)

    # Display the wordcloud as an image, with the axis turned off
    plot_ax.imshow(wordcloud, **plt_kwargs)
    plot_ax.axis("off")
Esempio n. 9
0
def plot_years(years, year_range=None, ax=None, **plt_kwargs):
    """Plot a histogram of the number publications across years.

    Parameters
    ----------
    years : collections.Counter
        Data on the number of publications per year.
    year_range : list of [int, int], optional
        The range of years to plot on the x-axis, inclusive.
        A falsy entry (such as None) leaves that side of the range unbounded.
    ax : matplotlib.Axes, optional
        Figure axes upon which to plot.
    plt_kwargs
        Additional keyword arguments for the plot.
        The entries 'figsize' (default: (10, 5)), 'fontsize' (default: 18),
        'xlabel' and 'ylabel' are consumed by this function.
        All remaining entries are passed through to the `plot` call of the axes.

    Examples
    --------
    Plot a histogram of publication years:

    >>> from collections import Counter
    >>> plot_years(years=Counter({'2018': 25, '2019': 50, '2020':75}))

    Notes
    -----
    Publication years are collected together in the :class:`~.ArticlesAll` class.

    If there is no data to plot, because `years` is empty or because `year_range`
    excludes all of the data, the y-axis limits are left at their defaults.
    """

    ax = check_ax(ax, plt_kwargs.pop('figsize', (10, 5)))

    # Get the plot data, making sure it is sorted
    sort_inds = np.argsort(list(years.keys()))
    x_data = np.array(list(years.keys()))[sort_inds]
    y_data = np.array(list(years.values()))[sort_inds]

    # Restrict the data to the desired plot range
    if year_range:
        lower = year_range[0] if year_range[0] else -np.inf
        upper = year_range[1] if year_range[1] else np.inf
        range_inds = np.logical_and(x_data >= lower, x_data <= upper)
        x_data = x_data[range_inds]
        y_data = y_data[range_inds]

    # Grab any plot inputs for labels
    fontsize = plt_kwargs.pop('fontsize', 18)
    xlabel = plt_kwargs.pop('xlabel', 'Year of Publication')
    ylabel = plt_kwargs.pop('ylabel', 'Number of Articles')

    # Add line and points to plot
    ax.plot(x_data,
            y_data,
            linewidth=check_aliases(plt_kwargs, ['linewidth', 'lw'], 3),
            marker=plt_kwargs.pop('marker', '.'),
            markersize=check_aliases(plt_kwargs, ['markersize', 'ms'], 10),
            markerfacecolor=plt_kwargs.pop('markerfacecolor', 'white'),
            **plt_kwargs)

    # Set plot limits, adding some headroom (3% of the data range) above the maximum
    #   This is skipped if there is no data, as `max` & `min` fail on empty input
    if y_data.size:
        y_max, y_min = max(y_data), min(y_data)
        ax.set_ylim([0, y_max + int(np.ceil(0.03 * (y_max - y_min)))])

    # Add title & labels
    ax.set_xlabel(xlabel, fontsize=fontsize)
    ax.set_ylabel(ylabel, fontsize=fontsize)