Example #1
0
def plot(df, idx, weights, mappers=None, cols=4):
    """
    Plot bar plots for all columns in the DataFrame.

    One subplot is drawn per key of ``mappers``; the subplots are laid out
    on a grid that is ``cols`` panels wide.

    Parameters
    ----------
    df : pd.DataFrame
        A pandas DataFrame object with the data.

    idx : iterable
        An iterable containing the indices of selected participants.

    weights : dict {column: float}
        Weighting over dataframe columns. Only used when ``mappers`` is
        None, in which case it is forwarded to ``construct_mappers``.

    mappers : optional, dict {column: entrofy.BaseMapper}
        Dictionary mapping dataframe columns to BaseMapper objects.
        If None, it is built with ``construct_mappers(df, weights)``.

    cols : int, optional
        Number of panels per row of the subplot grid. Default is 4.

    Returns
    -------
    axes : list of matplotlib.Axes objects
        The Axes objects returned by ``plot_fractions``, one per plotted
        column, in the iteration order of ``mappers``.

    """
    if mappers is None:
        mappers = construct_mappers(df, weights)

    columns = list(mappers.keys())
    ncolumns = len(columns)

    # Number of grid rows needed to hold every panel. This must be a plain
    # int: add_subplot does not accept a float row count. Keeping at least
    # one row avoids a zero-height figure when there is nothing to plot.
    rows = max(int(np.ceil(ncolumns / float(cols))), 1)

    fig = plt.figure(figsize=(4*cols, 3*rows))

    axes = []

    for i, c in enumerate(columns):
        # subplot positions are 1-based
        ax = fig.add_subplot(rows, cols, i+1)
        ax, _ = plot_fractions(df[c], idx, c, mappers[c], ax=ax)
        axes.append(ax)

    # Lay out the figure created here rather than whichever figure happens
    # to be current in pyplot's global state.
    fig.tight_layout()
    return axes
Example #2
0
def plot(df, idx, weights, mappers=None, cols=4):
    """
    Plot bar plots for all columns in the DataFrame.

    One subplot is drawn per key of ``mappers``; the subplots are laid out
    on a grid that is ``cols`` panels wide.

    Parameters
    ----------
    df : pd.DataFrame
        A pandas DataFrame object with the data.

    idx : iterable
        An iterable containing the indices of selected participants.

    weights : dict {column: float}
        Weighting over dataframe columns. Only used when ``mappers`` is
        None, in which case it is forwarded to ``construct_mappers``.

    mappers : optional, dict {column: entrofy.BaseMapper}
        Dictionary mapping dataframe columns to BaseMapper objects.
        If None, it is built with ``construct_mappers(df, weights)``.

    cols : int, optional
        Number of panels per row of the subplot grid. Default is 4.

    Returns
    -------
    axes : list of matplotlib.Axes objects
        The Axes objects returned by ``plot_fractions``, one per plotted
        column, in the iteration order of ``mappers``.

    """
    if mappers is None:
        mappers = construct_mappers(df, weights)

    columns = list(mappers.keys())
    ncolumns = len(columns)

    # Number of grid rows needed to hold every panel. This must be a plain
    # int: add_subplot does not accept a float row count. Keeping at least
    # one row avoids a zero-height figure when there is nothing to plot.
    rows = max(int(np.ceil(ncolumns / float(cols))), 1)

    fig = plt.figure(figsize=(4*cols, 3*rows))

    axes = []

    for i, c in enumerate(columns):
        # subplot positions are 1-based
        ax = fig.add_subplot(rows, cols, i+1)
        ax, _ = plot_fractions(df[c], idx, c, mappers[c], ax=ax)
        axes.append(ax)

    # Lay out the figure created here rather than whichever figure happens
    # to be current in pyplot's global state.
    fig.tight_layout()
    return axes
Example #3
0
def plot_triangle(df, weights, mappers=None, cmap="YlGnBu", bins=30,
                  prefac=10., cat_type="box", cont_type="hist", fig=None):
    """
    Make a triangle plot of all the relevant columns in the DataFrame.

    The panels form an ``nkeys x nkeys`` grid, where ``nkeys`` is the number
    of mappers. Diagonal panels show the univariate distribution of one
    column, panels below the diagonal show the bivariate distribution of a
    pair of columns, and panels above the diagonal are blanked out.

    Parameters
    ----------
    df : pd.DataFrame
        A pandas DataFrame with the data

    weights : dict {column: float}
        Weighting over dataframe columns. Only used when ``mappers`` is
        None, in which case it is forwarded to ``construct_mappers``.

    mappers : optional, dict {column: entrofy.BaseMapper}
        Dictionary mapping dataframe columns to BaseMapper objects.
        If None, it is built with ``construct_mappers(df, weights)``.

    cmap : matplotlib.cm.colormap
        A matplotlib colormap to use for shading the bubbles

    bins : int
        The number of bins for the histogram (forwarded to
        ``plot_distribution``).

    prefac : float
        A pre-factor steering the shading of the bubbles (forwarded to
        ``plot_correlation``).

    cat_type : {"box" | "strip" | "swarm" | "violin" | "categorical"}
        The type of plot for any plot including both categorical and continuous
        data.

    cont_type : str
        The type of plot to produce for continuous data, forwarded to
        ``plot_correlation``. The original documentation lists a kernel
        density estimate ("kde") and a scatter plot ("scatter"), while the
        default here is "hist".
        NOTE(review): confirm the accepted values against plot_correlation.

    fig : matplotlib.Figure object
        A Figure object to plot in. If None, a new figure is created.

    Returns
    -------
    fig : matplotlib.Figure object
        The Figure object

    axes : numpy.ndarray
        Array of shape (nkeys, nkeys) holding the matplotlib.Axes objects;
        ``axes[i, j]`` is the panel in row ``i`` and column ``j``.

    Raises
    ------
    TypeError
        If a mapper is neither an ObjectMapper nor a ContinuousMapper.

    """

    # if mappers are None, construct them with some default settings
    if mappers is None:
        mappers = construct_mappers(df, weights)

    # the keys, sorted so that the panel order is reproducible
    keys = np.sort(list(mappers.keys()))

    # the number of panels needed per side of the grid
    nkeys = len(keys)

    # determine the type of each column from its mapper class
    all_types = []
    for key in keys:
        if isinstance(mappers[key], ObjectMapper):
            all_types.append("categorical")
        elif isinstance(mappers[key], ContinuousMapper):
            all_types.append("continuous")
        else:
            raise TypeError("Data type not recognized!")

    # construct the figure, or fill the supplied one with a full grid
    if fig is None:
        fig, axes = plt.subplots(nkeys, nkeys, figsize=(4*nkeys, 3*nkeys))
    else:
        # subplot positions are 1-based and run row by row
        axes = [fig.add_subplot(nkeys, nkeys, pos)
                for pos in range(1, nkeys*nkeys + 1)]

    axes = np.array(axes).reshape((nkeys, nkeys))

    for i, kx in enumerate(keys):
        for j, ky in enumerate(keys):
            xtype = all_types[i]
            ytype = all_types[j]
            ax = axes[i, j]

            # upper triangle: print white space
            if i < j:
                for side in ('right', 'top', 'left', 'bottom'):
                    ax.spines[side].set_visible(False)
                # set_axis_bgcolor is the legacy name of set_facecolor and
                # no longer exists in current matplotlib; prefer the new
                # name and fall back for old installations.
                if hasattr(ax, "set_facecolor"):
                    ax.set_facecolor('white')
                else:
                    ax.set_axis_bgcolor('white')
                ax.set_xlabel("")
                ax.set_ylabel("")
                ax.axis('off')
                continue

            # diagonal: plot the univariate distribution
            if i == j:
                ax = plot_distribution(df, kx, xmapper=mappers[kx],
                                       xtype=xtype, ax=ax,
                                       cmap=cmap, bins=bins)

            # lower triangle: plot the bivariate distributions; the column
            # key (ky) goes on the x-axis and the row key (kx) on the y-axis
            else:
                ax = plot_correlation(df, ky, kx, xmapper=mappers[ky],
                                      ymapper=mappers[kx], ax=ax,
                                      cmap=cmap, xtype=ytype,
                                      ytype=xtype, prefac=prefac,
                                      cat_type=cat_type,
                                      cont_type=cont_type)

            # keep whatever Axes object the plotting helper handed back
            axes[i, j] = ax

            # only the bottom row keeps its x tick labels and x label
            if i < nkeys - 1:
                ax.set_xticklabels([])
                ax.set_xlabel("")
            else:
                for label in ax.get_xticklabels():
                    label.set_rotation(45)

            # only the first column keeps its y tick labels; diagonal
            # panels additionally keep their y label
            if j > 0:
                ax.set_yticklabels([])
                if i != j:
                    ax.set_ylabel("")

    # Lay out the figure being drawn into; a caller-supplied figure is not
    # necessarily pyplot's current figure.
    fig.tight_layout()

    return fig, axes
Example #4
0
def plot_triangle(df, weights, mappers=None, cmap="YlGnBu", bins=30,
                  prefac=10., cat_type="box", cont_type="hist", fig=None):
    """
    Make a triangle plot of all the relevant columns in the DataFrame.

    The panels form an ``nkeys x nkeys`` grid, where ``nkeys`` is the number
    of mappers. Diagonal panels show the univariate distribution of one
    column, panels below the diagonal show the bivariate distribution of a
    pair of columns, and panels above the diagonal are blanked out.

    Parameters
    ----------
    df : pd.DataFrame
        A pandas DataFrame with the data

    weights : dict {column: float}
        Weighting over dataframe columns. Only used when ``mappers`` is
        None, in which case it is forwarded to ``construct_mappers``.

    mappers : optional, dict {column: entrofy.BaseMapper}
        Dictionary mapping dataframe columns to BaseMapper objects.
        If None, it is built with ``construct_mappers(df, weights)``.

    cmap : matplotlib.cm.colormap
        A matplotlib colormap to use for shading the bubbles

    bins : int
        The number of bins for the histogram (forwarded to
        ``plot_distribution``).

    prefac : float
        A pre-factor steering the shading of the bubbles (forwarded to
        ``plot_correlation``).

    cat_type : {"box" | "strip" | "swarm" | "violin" | "categorical"}
        The type of plot for any plot including both categorical and continuous
        data.

    cont_type : str
        The type of plot to produce for continuous data, forwarded to
        ``plot_correlation``. The original documentation lists a kernel
        density estimate ("kde") and a scatter plot ("scatter"), while the
        default here is "hist".
        NOTE(review): confirm the accepted values against plot_correlation.

    fig : matplotlib.Figure object
        A Figure object to plot in. If None, a new figure is created.

    Returns
    -------
    fig : matplotlib.Figure object
        The Figure object

    axes : numpy.ndarray
        Array of shape (nkeys, nkeys) holding the matplotlib.Axes objects;
        ``axes[i, j]`` is the panel in row ``i`` and column ``j``.

    Raises
    ------
    TypeError
        If a mapper is neither an ObjectMapper nor a ContinuousMapper.

    """

    # if mappers are None, construct them with some default settings
    if mappers is None:
        mappers = construct_mappers(df, weights)

    # the keys, sorted so that the panel order is reproducible
    keys = np.sort(list(mappers.keys()))

    # the number of panels needed per side of the grid
    nkeys = len(keys)

    # determine the type of each column from its mapper class
    all_types = []
    for key in keys:
        if isinstance(mappers[key], ObjectMapper):
            all_types.append("categorical")
        elif isinstance(mappers[key], ContinuousMapper):
            all_types.append("continuous")
        else:
            raise TypeError("Data type not recognized!")

    # construct the figure, or fill the supplied one with a full grid
    if fig is None:
        fig, axes = plt.subplots(nkeys, nkeys, figsize=(4*nkeys, 3*nkeys))
    else:
        # subplot positions are 1-based and run row by row
        axes = [fig.add_subplot(nkeys, nkeys, pos)
                for pos in range(1, nkeys*nkeys + 1)]

    axes = np.array(axes).reshape((nkeys, nkeys))

    for i, kx in enumerate(keys):
        for j, ky in enumerate(keys):
            xtype = all_types[i]
            ytype = all_types[j]
            ax = axes[i, j]

            # upper triangle: print white space
            if i < j:
                for side in ('right', 'top', 'left', 'bottom'):
                    ax.spines[side].set_visible(False)
                # set_axis_bgcolor is the legacy name of set_facecolor and
                # no longer exists in current matplotlib; prefer the new
                # name and fall back for old installations.
                if hasattr(ax, "set_facecolor"):
                    ax.set_facecolor('white')
                else:
                    ax.set_axis_bgcolor('white')
                ax.set_xlabel("")
                ax.set_ylabel("")
                ax.axis('off')
                continue

            # diagonal: plot the univariate distribution
            if i == j:
                ax = plot_distribution(df, kx, xmapper=mappers[kx],
                                       xtype=xtype, ax=ax,
                                       cmap=cmap, bins=bins)

            # lower triangle: plot the bivariate distributions; the column
            # key (ky) goes on the x-axis and the row key (kx) on the y-axis
            else:
                ax = plot_correlation(df, ky, kx, xmapper=mappers[ky],
                                      ymapper=mappers[kx], ax=ax,
                                      cmap=cmap, xtype=ytype,
                                      ytype=xtype, prefac=prefac,
                                      cat_type=cat_type,
                                      cont_type=cont_type)

            # keep whatever Axes object the plotting helper handed back
            axes[i, j] = ax

            # only the bottom row keeps its x tick labels and x label
            if i < nkeys - 1:
                ax.set_xticklabels([])
                ax.set_xlabel("")
            else:
                for label in ax.get_xticklabels():
                    label.set_rotation(45)

            # only the first column keeps its y tick labels; diagonal
            # panels additionally keep their y label
            if j > 0:
                ax.set_yticklabels([])
                if i != j:
                    ax.set_ylabel("")

    # Lay out the figure being drawn into; a caller-supplied figure is not
    # necessarily pyplot's current figure.
    fig.tight_layout()

    return fig, axes