Example #1
0
def plot_standard_correlation_coefficient():
    """Show a scatter matrix for four selected columns of ``data_training``.

    The columns are picked by name from the ``data_training`` object that
    lives outside this function, drawn with ``scatter_matrix`` using a
    ``figsize`` of ``(12, 8)``, and displayed with ``plt.show()``.
    Nothing is returned.
    """
    columns = [
        'median_house_value',
        'median_income',
        'total_rooms',
        'housing_median_age',
    ]
    selected = data_training[columns]
    scatter_matrix(selected, figsize=(12, 8))
    plt.show()
Example #2
0
def scatter_matrix(frame,
                   alpha=0.5,
                   figsize=None,
                   ax=None,
                   grid=False,
                   diagonal='hist',
                   marker='.',
                   density_kwds=None,
                   hist_kwds=None,
                   range_padding=0.05,
                   **kwds):
    """
    Draw a matrix of scatter plots.

    This function does no plotting itself: it looks up the plotting
    backend with ``_get_plot_backend()`` and forwards every argument,
    by keyword, to that backend's ``scatter_matrix``.

    Parameters
    ----------
    frame : DataFrame
        The data to plot.
    alpha : float, optional
        Amount of transparency applied.
    figsize : (float, float), optional
        A tuple (width, height) in inches.
    ax : Matplotlib axis object, optional
    grid : bool, optional
        Setting this to True will show the grid.
    diagonal : {'hist', 'kde'}
        Which plot to draw on the diagonal: 'hist' for a histogram or
        'kde' for a Kernel Density Estimation.
    marker : str, optional
        Matplotlib marker type, default '.'.
    density_kwds : other plotting keyword arguments
        To be passed to the kernel density estimate plot.
    hist_kwds : other plotting keyword arguments
        To be passed to the hist function.
    range_padding : float, optional
        Relative extension of the axis range in x and y with respect to
        (x_max - x_min) or (y_max - y_min), default 0.05.
    kwds : other plotting keyword arguments
        To be passed to the scatter function.

    Returns
    -------
    numpy.ndarray
        A matrix of scatter plots.

    Examples
    --------
    >>> df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D'])
    >>> scatter_matrix(df, alpha=0.2)
    """
    backend = _get_plot_backend()
    # Gather the named arguments first, then layer the caller's extra
    # scatter keywords on top; the two sets can never share a key because
    # the named ones are explicit parameters of this function.
    options = {
        'frame': frame,
        'alpha': alpha,
        'figsize': figsize,
        'ax': ax,
        'grid': grid,
        'diagonal': diagonal,
        'marker': marker,
        'density_kwds': density_kwds,
        'hist_kwds': hist_kwds,
        'range_padding': range_padding,
    }
    options.update(kwds)
    return backend.scatter_matrix(**options)
Example #3
0
# Quick textual look at the data: the first 20 rows, summary statistics,
# and the number of rows in each 'class' group.
print(dataset.head(20))
print(dataset.describe())
print(dataset.groupby('class').size())

# Data visualisation
# Box and whisker plots, drawn as separate subplots on a 2x2 grid with
# independent x and y axes.
dataset.plot(kind='box',
             subplots=True,
             layout=(2, 2),
             sharex=False,
             sharey=False)
pyplot.show()

# Histograms
dataset.hist()
pyplot.show()

# Scatter plot matrix
scatter_matrix(dataset)
pyplot.show()
Example #4
0
def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False,
                   diagonal='hist', marker='.', density_kwds=None,
                   hist_kwds=None, range_padding=0.05, **kwds):
    """
    Draw a matrix of scatter plots.

    The work is delegated to the ``scatter_matrix`` function of the
    backend returned by ``_get_plot_backend()``; all arguments are passed
    through by keyword and the backend's result is returned as-is.

    Parameters
    ----------
    frame : DataFrame
        The data to plot.
    alpha : float, optional
        Amount of transparency applied.
    figsize : (float, float), optional
        A tuple (width, height) in inches.
    ax : Matplotlib axis object, optional
    grid : bool, optional
        Setting this to True will show the grid.
    diagonal : {'hist', 'kde'}
        Pick between 'kde' and 'hist' for either a Kernel Density
        Estimation or a histogram plot in the diagonal.
    marker : str, optional
        Matplotlib marker type, default '.'.
    density_kwds : other plotting keyword arguments
        To be passed to the kernel density estimate plot.
    hist_kwds : other plotting keyword arguments
        To be passed to the hist function.
    range_padding : float, optional
        Relative extension of the axis range in x and y with respect to
        (x_max - x_min) or (y_max - y_min), default 0.05.
    kwds : other plotting keyword arguments
        To be passed to the scatter function.

    Returns
    -------
    numpy.ndarray
        A matrix of scatter plots.

    Examples
    --------
    >>> df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D'])
    >>> scatter_matrix(df, alpha=0.2)
    """
    draw = _get_plot_backend().scatter_matrix
    return draw(
        frame=frame, alpha=alpha, figsize=figsize, ax=ax, grid=grid,
        diagonal=diagonal, marker=marker, density_kwds=density_kwds,
        hist_kwds=hist_kwds, range_padding=range_padding, **kwds)
Example #5
0
                       n_features=dimension)

# Write the points to dataset.txt, one point per line: the first
# `dimension` coordinates, each rounded to 4 decimals, joined by commas.
with open("dataset.txt", "w") as file:
    for point in points:
        fields = [str(round(point[idx], 4)) for idx in range(dimension)]
        file.write(",".join(fields))
        file.write("\n")

data = np.array(points)

# plot
# NOTE(review): the column list is fixed at three names, so this
# presumably expects `data` to have exactly three columns — confirm
# against `dimension`.
df = pd.DataFrame(data, columns=['x1', 'x2', 'x3'])
scatter_matrix(df, alpha=0.2, figsize=(10, 10))

# Rebind df to a frame holding the first two coordinates of every point
# plus the values of `y` as a 'label' column, then group it by label.
df = DataFrame({'x': points[:, 0], 'y': points[:, 1], 'label': y})
# Colour name for each integer key 0..6.
colors = dict(enumerate(
    ['red', 'blue', 'green', 'black', 'purple', 'pink', 'orange']))
fig, ax = pyplot.subplots()
grouped = df.groupby('label')
for key, group in grouped:
    group.plot(ax=ax,
Example #6
0
# Keyword arguments shared by every figure title below.
_title_kwargs = {'y': 1.00, 'fontweight': 'bold'}

# density plots
df.plot(
    kind='density',
    subplots=True,
    layout=(4, 4),
    sharex=False,
    fontsize=8,
)
plt.suptitle("Density", **_title_kwargs)
plt.show()

# box and whisker plots (all drawn on a single set of axes)
df.plot(
    kind='box',
    subplots=False,
    layout=(4, 4),
    sharex=False,
    sharey=False,
    fontsize=12,
)
plt.suptitle("Box and Whisker", **_title_kwargs)
plt.show()
#endregion
#region Bivariate
# scatter plot matrix
scatter_matrix(df)
plt.suptitle("Scatter Matrix", **_title_kwargs)
plt.show()

# correlation matrix, drawn as an image with the colour scale pinned
# to [-1, 1]
fig = plt.figure()
ax = fig.add_subplot(111)
corr = df.corr()
cax = ax.matshow(corr, vmin=-1, vmax=1, interpolation='none')
fig.colorbar(cax)
# NOTE(review): 13 tick positions are hard-coded; presumably this matches
# the number of entries in `names` — confirm.
ticks = numpy.arange(13)
ax.set_xticks(ticks)
ax.set_xticklabels(names)
ax.set_yticks(ticks)
ax.set_yticklabels(names)
plt.suptitle("Correlation Matrix", **_title_kwargs)
plt.show()
#endregion
Example #7
0
# Pairwise correlations between the columns of train_df.
correlations = train_df.corr()

# Heat map of the correlations on a 10x10 inch figure.
fig, ax = plt.subplots(figsize=(10, 10))
# Generate the colour map (a diverging palette returned as a colormap).
colormap = sns.diverging_palette(220, 10, as_cmap=True)
# Draw onto `ax` explicitly rather than relying on it being the current
# axes; annotate every cell with its value formatted to two decimals.
sns.heatmap(correlations, cmap=colormap, annot=True, fmt=".2f", ax=ax)
ax.set_xticklabels(colum_names, rotation=45, horizontalalignment='right')
ax.set_yticklabels(colum_names)
plt.show()

# Scatterplot matrix with kernel density estimates on the diagonal.
sm = scatter_matrix(train_df, figsize=(6, 6), diagonal='kde')
# Tidy every subplot in one pass (a plain loop, since these calls are
# made only for their side effects).
for subplot_ax in sm.reshape(-1):
    # Change label rotation.
    subplot_ax.xaxis.label.set_rotation(40)
    subplot_ax.yaxis.label.set_rotation(0)
    # May need to offset the label when rotating to prevent it from
    # overlapping the figure.
    subplot_ax.get_yaxis().set_label_coords(-0.6, 0.5)
    # Hide all ticks.
    subplot_ax.set_xticks(())
    subplot_ax.set_yticks(())
plt.show()

# prepare configuration for cross validation test harness
seed = 7
# prepare models
models = [('LogisticRegression', LogisticRegression())]
# evaluate the model
Example #8
0
def plot_scatter_matrix(dataframe):
    """Draw a scatter matrix of ``dataframe`` with a ``figsize`` of (15, 11).

    The value returned by ``scatter_matrix`` is discarded, so this
    function returns ``None``; it does not display the figure itself.
    """
    figure_size = (15, 11)
    scatter_matrix(dataframe, figsize=figure_size)