def plot_standard_correlation_coefficient():
    """Show a scatter matrix for four columns of ``data_training``.

    Selects the columns ``median_house_value``, ``median_income``,
    ``total_rooms`` and ``housing_median_age`` from the module-level
    ``data_training`` object, hands that selection to ``scatter_matrix``
    with a 12 x 8 figure size, and then calls ``plt.show()``.

    Nothing is returned.
    """
    selected_columns = [
        'median_house_value',
        'median_income',
        'total_rooms',
        'housing_median_age',
    ]
    subset = data_training[selected_columns]
    scatter_matrix(subset, figsize=(12, 8))
    plt.show()
def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False,
                   diagonal='hist', marker='.', density_kwds=None,
                   hist_kwds=None, range_padding=0.05, **kwds):
    """
    Draw a matrix of scatter plots.

    This function does no plotting itself: every argument is forwarded,
    by keyword, to the ``scatter_matrix`` attribute of the object returned
    by ``_get_plot_backend()``.

    Parameters
    ----------
    frame : DataFrame
    alpha : float, optional
        Amount of transparency applied.
    figsize : (float, float), optional
        A tuple (width, height) in inches.
    ax : Matplotlib axis object, optional
    grid : bool, optional
        Setting this to True will show the grid.
    diagonal : {'hist', 'kde'}
        Pick between 'kde' and 'hist' for either Kernel Density Estimation
        or Histogram plot in the diagonal.
    marker : str, optional
        Matplotlib marker type, default '.'.
    density_kwds : keywords
        Keyword arguments to be passed to the kernel density estimate plot.
    hist_kwds : keywords
        Keyword arguments to be passed to the hist function.
    range_padding : float, optional
        Relative extension of axis range in x and y with respect to
        (x_max - x_min) or (y_max - y_min), default 0.05.
    **kwds
        Other plotting keyword arguments, to be passed to the scatter
        function.

    Returns
    -------
    numpy.ndarray
        A matrix of scatter plots.

    Examples
    --------
    >>> df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D'])
    >>> scatter_matrix(df, alpha=0.2)
    """
    backend = _get_plot_backend()
    # The explicitly named parameters can never collide with ``kwds``,
    # because Python binds those names to the parameters first.
    named_options = {
        'frame': frame,
        'alpha': alpha,
        'figsize': figsize,
        'ax': ax,
        'grid': grid,
        'diagonal': diagonal,
        'marker': marker,
        'density_kwds': density_kwds,
        'hist_kwds': hist_kwds,
        'range_padding': range_padding,
    }
    return backend.scatter_matrix(**named_options, **kwds)
# First 20 rows of the dataset
print(dataset.head(20))

# Summary statistics as reported by describe()
print(dataset.describe())

# Number of rows in each 'class' group
print(dataset.groupby('class').size())

# Data visualisation
# NOTE(review): the box plot and the scatter matrix below are each drawn
# and shown twice in a row. This looks like a copy/paste duplicate, but it
# is kept as-is here; confirm before removing.

# Box and whisker plots: one subplot per column on a 2 x 2 grid, with
# independent x and y axes.
_box_plot_options = dict(
    kind='box',
    subplots=True,
    layout=(2, 2),
    sharex=False,
    sharey=False,
)
dataset.plot(**_box_plot_options)
pyplot.show()

# Box and whisker plots (second, identical rendering)
dataset.plot(**_box_plot_options)
pyplot.show()

# Histograms
dataset.hist()
pyplot.show()

# Scatter plot matrix
scatter_matrix(dataset)
pyplot.show()

# Scatter plot matrix (second, identical rendering)
scatter_matrix(dataset)
pyplot.show()
def scatter_matrix(
    frame,
    alpha=0.5,
    figsize=None,
    ax=None,
    grid=False,
    diagonal='hist',
    marker='.',
    density_kwds=None,
    hist_kwds=None,
    range_padding=0.05,
    **kwds
):
    """
    Draw a matrix of scatter plots.

    Thin wrapper: looks up the plotting backend through
    ``_get_plot_backend()`` and returns whatever that backend's
    ``scatter_matrix`` returns for the same arguments.

    Parameters
    ----------
    frame : DataFrame
    alpha : float, optional
        Amount of transparency applied.
    figsize : (float, float), optional
        Figure size as a (width, height) tuple, in inches.
    ax : Matplotlib axis object, optional
    grid : bool, optional
        When True the grid is shown.
    diagonal : {'hist', 'kde'}
        Kind of plot placed on the diagonal: 'hist' for a histogram,
        'kde' for a Kernel Density Estimation plot.
    marker : str, optional
        Matplotlib marker type, default '.'.
    density_kwds : keywords
        Extra keyword arguments for the kernel density estimate plot.
    hist_kwds : keywords
        Extra keyword arguments for the hist function.
    range_padding : float, optional
        Relative extension of the axis range in x and y with respect to
        (x_max - x_min) or (y_max - y_min), default 0.05.
    **kwds
        Extra keyword arguments for the scatter function.

    Returns
    -------
    numpy.ndarray
        A matrix of scatter plots.

    Examples
    --------
    >>> df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D'])
    >>> scatter_matrix(df, alpha=0.2)
    """
    active_backend = _get_plot_backend()
    return active_backend.scatter_matrix(
        frame=frame,
        alpha=alpha,
        figsize=figsize,
        ax=ax,
        grid=grid,
        diagonal=diagonal,
        marker=marker,
        density_kwds=density_kwds,
        hist_kwds=hist_kwds,
        range_padding=range_padding,
        **kwds
    )
n_features=dimension) with open("dataset.txt", "w") as file: for centroid in points: for value in range(dimension): if value == (dimension - 1): file.write(str(round(centroid[value], 4))) else: file.write(str(round(centroid[value], 4)) + ",") file.write("\n") data = np.array(points) # plot df = pd.DataFrame(data, columns=['x1', 'x2', 'x3']) scatter_matrix(df, alpha=0.2, figsize=(10, 10)) df = DataFrame(dict(x=points[:, 0], y=points[:, 1], label=y)) colors = { 0: 'red', 1: 'blue', 2: 'green', 3: 'black', 4: 'purple', 5: 'pink', 6: 'orange' } fig, ax = pyplot.subplots() grouped = df.groupby('label') for key, group in grouped: group.plot(ax=ax,
# Density plots: one subplot per column on a 4 x 4 grid
df.plot(
    kind='density',
    subplots=True,
    layout=(4, 4),
    sharex=False,
    fontsize=8,
)
plt.suptitle("Density", y=1.0, fontweight='bold')
plt.show()

# Box and whisker plots, all columns in a single axes (subplots=False)
df.plot(
    kind='box',
    subplots=False,
    layout=(4, 4),
    sharex=False,
    sharey=False,
    fontsize=12,
)
plt.suptitle("Box and Whisker", y=1.0, fontweight='bold')
plt.show()
#endregion

#region Bivariate
# Scatter plot matrix
scatter_matrix(df)
plt.suptitle("Scatter Matrix", y=1.0, fontweight='bold')
plt.show()

# Correlation matrix drawn as a colour-coded image, scale fixed to [-1, 1]
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
cax = ax.matshow(df.corr(), vmin=-1, vmax=1, interpolation='none')
fig.colorbar(cax)
# NOTE(review): 13 tick positions are hard-coded; presumably `names` holds
# 13 labels to match. Confirm against where `names` is defined.
ticks = numpy.arange(13)
ax.set_xticks(ticks)
ax.set_yticks(ticks)
ax.set_xticklabels(names)
ax.set_yticklabels(names)
plt.suptitle("Correlation Matrix", y=1.0, fontweight='bold')
plt.show()
#endregion
# Pairwise correlations between the columns of train_df
correlations = train_df.corr()

# Create the 10 x 10 inch figure/axes; the heat map below is drawn while
# this axes is the most recently created one.
fig, ax = plt.subplots(figsize=(10, 10))

# Diverging colour map used for the heat map
colormap = sns.diverging_palette(220, 10, as_cmap=True)

# Heat map with each cell annotated by its value, formatted to 2 decimals
sns.heatmap(correlations, cmap=colormap, annot=True, fmt=".2f")
ax.set_xticklabels(colum_names, rotation=45, horizontalalignment='right')
ax.set_yticklabels(colum_names)
plt.show()

# Scatterplot matrix with kernel density estimates on the diagonal
sm = scatter_matrix(train_df, figsize=(6, 6), diagonal='kde')

# Adjust every subplot in a single pass. A plain loop is used because the
# calls are made only for their side effects; the per-subplot call order
# matches the original sequence of passes.
for subplot in sm.reshape(-1):
    # Change label rotation
    subplot.xaxis.label.set_rotation(40)
    subplot.yaxis.label.set_rotation(0)
    # Offset the y label so that the rotated text does not overlap the plot
    subplot.get_yaxis().set_label_coords(-0.6, 0.5)
    # Hide all ticks
    subplot.set_xticks(())
    subplot.set_yticks(())
plt.show()

# prepare configuration for cross validation test harness
seed = 7

# prepare models: list of (name, estimator) pairs
models = [('LogisticRegression', LogisticRegression())]

# evaluate the model
def plot_scatter_matrix(dataframe):
    """Draw a scatter matrix of ``dataframe`` with a 15 x 11 figure size.

    The result of ``scatter_matrix`` is discarded; this function returns
    ``None`` and does not itself display the figure.
    """
    figure_size = (15, 11)
    scatter_matrix(dataframe, figsize=figure_size)