def _update_plot(self, axis, view):
    """Draw ``view`` with the seaborn function selected by ``self.plot_type``.

    The regplot, boxplot, violinplot, interact, corrplot and lmplot
    branches pass ``ax=axis`` to the seaborn call.  The pairplot,
    pairgrid and facetgrid branches build a seaborn grid, apply any
    ``*map*`` style options to it, close the figure stored in
    ``self.handles['fig']`` and store the current pyplot figure instead.
    Every other plot type is delegated to the parent class.

    Args:
        axis: Axes object forwarded as ``ax`` to the seaborn functions.
        view: Object providing ``data`` and the ``x``, ``y`` and ``x2``
            attributes read by the individual branches.
    """
    # Work on a private copy.  The branches below pop() entries; doing that
    # on ``self.style`` itself would permanently drop those options, so a
    # second update would silently lose e.g. the grid ``map`` settings.
    style = dict(self.style)
    plot_type = self.plot_type

    if plot_type == 'regplot':
        sns.regplot(x=view.x, y=view.y, data=view.data, ax=axis, **style)
    elif plot_type == 'boxplot':
        # These two keys are removed before the options reach sns.boxplot.
        style.pop('return_type', None)
        style.pop('figsize', None)
        sns.boxplot(view.data[view.y], view.data[view.x], ax=axis, **style)
    elif plot_type == 'violinplot':
        sns.violinplot(view.data[view.y], view.data[view.x], ax=axis,
                       **style)
    elif plot_type == 'interact':
        sns.interactplot(view.x, view.x2, view.y, data=view.data, ax=axis,
                         **style)
    elif plot_type == 'corrplot':
        sns.corrplot(view.data, ax=axis, **style)
    elif plot_type == 'lmplot':
        # NOTE(review): lmplot is documented as a figure-level function;
        # confirm that the targeted seaborn version accepts ``ax``.
        sns.lmplot(x=view.x, y=view.y, data=view.data, ax=axis, **style)
    elif plot_type in ['pairplot', 'pairgrid', 'facetgrid']:
        # Snapshot the keys first: popping while iterating the live key
        # view raises "dictionary changed size during iteration" on
        # Python 3.
        map_opts = [(k, style.pop(k)) for k in list(style) if 'map' in k]
        if plot_type == 'pairplot':
            g = sns.pairplot(view.data, **style)
        elif plot_type == 'pairgrid':
            g = sns.PairGrid(view.data, **style)
        else:
            g = sns.FacetGrid(view.data, **style)
        for opt, args in map_opts:
            # args[0] names the plotting function; seaborn is searched
            # first, pyplot is the fallback.  The remaining items are
            # passed through as positional arguments of the grid method.
            source = sns if hasattr(sns, args[0]) else plt
            plot_fn = getattr(source, args[0])
            getattr(g, opt)(plot_fn, *args[1:])
        # Replace the previously stored figure with the current pyplot
        # figure.
        plt.close(self.handles['fig'])
        self.handles['fig'] = plt.gcf()
    else:
        super(SNSFramePlot, self)._update_plot(axis, view)
def _update_plot(self, axis, view):
    """Render ``view`` through the seaborn call matching ``self.plot_type``.

    The style options come from ``self._process_style`` applied to the
    entry of ``self.style`` selected by ``self.cyclic_index``.  Plot types
    without a dedicated branch are handed to the parent implementation.

    Args:
        axis: Axes object forwarded as ``ax`` where the seaborn call takes it.
        view: Object providing ``data`` and the ``x``, ``y`` and ``x2``
            attributes read by the individual branches.
    """
    kwargs = self._process_style(self.style[self.cyclic_index])
    kind = self.plot_type

    if kind == 'factorplot':
        # Colour by the second variable only when one is set.
        opts = dict(kwargs)
        if view.x2:
            opts['hue'] = view.x2
        sns.factorplot(x=view.x, y=view.y, data=view.data, **opts)
        return

    if kind == 'regplot':
        sns.regplot(x=view.x, y=view.y, data=view.data, ax=axis, **kwargs)
        return

    if kind == 'boxplot':
        # These keys are dropped before the options reach sns.boxplot.
        for dropped in ('return_type', 'figsize'):
            kwargs.pop(dropped, None)
        sns.boxplot(view.data[view.y], view.data[view.x], ax=axis, **kwargs)
        return

    if kind == 'violinplot':
        if view.x:
            sns.violinplot(view.data[view.y], view.data[view.x], ax=axis,
                           **kwargs)
        else:
            sns.violinplot(view.data, ax=axis, **kwargs)
        return

    if kind == 'interact':
        sns.interactplot(view.x, view.x2, view.y, data=view.data, ax=axis,
                         **kwargs)
        return

    if kind == 'corrplot':
        sns.corrplot(view.data, ax=axis, **kwargs)
        return

    if kind == 'lmplot':
        sns.lmplot(x=view.x, y=view.y, data=view.data, ax=axis, **kwargs)
        return

    # Grid plot types, mapped to the seaborn attribute that builds them.
    grid_builders = {
        'pairplot': 'pairplot',
        'pairgrid': 'PairGrid',
        'facetgrid': 'FacetGrid',
    }
    if kind in grid_builders:
        # Pull every option whose name contains 'map' out of the keyword
        # arguments; those are applied to the grid after construction.
        mapped = []
        for key in list(kwargs.keys()):
            if 'map' in key:
                mapped.append((key, kwargs.pop(key)))

        grid = getattr(sns, grid_builders[kind])(view.data, **kwargs)

        for method_name, spec in mapped:
            # spec[0] names the plotting function (seaborn first, pyplot
            # as fallback); the rest are positional arguments.
            if hasattr(sns, spec[0]):
                plot_fn = getattr(sns, spec[0])
            else:
                plot_fn = getattr(plt, spec[0])
            getattr(grid, method_name)(plot_fn, *spec[1:])

        # Swap the stored figure for the current pyplot figure.
        plt.close(self.handles['fig'])
        self.handles['fig'] = plt.gcf()
        return

    super(SNSFramePlot, self)._update_plot(axis, view)
def _update_plot(self, axis, view):
    """Plot ``view`` with the seaborn function named by ``self.plot_type``.

    Style options are obtained by running the ``self.cyclic_index`` entry
    of ``self.style`` through ``self._process_style``.  For the grid plot
    types the figure stored in ``self.handles['fig']`` is closed when
    ``self._close_figures`` is set, and the current pyplot figure is stored
    in its place.  Unknown plot types fall through to the parent class.

    Args:
        axis: Axes object forwarded as ``ax`` where the seaborn call takes it.
        view: Object providing ``data`` and the ``x``, ``y`` and ``x2``
            attributes read by the individual branches.
    """
    options = self._process_style(self.style[self.cyclic_index])
    frame_plot_type = self.plot_type

    if frame_plot_type == 'factorplot':
        hue_option = {'hue': view.x2} if view.x2 else {}
        opts = dict(options, **hue_option)
        sns.factorplot(x=view.x, y=view.y, data=view.data, **opts)
    elif frame_plot_type == 'regplot':
        sns.regplot(x=view.x, y=view.y, data=view.data, ax=axis, **options)
    elif frame_plot_type == 'boxplot':
        # These keys are removed before the options reach sns.boxplot.
        options.pop('return_type', None)
        options.pop('figsize', None)
        sns.boxplot(view.data[view.y], view.data[view.x], ax=axis, **options)
    elif frame_plot_type == 'violinplot':
        if not view.x:
            sns.violinplot(view.data, ax=axis, **options)
        else:
            sns.violinplot(view.data[view.y], view.data[view.x], ax=axis,
                           **options)
    elif frame_plot_type == 'interact':
        sns.interactplot(view.x, view.x2, view.y, data=view.data, ax=axis,
                         **options)
    elif frame_plot_type == 'corrplot':
        sns.corrplot(view.data, ax=axis, **options)
    elif frame_plot_type == 'lmplot':
        sns.lmplot(x=view.x, y=view.y, data=view.data, ax=axis, **options)
    elif frame_plot_type in ['pairplot', 'pairgrid', 'facetgrid']:
        # Separate the '*map*' options from the constructor keywords; the
        # key list is copied so popping does not disturb the iteration.
        grid_calls = []
        for name in list(options.keys()):
            if 'map' in name:
                grid_calls.append((name, options.pop(name)))

        if frame_plot_type == 'pairplot':
            grid = sns.pairplot(view.data, **options)
        elif frame_plot_type == 'pairgrid':
            grid = sns.PairGrid(view.data, **options)
        elif frame_plot_type == 'facetgrid':
            grid = sns.FacetGrid(view.data, **options)

        for method_name, call_args in grid_calls:
            # call_args[0] names the plotting function: seaborn is tried
            # first, pyplot otherwise.  The rest are positional arguments.
            fn_name = call_args[0]
            provider = sns if hasattr(sns, fn_name) else plt
            getattr(grid, method_name)(getattr(provider, fn_name),
                                       *call_args[1:])

        # NOTE(review): only the close is treated as conditional here; the
        # collapsed source does not show the original indentation -- confirm.
        if self._close_figures:
            plt.close(self.handles['fig'])
        self.handles['fig'] = plt.gcf()
    else:
        super(SNSFramePlot, self)._update_plot(axis, view)
def sea(csv_path='/Users/saracamnasio/Dropbox/Research/Projects/UnusuallyRB/'
                 '2016_Analysis/input/CP_results.csv'):
    """Draw one seaborn pair plot per variable pair of the results table.

    Each plot is coloured by the ``Classification`` column, uses the
    ``Set2`` palette and puts KDE plots on the diagonal.

    Args:
        csv_path: Path of the CSV file to read.  Defaults to the location
            that used to be hard-coded, so ``sea()`` behaves as before.
    """
    source_data = pd.read_csv(csv_path)

    # Same pairs, in the same order, as the former copy-pasted calls.
    variable_pairs = (
        ("Opt_SpT", "Lmin_mu"),
        ("Opt_SpT", "Lmax_mu"),
        ("JK_Dev", "Lmin_mu"),
        ("HK_Dev", "Lmin_mu"),
        ("JH_Dev", "Lmin_mu"),
        ("JK_Dev", "Lmax_mu"),
        ("HK_Dev", "Lmax_mu"),
        ("JH_Dev", "Lmax_mu"),
    )
    for first_var, second_var in variable_pairs:
        # NOTE(review): newer seaborn releases renamed ``size`` to
        # ``height``; kept as-is because the targeted version is unknown.
        sns.pairplot(source_data,
                     hue="Classification",
                     vars=[first_var, second_var],
                     palette="Set2",
                     diag_kind="kde",
                     size=2.5)
import matplotlib.pyplot as plt
import pandas as pd
import seaborn.apionly as sns

# Read the iris measurements from the local data directory.
flowers = pd.read_csv('data/iris.csv')

# Petal measurements on the y axes against sepal measurements on the x axes,
# coloured by species; markers are drawn without an outline.
g = sns.pairplot(
    data=flowers,
    x_vars=['sepal_length', 'sepal_width'],
    y_vars=['petal_length', 'petal_width'],
    hue='species',
    plot_kws={'linewidth': 0},
)

# Make the figure-level legend draggable, then block on the window.
# Presumably this lets the legend be positioned by hand before the figure
# is written to disk -- confirm.
legend = g.fig.legends[0]
legend.draggable(True)
plt.show(block=True)

g.fig.savefig('iris-pairs.png', dpi=300)
# Help lookup and directory listing, issued through the IPython shell object.
get_ipython().magic('pinfo sns.pairplot')
get_ipython().magic('ls ')

# RGB triples scaled into the 0-1 range.  Dividing by a float keeps this a
# true division on Python 2 as well, where ``/ 255`` on an integer array
# would truncate every entry to zero.
color_palette = np.array([[71, 117, 167], [120, 121, 122], [248, 213, 95],
                          [140, 175, 83], [152, 109, 163],
                          [169, 93, 100]]) / 255.0

# Only the 'data/cars.csv' read is kept: the earlier read of 'cars.csv' was
# overwritten immediately and would abort the script if that file is absent.
cars = pd.read_csv('data/cars.csv')

# Translate the numeric origin codes into readable names.
origin_dict = {1: 'USA', 2: 'Europe', 3: 'Japan'}
cars['origin name'] = cars['origin'].apply(origin_dict.get)

# Weight against mpg, one custom colour per origin name.
sns.pairplot(data=cars,
             x_vars=['weight'],
             y_vars=['mpg'],
             hue='origin name',
             palette=dict(zip(origin_dict.values(), color_palette)))

# Inspection expressions kept from the session; they only display a value
# when evaluated interactively.
len(set(cars['name']))
cars.shape

# Same plot with the default palette.  The grid is bound to a name because
# the IPython output-cache variables ``_`` and ``_16`` used previously only
# exist inside the original interactive session.
# NOTE(review): ``_16`` is assumed to have been this grid -- confirm.
cars_grid = sns.pairplot(data=cars,
                         x_vars=['weight'],
                         y_vars=['mpg'],
                         hue='origin name')

# Toggle legend dragging on and back off (the duplicated "off" call from
# the session is not repeated), then save the figure.
cars_legend = cars_grid.fig.legends[0]
cars_legend.draggable(True)
cars_legend.draggable(False)
cars_grid.fig.savefig('cars-scatter.png', dpi=300)
# Load the Boston data set and pull out features and targets as arrays.
boston_data = load_boston()
train_data = np.array(boston_data.data)
train_labels = np.array(boston_data.target)

# Basic dimensions: number of feature columns and of distinct target values.
num_features = boston_data.data.shape[1]
unique_labels = np.unique(train_labels)
num_classes = len(unique_labels)

print("The boston dataset has " + str(num_features) + " features")
print(boston_data.feature_names)

# Put everything into a pandas DataFrame, one column per feature name.
data = pd.DataFrame(
    data=np.c_[train_data],
    columns=boston_data.feature_names,
)
# print(tabulate(data, headers='keys', tablefmt='psql'))

# Covariance matrix of the raw features (np.cov expects variables in rows,
# hence the transpose).
cov_mat_boston = np.cov(train_data.T)
print("Covariance matrix")
print(cov_mat_boston)

# Normalize the data with the project helper, then recompute the covariance.
normalized_train_data = helpers.normalize_data(train_data)
normalized_cov_mat_boston = np.cov(normalized_train_data.T)
print("Normalized data covariance matrix")
print(normalized_cov_mat_boston)

# Scatterplot matrix of all features, coloured by the CRIM column.
fig = sns.pairplot(data=data, hue='CRIM')
plt.show()
'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV' ] df.shape # ## Visualizzazione delle caratteristiche del dataset # Matrice delle distribuzioni mutue delle feature. Sulla diagonale, distribuzione delle singole feature # + cols = ['LSTAT', 'RM', 'INDUS', 'AGE', 'MEDV'] fig = plt.figure(figsize=(16, 8)) sns.pairplot(df[cols], height=4, diag_kind='kde', plot_kws=dict(color=colors[8]), diag_kws=dict(shade=True, alpha=.7, color=colors[0])) plt.show() # - # Visualizzazione della matrice di correlazione. Alla posizione $(i,j)$ il coefficiente di correlazione (lineare) tra le feature $i$ e $j$. Valore in $[-1,1]$: $1$ correlazione perfetta, $-1$ correlazione inversa perfetta, $0$ assenza di correlazione cm = np.corrcoef(df[cols].values.T) plt.figure(figsize=(12, 4)) hm = sns.heatmap(cm, cbar=True, annot=True, square=True, fmt='.2f', annot_kws={'size': 10},
# Compare the hand-written covariance with NumPy's implementation.
print("My covariance function: {}".format(covariance([1, 3, 4], [1, 0, 2])))
print("Numpy covariance function: {}".format(np.cov([1, 3, 4], [1, 0, 2])))


def correlation(X, Y):
    """Return covariance(X, Y) over the product of both standard deviations.

    The standard deviations are computed with ``ddof=1``.
    """
    joint = covariance(X, Y)
    spread_x = np.std(X, ddof=1)  # ddof=1 selects the unbiased estimate
    spread_y = np.std(Y, ddof=1)
    return joint / (spread_x * spread_y)


print("My Correlation: {}".format(correlation([1, 1, 4, 3], [1, 0, 2, 2])))
print("Numpy corrcoef: {}".format(np.corrcoef([1, 1, 4, 3], [1, 0, 2, 2])))

# Get a general view of the data to help decide on the best approach.
sns.pairplot(iris2, height=3.0)
plt.show()

# Based on the results, petal width and petal length were chosen.
X = iris2['petal_width']
Y = iris2['petal_length']
plt.scatter(X, Y)
# plt.show()


# Prediction function
def predict(alpha, beta, x_i):
    """Return ``beta * x_i + alpha``: a line with intercept alpha, slope beta."""
    scaled = beta * x_i
    return scaled + alpha
# ------------------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------------
# Scatter plot of two named columns, with the axes labelled by column name.
scatter_feature_name_1 = 'color_intensity'
scatter_feature_name_2 = 'alcohol'

fig = plt.scatter(
    data[scatter_feature_name_1],
    data[scatter_feature_name_2],
)
plt.xlabel(scatter_feature_name_1)
plt.ylabel(scatter_feature_name_2)
plt.show()

# Scatterplot matrix of four columns, coloured by the 'category' column.
fig = sns.pairplot(
    data=data[[
        'alcohol',
        'color_intensity',
        'malic_acid',
        'magnesium',
        'category',
    ]],
    hue='category',
)
plt.show()

# ------------------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------------
# Bee swarm plot of total phenols for each category.
sns.swarmplot(x='category', y='total_phenols', data=data)
plt.show()

# ------------------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------------
Created on Mon Jan 29 10:40:44 2018 @author: aditya royal """ import pandas as pd import seaborn.apionly as sns import numpy as np import matplotlib import matplotlib.pyplot as plt obj = pd.read_csv('C:/Users/aditya royal/Downloads/MPG.csv') mpg_year = obj.groupby(['model year'])['mpg'].mean() mpg_year.plot() plt.scatter(x=obj['mpg'], y=obj['horsepower']) plt.show() plt.bar(obj['mpg'], obj['origin']) #plt.bar(obj['car manufacturer'],obj['origin']) origin_group = obj.groupby(['cylinders']) plt.hist(obj['mpg']) for name, group in origin_group: plt.boxplot(origin_group['mpg'].get_group(name).values) plt.show() #sns.heatmap(obj[['mpg','cylinders']].T) plt.scatter(x=obj['weight'], y=obj['mpg'], c=list(obj['origin'].values), cmap=matplotlib.colors.ListedColormap(['red', 'green', 'blue'])) sns.pairplot( obj[['mpg', 'displacement', 'horsepower', 'weight', 'acceleration']].dropna())
# In[53]:

# Keep the 'type' column as the target and build a new DataFrame without it.
# NOTE(review): 'type' presumably marks malignant vs. benign -- confirm.
y = data.type
data_p = data.drop(columns="type")  # new DataFrame without the 'type' column
data_p.head()

# In[57]:

# Pair plot of the mean features (ID column removed), coloured by type.
data_pair = data_mean.drop(columns=["ID"])
sns.pairplot(data_pair, hue='type')

# # First Machine Learning Model
#
# This uses the linear model method with logistic regression to perform
# machine learning. This model is fitted to the mean features and tumor types.

# In[17]:

from sklearn import linear_model
from sklearn import model_selection

# x variable: all of the mean feature columns.
xreg = data_mean[[
    'mean radius',
    'mean texture',
    'mean perimeter',
    'mean area',
    'mean smoothness',
    'mean compactness',
    'mean concavity',
    'mean concave points',
    'mean symmetry',
    'mean fractal dimension',
]]
# y variable: the tumor type column.
yreg = data.type

# Set up logistic regression and fit it to the x and y variables above.
clf = linear_model.LogisticRegression()
clf.fit(xreg, yreg)