Ejemplo n.º 1
0
 def _update_plot(self, axis, view):
     """Draw ``view`` using the seaborn function named by ``self.plot_type``.

     Every plot type except the grid types forwards ``axis`` as ``ax=`` to
     the seaborn call.  The grid types ('pairplot', 'pairgrid',
     'facetgrid') create their own figure: each style option whose key
     contains 'map' is applied as a method call on the created grid, then
     the figure stored in ``self.handles['fig']`` is closed and replaced by
     the current pyplot figure.  Any other plot type is delegated to the
     parent class.

     Args:
         axis: Axes object forwarded as ``ax=`` to the seaborn call.
         view: Object exposing ``data`` plus the ``x``, ``y`` and ``x2``
             names that are read for the selected plot type.
     """
     # Work on a shallow copy: the pops below must not strip options from
     # ``self.style``, otherwise a later call would silently lose them.
     style = dict(self.style)
     if self.plot_type == 'regplot':
         sns.regplot(x=view.x,
                     y=view.y,
                     data=view.data,
                     ax=axis,
                     **style)
     elif self.plot_type == 'boxplot':
         # These two options are dropped before the call to sns.boxplot.
         style.pop('return_type', None)
         style.pop('figsize', None)
         sns.boxplot(view.data[view.y],
                     view.data[view.x],
                     ax=axis,
                     **style)
     elif self.plot_type == 'violinplot':
         sns.violinplot(view.data[view.y],
                        view.data[view.x],
                        ax=axis,
                        **style)
     elif self.plot_type == 'interact':
         sns.interactplot(view.x,
                          view.x2,
                          view.y,
                          data=view.data,
                          ax=axis,
                          **style)
     elif self.plot_type == 'corrplot':
         sns.corrplot(view.data, ax=axis, **style)
     elif self.plot_type == 'lmplot':
         sns.lmplot(x=view.x,
                    y=view.y,
                    data=view.data,
                    ax=axis,
                    **style)
     elif self.plot_type in ['pairplot', 'pairgrid', 'facetgrid']:
         # Snapshot the matching keys first: popping while iterating the
         # live key view raises RuntimeError on Python 3.
         map_keys = [k for k in list(style) if 'map' in k]
         map_opts = [(k, style.pop(k)) for k in map_keys]
         if self.plot_type == 'pairplot':
             g = sns.pairplot(view.data, **style)
         elif self.plot_type == 'pairgrid':
             g = sns.PairGrid(view.data, **style)
         elif self.plot_type == 'facetgrid':
             g = sns.FacetGrid(view.data, **style)
         for opt, args in map_opts:
             # args[0] names the plotting function (seaborn first, then
             # pyplot); the remaining items are passed on to the grid method.
             plot_fn = getattr(sns, args[0]) if hasattr(
                 sns, args[0]) else getattr(plt, args[0])
             getattr(g, opt)(plot_fn, *args[1:])
         # The grid drew onto a new figure, so swap the stored handle.
         plt.close(self.handles['fig'])
         self.handles['fig'] = plt.gcf()
     else:
         super(SNSFramePlot, self)._update_plot(axis, view)
Ejemplo n.º 2
0
 def _update_plot(self, axis, view):
     """Render ``view`` with the seaborn routine chosen by ``self.plot_type``.

     The keyword options come from ``self._process_style`` applied to the
     style entry at ``self.cyclic_index``.  Grid plot types ('pairplot',
     'pairgrid', 'facetgrid') additionally apply every option whose key
     contains 'map' as a method call on the created grid, then replace
     ``self.handles['fig']`` with the current pyplot figure.  Plot types
     not listed here fall through to the parent implementation.
     """
     opts = self._process_style(self.style[self.cyclic_index])
     kind = self.plot_type

     if kind == 'factorplot':
         # Colour by the second variable only when the view provides one.
         factor_opts = dict(opts)
         if view.x2:
             factor_opts['hue'] = view.x2
         sns.factorplot(x=view.x, y=view.y, data=view.data, **factor_opts)
     elif kind == 'regplot':
         sns.regplot(x=view.x, y=view.y, data=view.data, ax=axis, **opts)
     elif kind == 'boxplot':
         for dropped_key in ('return_type', 'figsize'):
             opts.pop(dropped_key, None)
         sns.boxplot(view.data[view.y], view.data[view.x], ax=axis, **opts)
     elif kind == 'violinplot':
         if view.x:
             positional = (view.data[view.y], view.data[view.x])
         else:
             positional = (view.data,)
         sns.violinplot(*positional, ax=axis, **opts)
     elif kind == 'interact':
         sns.interactplot(view.x, view.x2, view.y,
                          data=view.data, ax=axis, **opts)
     elif kind == 'corrplot':
         sns.corrplot(view.data, ax=axis, **opts)
     elif kind == 'lmplot':
         sns.lmplot(x=view.x, y=view.y, data=view.data, ax=axis, **opts)
     elif kind in ('pairplot', 'pairgrid', 'facetgrid'):
         # Pull the 'map' options out of the keyword set; they are applied
         # to the grid after it has been constructed.
         map_opts = []
         for key in list(opts.keys()):
             if 'map' in key:
                 map_opts.append((key, opts.pop(key)))

         factory_name = {
             'pairplot': 'pairplot',
             'pairgrid': 'PairGrid',
             'facetgrid': 'FacetGrid',
         }[kind]
         g = getattr(sns, factory_name)(view.data, **opts)

         for method_name, call_spec in map_opts:
             fn_name = call_spec[0]
             # Prefer a seaborn function of that name, else use pyplot's.
             if hasattr(sns, fn_name):
                 plot_fn = getattr(sns, fn_name)
             else:
                 plot_fn = getattr(plt, fn_name)
             getattr(g, method_name)(plot_fn, *call_spec[1:])

         plt.close(self.handles['fig'])
         self.handles['fig'] = plt.gcf()
     else:
         super(SNSFramePlot, self)._update_plot(axis, view)
Ejemplo n.º 3
0
 def _update_plot(self, axis, view):
     """Plot ``view`` through the seaborn call matching ``self.plot_type``.

     Options are obtained by running ``self._process_style`` on the style
     entry selected by ``self.cyclic_index``.  For the grid plot types
     ('pairplot', 'pairgrid', 'facetgrid') each option whose key contains
     'map' is applied as a method call on the new grid; the previous
     figure handle is closed only when ``self._close_figures`` is truthy,
     and ``self.handles['fig']`` is then set to the current pyplot figure.
     Any other plot type is handed to the parent implementation.
     """
     plot_kwargs = self._process_style(self.style[self.cyclic_index])
     plot_type = self.plot_type

     if plot_type == 'factorplot':
         # Add the hue variable only when a second variable is present.
         factor_kwargs = dict(plot_kwargs)
         if view.x2:
             factor_kwargs['hue'] = view.x2
         sns.factorplot(x=view.x, y=view.y, data=view.data, **factor_kwargs)
     elif plot_type == 'regplot':
         sns.regplot(x=view.x, y=view.y, data=view.data, ax=axis,
                     **plot_kwargs)
     elif plot_type == 'boxplot':
         for dropped_key in ('return_type', 'figsize'):
             plot_kwargs.pop(dropped_key, None)
         sns.boxplot(view.data[view.y], view.data[view.x], ax=axis,
                     **plot_kwargs)
     elif plot_type == 'violinplot':
         if view.x:
             series_args = (view.data[view.y], view.data[view.x])
         else:
             series_args = (view.data,)
         sns.violinplot(*series_args, ax=axis, **plot_kwargs)
     elif plot_type == 'interact':
         sns.interactplot(view.x, view.x2, view.y,
                          data=view.data, ax=axis, **plot_kwargs)
     elif plot_type == 'corrplot':
         sns.corrplot(view.data, ax=axis, **plot_kwargs)
     elif plot_type == 'lmplot':
         sns.lmplot(x=view.x, y=view.y, data=view.data, ax=axis,
                    **plot_kwargs)
     elif plot_type in ('pairplot', 'pairgrid', 'facetgrid'):
         # Separate the 'map' options; they are applied once the grid exists.
         map_opts = []
         for key in list(plot_kwargs.keys()):
             if 'map' in key:
                 map_opts.append((key, plot_kwargs.pop(key)))

         grid_factories = {
             'pairplot': 'pairplot',
             'pairgrid': 'PairGrid',
             'facetgrid': 'FacetGrid',
         }
         g = getattr(sns, grid_factories[plot_type])(view.data, **plot_kwargs)

         for grid_method, spec in map_opts:
             target = spec[0]
             # Look the function up on seaborn first, falling back to pyplot.
             if hasattr(sns, target):
                 plot_fn = getattr(sns, target)
             else:
                 plot_fn = getattr(plt, target)
             getattr(g, grid_method)(plot_fn, *spec[1:])

         if self._close_figures:
             plt.close(self.handles['fig'])
         self.handles['fig'] = plt.gcf()
     else:
         super(SNSFramePlot, self)._update_plot(axis, view)
Ejemplo n.º 4
0
def sea():
    """Draw eight two-variable pair plots of the CP results, coloured by class.

    Reads ``CP_results.csv`` from the hard-coded path below and calls
    ``sns.pairplot`` once per column pair, always with
    ``hue="Classification"``, the "Set2" palette, KDE diagonals and
    ``size=2.5``.  Nothing is returned.
    """
    source_data = pd.read_csv(
        '/Users/saracamnasio/Dropbox/Research/Projects/UnusuallyRB/2016_Analysis/input/CP_results.csv'
    )

    # Options shared by every plot.
    common_options = dict(hue="Classification",
                          palette="Set2",
                          diag_kind="kde",
                          size=2.5)

    # Column pairs, in the order the plots are produced.
    column_pairs = (
        ("Opt_SpT", "Lmin_mu"),
        ("Opt_SpT", "Lmax_mu"),
        ("JK_Dev", "Lmin_mu"),
        ("HK_Dev", "Lmin_mu"),
        ("JH_Dev", "Lmin_mu"),
        ("JK_Dev", "Lmax_mu"),
        ("HK_Dev", "Lmax_mu"),
        ("JH_Dev", "Lmax_mu"),
    )

    for first_column, second_column in column_pairs:
        sns.pairplot(source_data,
                     vars=[first_column, second_column],
                     **common_options)
Ejemplo n.º 5
0
import matplotlib.pyplot as plt
import pandas as pd
import seaborn.apionly as sns

# Load the iris measurements from the local CSV file.
flowers = pd.read_csv('data/iris.csv')

# Petal measurements on the rows, sepal measurements on the columns,
# coloured by species; markers are drawn with a zero line width.
pairplot_options = {
    'data': flowers,
    'x_vars': ['sepal_length', 'sepal_width'],
    'y_vars': ['petal_length', 'petal_width'],
    'hue': 'species',
    'plot_kws': {'linewidth': 0},
}
g = sns.pairplot(**pairplot_options)

# Make the figure legend draggable before the window is shown.
legend = g.fig.legends[0]
legend.draggable(True)

# Show with block=True, then write the figure to disk.
plt.show(block=True)
g.fig.savefig('iris-pairs.png', dpi=300)
Ejemplo n.º 6
0
# Exported interactive-session history. The get_ipython() calls and the
# `_` / `_16` names used further down are not defined in this file.
# NOTE(review): presumably IPython's shell object and output-history
# variables - confirm before running this as a plain script.
get_ipython().magic('pinfo sns.pairplot')
get_ipython().magic('ls ')
# Six colour triples written on a 0-255 scale and divided by 255.
# NOTE(review): assumes true division (Python 3) - confirm the interpreter.
color_palette = np.array([[71, 117, 167],
                          [120, 121, 122],
                          [248, 213, 95],
                          [140, 175, 83],
                          [152, 109, 163],
                          [169, 93, 100]]) / 255

# The second read immediately rebinds `cars`; only 'data/cars.csv' is used
# by the code below.
cars = pd.read_csv('cars.csv')
cars = pd.read_csv('data/cars.csv')
# Translate the numeric origin code into a readable name; codes missing
# from the dict become None because dict.get is used as the mapper.
origin_dict = {1: 'USA', 2: 'Europe', 3: 'Japan'}
cars['origin name'] = cars['origin'].apply(origin_dict.get)
# Weight vs. mpg, one colour per origin name. zip() stops at the shorter
# input, so only the first three palette rows are paired with the names.
sns.pairplot(data=cars,
             x_vars=['weight'],
             y_vars=['mpg'],
             hue='origin name',
             palette=dict(zip(origin_dict.values(), color_palette)))

# Bare expressions: their values are not stored or printed by this code.
len(set(cars['name']))
cars.shape
# Same plot again without an explicit palette.
sns.pairplot(data=cars,
             x_vars=['weight'],
             y_vars=['mpg'],
             hue='origin name')

# Toggle legend dragging on the object held in `_`, then switch it off and
# save the figure held in `_16`.
# NOTE(review): which earlier result `_` and `_16` refer to cannot be
# determined from this file - verify against the original session.
_.fig.legends[0].draggable(True)
_.fig.legends[0].draggable(False)
_16.fig.legends[0].draggable(False)
_16.fig.savefig('cars-scatter.png', dpi=300)
Ejemplo n.º 7
0
def _print_covariance(title, samples):
    """Print ``title`` and the covariance matrix of ``samples.T``; return the matrix."""
    matrix = np.cov(samples.T)
    print(title)
    print(matrix)
    return matrix


boston_data = load_boston()

# Array copies of the feature matrix and the target vector.
train_data = np.array(boston_data.data)
train_labels = np.array(boston_data.target)

num_features = boston_data.data.shape[1]
unique_labels = np.unique(train_labels)
num_classes = len(unique_labels)

feature_count_message = ("The boston dataset has " + str(num_features) +
                         " features")
print(feature_count_message)
print(boston_data.feature_names)

# Put everything into a Pandas DataFrame
frame_values = np.c_[train_data]
data = pd.DataFrame(data=frame_values, columns=boston_data.feature_names)
# print(tabulate(data, headers='keys', tablefmt='psql'))

# Covariance of the raw features
cov_mat_boston = _print_covariance("Covariance matrix", train_data)

# Normalize the data, then report the covariance again
normalized_train_data = helpers.normalize_data(train_data)
normalized_cov_mat_boston = _print_covariance(
    "Normalized data covariance matrix", normalized_train_data)

# Scatterplot matrix of every feature, coloured by the CRIM column
fig = sns.pairplot(data=data, hue='CRIM')

plt.show()
Ejemplo n.º 8
0
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
    'PTRATIO', 'B', 'LSTAT', 'MEDV'
]
df.shape

# ## Visualizing the dataset features

# Matrix of the pairwise feature distributions. The diagonal shows the distribution of each individual feature

# +
cols = ['LSTAT', 'RM', 'INDUS', 'AGE', 'MEDV']

# sns.pairplot is figure-level: it builds its own figure, sized from
# `height`. Calling plt.figure() beforehand only left an empty extra figure
# behind, so `fig` is bound to the figure that is actually drawn instead.
fig = sns.pairplot(df[cols],
                   height=4,
                   diag_kind='kde',
                   plot_kws=dict(color=colors[8]),
                   diag_kws=dict(shade=True, alpha=.7, color=colors[0])).fig
plt.show()
# -

# Visualization of the correlation matrix. Entry $(i,j)$ holds the (linear) correlation coefficient between features $i$ and $j$. Values lie in $[-1,1]$: $1$ is perfect correlation, $-1$ perfect inverse correlation, $0$ no correlation

cm = np.corrcoef(df[cols].values.T)
plt.figure(figsize=(12, 4))
hm = sns.heatmap(cm,
                 cbar=True,
                 annot=True,
                 square=True,
                 fmt='.2f',
                 annot_kws={'size': 10},
# Compare the hand-written covariance against NumPy's implementation
own_covariance = covariance([1, 3, 4], [1, 0, 2])
print("My covariance function: {}".format(own_covariance))
numpy_covariance = np.cov([1, 3, 4], [1, 0, 2])
print("Numpy covariance function: {}".format(numpy_covariance))


def correlation(X, Y):
    """Return ``covariance(X, Y)`` scaled by the product of both standard deviations.

    ``ddof=1`` is passed to ``np.std`` so the unbiased (n - 1) standard
    deviation is used.
    """
    joint = covariance(X, Y)
    scale = np.std(X, ddof=1) * np.std(Y, ddof=1)
    return joint / scale


own_correlation = correlation([1, 1, 4, 3], [1, 0, 2, 2])
print("My Correlation: {}".format(own_correlation))
numpy_correlation = np.corrcoef([1, 1, 4, 3], [1, 0, 2, 2])
print("Numpy corrcoef: {}".format(numpy_correlation))

# Get a general overview of the data to decide which approach fits best

sns.pairplot(iris2, height=3.0)
plt.show()
# Based on the results, we chose petal width as X and petal length as Y
X, Y = iris2['petal_width'], iris2['petal_length']

plt.scatter(X, Y)

# plt.show()


# Creating the prediction function
def predict(alpha, beta, x_i):
    """Return the value of the line with intercept ``alpha`` and slope ``beta`` at ``x_i``."""
    slope_term = beta * x_i
    return slope_term + alpha

# ------------------------------------------------------------------------------------------------

# ------------------------------------------------------------------------------------------------

# Scatterplot of two named columns against each other
scatter_feature_name_1, scatter_feature_name_2 = 'color_intensity', 'alcohol'
horizontal_values = data[scatter_feature_name_1]
vertical_values = data[scatter_feature_name_2]
fig = plt.scatter(horizontal_values, vertical_values)

plt.xlabel(scatter_feature_name_1)
plt.ylabel(scatter_feature_name_2)
plt.show()

# Scatterplot matrix over a subset of the columns, coloured by category
pairplot_columns = [
    'alcohol', 'color_intensity', 'malic_acid', 'magnesium', 'category'
]
fig = sns.pairplot(data=data[pairplot_columns], hue='category')

plt.show()

# ------------------------------------------------------------------------------------------------

# ------------------------------------------------------------------------------------------------

# Bee swarm plot of total phenols for each category
swarm_options = dict(x='category', y='total_phenols', data=data)
sns.swarmplot(**swarm_options)
plt.show()

# ------------------------------------------------------------------------------------------------

# ------------------------------------------------------------------------------------------------
Ejemplo n.º 11
0
Created on Mon Jan 29 10:40:44 2018

@author: aditya royal
"""

import pandas as pd
import seaborn.apionly as sns
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
obj = pd.read_csv('C:/Users/aditya royal/Downloads/MPG.csv')

# Mean mpg per model year as a line, followed by an mpg/horsepower scatter
mpg_by_year = obj.groupby(['model year'])['mpg']
mpg_year = mpg_by_year.mean()
mpg_year.plot()
plt.scatter(x=obj['mpg'], y=obj['horsepower'])
plt.show()

plt.bar(obj['mpg'], obj['origin'])
#plt.bar(obj['car manufacturer'],obj['origin'])
# Despite its name, this grouping is keyed on the 'cylinders' column.
origin_group = obj.groupby(['cylinders'])
plt.hist(obj['mpg'])
# One box plot of mpg per cylinder group, each shown on its own
for name, group in origin_group:
    plt.boxplot(group['mpg'].values)
    plt.show()
#sns.heatmap(obj[['mpg','cylinders']].T)

# Weight vs. mpg, coloured by the origin value through a three-colour map
origin_codes = list(obj['origin'].values)
origin_cmap = matplotlib.colors.ListedColormap(['red', 'green', 'blue'])
plt.scatter(x=obj['weight'], y=obj['mpg'], c=origin_codes, cmap=origin_cmap)

# Pair plot of the listed columns after dropping rows with missing values
pairplot_columns = [
    'mpg', 'displacement', 'horsepower', 'weight', 'acceleration'
]
sns.pairplot(obj[pairplot_columns].dropna())

# In[53]:


# Keep the "type" column (malignant or benign) as y, and build a new
# DataFrame without that column.
y = data['type']
data_p = data.drop(columns=["type"])  # new frame without the "type" column
data_p.head()


# In[57]:


# Pair plot of the mean-feature frame without its "ID" column, coloured by type
data_pair = data_mean.drop(columns="ID")
sns.pairplot(data_pair, hue='type')


# # First Machine Learning Model
# 
# This uses the linear model method with logistic regression to perform machine learning. This model is fitted to the mean features and tumor types.

# In[17]:


from sklearn import linear_model
from sklearn import model_selection
# Columns holding the mean features used as model inputs
mean_feature_columns = [
    'mean radius',
    'mean texture',
    'mean perimeter',
    'mean area',
    'mean smoothness',
    'mean compactness',
    'mean concavity',
    'mean concave points',
    'mean symmetry',
    'mean fractal dimension',
]
xreg = data_mean[mean_feature_columns]  # x variable: all the mean features
yreg = data['type']  # y variable: the tumor type column
clf = linear_model.LogisticRegression()  # set up logistic regression
clf.fit(xreg, yreg)  # fit the model to the x and y variables