Exemple #1
0
    def test_seabornw(self):
        """Smoke-test the seaborn ipywidget wrappers in ``snw``.

        Every wrapper is invoked on a seaborn example dataset; the test
        passes as long as none of the calls raises.
        """
        if not self.has_ipywidgets():
            raise self.SkipTest("This test requires ipywidgets")

        # "tips" is loaded once and shared by every wrapper that uses it.
        tips = sns.load_dataset("tips")
        snw.jointplot(tips)

        titanic = sns.load_dataset("titanic")
        snw.countplot(titanic)

        snw.jointplot(tips)
        snw.swarmplot(tips)
        snw.lmplot(tips)

        exercise = sns.load_dataset("exercise")
        snw.factorplot(exercise)
        snw.violinplot(tips)
        snw.stripplot(tips)
        snw.swarmplot(tips)
        snw.pointplot(tips)
        snw.barplot(tips)

        np.random.seed(0)
        uniform_data = np.random.rand(10, 12)
        snw.heatmap(uniform_data)

        flights = sns.load_dataset("flights")
        # Keyword arguments: pandas >= 2.0 no longer accepts positional
        # index/columns/values in DataFrame.pivot.
        flights = flights.pivot(index="month", columns="year", values="passengers")
        snw.clustermap(flights)
def corrplot_example():
    """
        Bird's-eye view of a large dataset: draw its correlation matrix as
        a heat map.  Per the original notes, corrplot also runs a
        permutation test to get p values; on a huge dataset that takes a
        while and the p values are not relevant.
    """
    sns.set_context(rc={"figure.figsize": (8, 8)})  # set size
    frames = (
        sns.load_dataset("titanic").dropna(),  # dataset 1, rows with NaN dropped
        sns.load_dataset("attention"),  # dataset 2
    )
    # Optional corrplot arguments noted by the original author:
    #   sig_tail="upper"  -> specify if only pos or neg values are wanted
    #   cmap_range        -> specify the colormap range, e.g. (-.3, 0)
    for frame in frames:
        sns.corrplot(frame)
    plt.show()
Exemple #3
0
    def load_dataset(name: str) -> pd.DataFrame:
        """Load example dataset.

        If seaborn is present, its datasets can be loaded.
        Physt also includes some datasets in CSV format.
        """
        # Our custom datasets:
        try:
            binary_data = pkgutil.get_data('physt', 'examples/{0}.csv'.format(name))
            return pd.read_csv(io.BytesIO(binary_data))
        except FileNotFoundError:
            pass

        # Seaborn datasets?
        try:
            import seaborn as sns
            import warnings
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                if name in sns.get_dataset_names():
                        return sns.load_dataset(name)
        except ImportError:
            pass

        # Fall through
        raise RuntimeError("Dataset {0} not available.".format(name))
def facetgrid_example():
    """
        Use FacetGrid to visualize distribution of a variable's relationship
        between multiple variables; each plot shows the same relationship
        conditioned on different levels of other variables.
        * Main approach: FacetGrid.map() with name(s) of variables in dataframe
        * FacetGrid initializes the grid and sets up figure and axes
    """
    tips = sns.load_dataset("tips")
    # Columns of tips (first row shown):
    #   total_bill   tip   sex  smoker  day    time  size
    # 0      16.99  1.01  Male     No   Sun  Dinner     2

    # Histogram with split by Time (Dinner, Lunch)
    my_grid = sns.FacetGrid(tips, col="time")  # Splits by 'Dinner' and 'Lunch'
    my_grid.map(plt.hist, "tip")
    plt.show()

    # Scatterplot with split by Gender (Male, Female)
    my_grid = sns.FacetGrid(tips, col="sex", hue="smoker")  # Splits by M/F
    my_grid.map(plt.scatter, "total_bill", "tip", alpha=.7)
    my_grid.add_legend()
    plt.show()

    # Scatterplot with 4 way split by Time (Dinner, Lunch) and Smoker (Yes, No)
    my_grid = sns.FacetGrid(tips, row="smoker", col="time", margin_titles=True)
    my_grid.map(sns.regplot, "size", "total_bill", color=".3",
                fit_reg=False, x_jitter=.1)
    plt.show()

    # Barplot split by Day (Fri, Sat, Sun) grouped by Gender (Male, Female).
    # The facet size keyword is ``height`` (seaborn 0.9 renamed it from ``size``).
    my_grid = sns.FacetGrid(tips, col="day", height=4, aspect=.5)
    my_grid.map(sns.barplot, "sex", "total_bill")
    plt.show()
def scatterplot():
    '''Fancy scatterplots, using the package "seaborn" '''
    import seaborn as sns

    df = sns.load_dataset("iris")
    # ``height`` replaces the ``size`` keyword, renamed in seaborn 0.9.
    sns.pairplot(df, hue="species", height=2.5)
    C2_8_mystyle.printout_plain('multiScatterplot.png')
Exemple #6
0
    def test_pandasw(self):
        """Check that the pandas ipywidget wrapper returns a truthy result."""
        if not self.has_ipywidgets():
            raise self.SkipTest("This test requires ipywidgets")

        from seaborn import load_dataset
        iris_frame = load_dataset("iris")
        widget = pdw.plot(iris_frame)
        assert widget
def scatterplot():
    """Draw a species-coloured pair plot of iris and hand it to mystyle for output."""
    import seaborn as sns
    sns.set()
    sns.set_context('poster')

    df = sns.load_dataset("iris")
    # ``height`` replaces the ``size`` keyword, renamed in seaborn 0.9.
    sns.pairplot(df, hue="species", height=2.5)
    mystyle.printout_plain('multiScatterplot.png')
Exemple #8
0
def demo01():
    """Print the head of the iris dataset and show its species-coloured pair plot."""
    import matplotlib.pyplot as plt
    import seaborn as sns
    sns.set()
    iris = sns.load_dataset('iris')
    print(iris.head())
    # ``height`` replaces the ``size`` keyword, renamed in seaborn 0.9.
    sns.pairplot(iris, hue='species', height=1.5)
    plt.show()
Exemple #9
0
def facet_grid():
    """Show histograms of the tip percentage, faceted by sex (rows) and time (columns)."""
    tips = sns.load_dataset('tips')
    tips.head()
    # Tip expressed as a percentage of the total bill.
    tips['tip_pct'] = 100 * tips['tip'] / tips['total_bill']

    # 15 bin edges evenly spaced between 0 and 40.
    bin_edges = np.linspace(0, 40, 15)
    grid = sns.FacetGrid(tips, row="sex", col="time", margin_titles=True)
    grid.map(plt.hist, "tip_pct", bins=bin_edges)
    plt.show()
Exemple #10
0
def series_tiempo():
    """Draw a seaborn factorplot of the ``year`` column of the planets dataset."""
    planets = sns.load_dataset('planets')
    planets.head()

    # The 'white' axes style only applies while the plot is being built.
    with sns.axes_style('white'):
        year_grid = sns.factorplot("year", data=planets, aspect=1.5)
        year_grid.set_xticklabels(step=5)

    plt.show()
def test1():
    """Show a nested box plot of the total bill per day, split by sex."""
    sns.set(style="ticks")

    # Example tips dataset shipped with seaborn
    tips = sns.load_dataset("tips")

    # Bills by day (x axis) and sex (hue)
    box_options = dict(x="day", y="total_bill", hue="sex", palette="PRGn")
    sns.boxplot(data=tips, **box_options)
    sns.despine(offset=10, trim=True)
    plt.show()
Exemple #12
0
def test2():
    """Print the tips dataset and draw a split violin plot of bills by day and sex."""
    import seaborn as sns
    sns.set(style="whitegrid", palette="pastel", color_codes=True)

    # Load the example tips dataset
    tips = sns.load_dataset("tips")
    # print() call form: the Python 2 print statement is a SyntaxError on Python 3.
    print(tips)

    # Draw a nested violinplot and split the violins for easier comparison
    sns.violinplot(x="day", y="total_bill", hue="sex", data=tips, split=True,
                   inner="quart", palette={"Male": "b", "Female": "y"})
    sns.despine(left=True)
Exemple #13
0
def demo02():
    """Print head and shape of iris, of its feature columns, and of its label column."""
    import matplotlib.pyplot as plt
    import seaborn as sns
    sns.set()
    iris = sns.load_dataset('iris')
    X_iris = iris.drop('species', axis=1)  # features: everything except the label
    y_iris = iris['species']  # label column
    for table in (iris, X_iris, y_iris):
        print(table.head())
        print(table.shape)
Exemple #14
0
def plot_iris():
    """Render an xkcd-style iris pair plot and return it as base64-encoded PNG text.

    Returns
    -------
    str
        The PNG bytes encoded with ``base64.encodebytes`` and decoded to str.
    """
    iris = sns.load_dataset("iris")
    sns.set(style="ticks", color_codes=True)
    plt.xkcd()
    # ``height`` replaces the ``size`` keyword, renamed in seaborn 0.9.
    figure = sns.pairplot(iris,
                          height=4,
                          x_vars=["sepal_width", "sepal_length"],
                          y_vars=["petal_width", "petal_length"],
                          hue="species")
    sio = BytesIO()
    figure.savefig(sio, format="png")
    image = base64.encodebytes(sio.getvalue()).decode()
    plt.clf()
    return image
Exemple #15
0
def test():
    """Print the titanic dataset and plot survival probability against several variables."""
    sns.set(style="whitegrid")

    # Load the example Titanic dataset
    titanic = sns.load_dataset("titanic")
    # print() call form: the Python 2 print statement is a SyntaxError on Python 3.
    print(titanic)

    # Set up a grid to plot survival probability against several variables.
    # ``height`` replaces the ``size`` keyword, renamed in seaborn 0.9.
    g = sns.PairGrid(titanic, y_vars="survived",
                     x_vars=["class", "sex", "who", "alone"],
                     height=5, aspect=.5)

    # Draw a seaborn pointplot onto each Axes
    g.map(sns.pointplot, color=sns.xkcd_rgb["plum"])
    g.set(ylim=(0, 1))
    sns.despine(fig=g.fig, left=True)
Exemple #16
0
def get_data():
    """Return the iris measurements reduced to two PCA components plus a label column.

    Returns
    -------
    numpy.ndarray
        One row per sample: the two PCA coordinates followed by an integer
        label (row index // 50).
    """
    # Load the Iris Data
    iris = sns.load_dataset("iris")

    # Now lets get the data and labels
    data = iris[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']].values
    # NOTE(review): labels assume the rows come in consecutive blocks of 50
    # per species -- confirm against the dataset ordering.
    labels = np.array([i//50 for i in range(iris.shape[0])])

    # For now we assume two components, to make plotting easier.
    pca = PCA(n_components=2)

    # Fit model to the data
    pca.fit(data)

    # Compute the transformed data (rotation to PCA space)
    data_reduced = pca.transform(data)

    # reshape(-1, 1) follows the actual row count instead of hard-coding 150.
    return np.concatenate((data_reduced, labels.reshape((-1, 1))), axis=1)
def denggaoxian():
    """Overlay shaded 2-D density plots of sepal width/length for two iris species."""
    sns.set(style="darkgrid")
    iris = sns.load_dataset("iris")

    # One subset per species, paired with the colormap it is drawn in.
    setosa = iris.query("species == 'setosa'")
    virginica = iris.query("species == 'virginica'")
    layers = ((setosa, "Reds"), (virginica, "Blues"))

    # Square 8x8 figure with equal axis scaling.
    f, ax = plt.subplots(figsize=(8, 8))
    ax.set_aspect("equal")

    for subset, cmap_name in layers:
        ax = sns.kdeplot(subset.sepal_width, subset.sepal_length,
                         cmap=cmap_name, shade=True, shade_lowest=False)

    # Text labels, coloured from the second-to-last shade of each palette.
    blue = sns.color_palette("Blues")[-2]
    red = sns.color_palette("Reds")[-2]
    ax.text(2.5, 8.2, "user1", size=16, color=blue)
    ax.text(3.8, 4.5, "user2", size=16, color=red)
    plt.show()
def pairgrid_pairplot_example():
    """
        Draw a grid of small subplots with the same plot type in each.
        In a PairGrid every row and column is assigned to a different
        variable, giving a plot of each pairwise relationship
        (a scatterplot matrix).
        * PairGrid shows different relationships conditioned on different
          levels of other variables.
        * pairplot is a quicker look at the dataset
    """
    iris = sns.load_dataset("iris")

    # Plain relationship plot
    plain_grid = sns.PairGrid(iris)
    plain_grid.map(plt.scatter)
    plt.show()

    # Same plot, coloured by a separate categorical variable
    species_grid = sns.PairGrid(iris, hue="species", palette="Set2")
    species_grid.map(plt.scatter)
    species_grid.add_legend()
    plt.show()
Exemple #19
0
def download():
    """Render an iris regression plot and return it as an SVG attachment.

    Returns
    -------
    http.Response
        Response whose body is the SVG bytes, with headers marking it as a
        download named ``graph.svg``.
    """
    # Load the example iris dataset
    iris = sns.load_dataset("iris")

    # Plot sepal width as a function of sepal length, one fit per species.
    # ``height`` replaces the ``size`` keyword, renamed in seaborn 0.9.
    g = sns.lmplot(
        x="sepal_length", y="sepal_width", hue="species", truncate=True, height=5, data=iris
    )

    # Use more informative axis labels than are provided by default
    # NOTE(review): labels say mm -- confirm the unit of the iris columns.
    g.set_axis_labels("Sepal length (mm)", "Sepal width (mm)")

    # Serialize the figure into an in-memory buffer as SVG.
    fig = g.fig
    buf = BytesIO()
    fig.canvas.print_figure(buf, format='svg')

    headers = {
        'Content-Type': 'image/svg+xml',
        'Content-Disposition': 'attachment; filename="graph.svg"',
    }
    return http.Response(buf.getvalue(), headers=headers)
Exemple #20
0
def iris():
    """Fit a Gaussian naive Bayes classifier on iris and print its test accuracy."""
    # sklearn.cross_validation was removed in scikit-learn 0.20; the same
    # function is provided by sklearn.model_selection.
    from sklearn.model_selection import train_test_split
    import matplotlib.pyplot as plt
    import seaborn as sns
    sns.set()
    iris = sns.load_dataset('iris')
    print(iris.head())
    print(iris.shape)
    X_iris = iris.drop('species', axis=1)  # feature columns
    y_iris = iris['species']  # label column
    Xtrain, Xtest, ytrain, ytest = train_test_split(X_iris, y_iris, random_state=1)
    from sklearn.naive_bayes import GaussianNB  # 1. choose the model class
    model = GaussianNB()  # 2. instantiate the model
    model.fit(Xtrain, ytrain)  # 3. fit the model to the data
    y_model = model.predict(Xtest)  # 4. predict on new data
    from sklearn.metrics import accuracy_score
    print(accuracy_score(ytest, y_model))
"""
Scatterplot with categorical and numerical semantics
====================================================

_thumb: .45, .5

"""
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="whitegrid")

# Example diamonds dataset shipped with seaborn
diamonds = sns.load_dataset("diamonds")

# Order in which the clarity grades are mapped to hues
clarity_ranking = ["I1", "SI2", "SI1", "VS2", "VS1", "VVS2", "VVS1", "IF"]

# Square figure without left/bottom spines
f, ax = plt.subplots(figsize=(6.5, 6.5))
sns.despine(f, left=True, bottom=True)

# Scatter plot of price against carat; point colour encodes clarity and
# point size encodes depth
sns.scatterplot(
    data=diamonds, ax=ax,
    x="carat", y="price",
    hue="clarity", hue_order=clarity_ranking,
    size="depth", sizes=(1, 8),
    palette="ch:r=-.2,d=.3_r",
    linewidth=0,
)
Exemple #22
0
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

# importing the dataset
iris_df = sns.load_dataset("iris")

print(iris_df.head())

# Feature set: every column except the label
X = iris_df.drop(["species"], axis=1)

# Label set, converted from species names to integer codes
le = LabelEncoder()
y = le.fit_transform(iris_df["species"])

# 80% training / 20% test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=0
)

# Scaler to be applied on the training and test data
sc = StandardScaler()
Exemple #23
0
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl

titanic = sns.load_dataset('titanic')
from pdsh_pandas import display

planets = sns.load_dataset('planets')
print(planets.shape)
# head must be called; printing the bare attribute only shows the bound method.
print(planets.head())

# Simple aggregations on a Series of 10 random values
rng = np.random.RandomState(42)
ser = pd.Series(rng.rand(10))
print(ser.sum())
print(ser.mean())

# Row-wise mean of a two-column frame, then summary statistics of planets
df = pd.DataFrame({'A': rng.rand(5), 'B': rng.rand((5))})
print(df.mean(axis=1))
print(planets.dropna().describe())

# Group-by demonstration: sum of data1 per key
df = pd.DataFrame(
    {
        'key': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C', 'A'],
        'data1': rng.rand(10)
    },
    columns=['key', 'data1'])

print(df)
print(df.groupby('key').sum())
Exemple #24
0
#Iris Data sorted into columns
#Hugh O'Reilly 04/03/18
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy as sc

# sns.load_dataset() takes the *name* of a seaborn example dataset, not a
# file path; a local CSV file is read with pandas directly.
iris = pd.read_csv("data/iris.csv")
iris.head()
Exemple #25
0
plt.axis('equal')
plt.show()

#%%

sns.set_style("white")
sns.kdeplot(x, y)

#%%

sns.kdeplot(x, y, cmap="Blues", shade=True, shade_lowest=True, n_levels=5)

#%%

# Contour plot with Iris dataset
df = sns.load_dataset('iris')

# Basic 2D contour plot
sns.set_style("white")
sns.kdeplot(df.sepal_width, df.sepal_length)

#%%

# shade=True used to be passed twice in this call, which Python rejects as
# a SyntaxError (repeated keyword argument).
sns.kdeplot(df.sepal_width,
            df.sepal_length,
            cmap="Reds",
            shade=True,
            bw=.15)

#%%
Exemple #26
0
d.plot()
d.plot(subplots=True)  # draw each column in its own subplot

# Bar charts
fig, axes = plt.subplots(2, 1)
s = pd.Series(rand(10).cumsum(), index=list('abcdefghij'))
s.plot(kind='bar', ax=axes[0], color='k', alpha=0.5)
s.plot(kind='barh', ax=axes[1], color='k', alpha=0.5)

d = pd.DataFrame(rand(10, 4).cumsum(0),
                 index=list('abcdefghij'),
                 columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'))
d.plot(kind='bar', alpha=0.5)

# Stacked bar chart
d.plot(kind='bar', stacked=True, alpha=0.5)

# Histogram
import seaborn
tips = seaborn.load_dataset('tips')
tips['tip_pct'] = tips['tip'] / tips['total_bill']
tips['tip_pct'].hist()

# Density plot
tips['tip_pct'].plot(kind='kde')

# Scatter-plot matrix.  The top-level pd.scatter_matrix alias was removed
# from pandas; the function lives in pandas.plotting.
from sklearn import datasets
iris = datasets.load_iris()
pd.plotting.scatter_matrix(pd.DataFrame(iris.data), diagonal='kde', alpha=0.5)
Exemple #27
0
import warnings
warnings.simplefilter('ignore')

import numpy
from matplotlib import pyplot
import seaborn

import probscale
# Transparent axes and figure backgrounds
clear_bkgd = {'axes.facecolor': 'none', 'figure.facecolor': 'none'}
seaborn.set(style='ticks', context='talk', color_codes=True, rc=clear_bkgd)

# Example data shipped with the seaborn package
tips = seaborn.load_dataset("tips")
iris = seaborn.load_dataset("iris")

# Plotting positions of the total bill, rescaled by 100 for the x axis
position, bill = probscale.plot_pos(tips['total_bill'])
position *= 100

fig, ax = pyplot.subplots(figsize=(6, 3))
ax.plot(position, bill, label='Bill amount', linestyle='none', marker='.')
ax.set_xlabel('Percentile')
ax.set_ylabel('Total Bill (USD)')
# Logarithmic y axis limited to the range 1-100
ax.set_yscale('log')
ax.set_ylim(bottom=1, top=100)
seaborn.despine()
# -*- coding:utf-8 -*-
# __author__ = "Lyon"
# Date = 2018/5/31

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg
from matplotlib.figure import Figure
import numpy as np
import pandas as pd
import seaborn as sns

# ------------------------------------------------------------
# Why data visualization matters: Anscombe's quartet
# ------------------------------------------------------------
anscombe_df = sns.load_dataset("anscombe")
print(anscombe_df.head())
# Per-dataset mean and variance.  The result used to be computed and then
# discarded, so it is printed here; string aggregation names replace the
# numpy callables, which pandas now warns about when passed to agg().
print(anscombe_df.groupby("dataset").agg(["mean", "var"]))

sns.set(style="ticks")
sns.lmplot(x="x",
           y="y",
           col="dataset",
           hue="dataset",
           data=anscombe_df,
           col_wrap=2,
           ci=None,
           palette="muted",
           size=4,
           scatter_kws={
               "s": 50,
Exemple #29
0
# 导入库
import sys

import seaborn as sn
sn.set_style("whitegrid")

import matplotlib.pyplot as plt

names = sn.get_dataset_names()

print(f'dataset names: {names}')

# Load one of the bundled example datasets (a CSV file)
tips = sn.load_dataset("tips")

# Two regression plots of tip against total bill, each saved at 600 dpi:
#   1. a single regression with no grouping
#   2. one regression per smoker group; hue selects the grouping column
for out_path, grouping in (
    ("./lmplot1.png", {}),
    ("./lmplot2.png", {"hue": "smoker", "markers": ["o", "*"]}),
):
    sn.lmplot(x="total_bill", y="tip", data=tips, **grouping)
    plt.savefig(out_path, dpi=600)

# col+hue 双分组参数,既分组,又分子图绘制
# 同时控制axes 列数,及 size 尺寸
sn.lmplot(x="total_bill",
          y="tip",
          col="day",
          hue="day",
import numpy as np
import pandas as pd
from numpy.random import randn
import matplotlib as mlp
import matplotlib.pyplot as plt
import seaborn as sns

df = sns.load_dataset('flights')
# Wide table: one row per year, one column per month.  Keyword arguments
# are used because pandas >= 2.0 rejects positional pivot arguments.
df2 = df.pivot(index='year', columns='month', values='passengers')
print(df2)

# Other variants tried by the original author:
#   sns.clustermap(df2).savefig('cluster1.png')
#   sns.clustermap(df2, col_cluster=False).savefig('cluster2.png')

# Standardization
# standard_scale = 0 means standardization by rows
sns.clustermap(df2, standard_scale=0).savefig('cluster3.png')
sns.clustermap(df2, standard_scale=1).savefig('cluster4.png')
Exemple #31
0
    N = m + n;M = m * n / N;K = 1.36
    p = np.arange(m)/(m-1.)
    yq = qy(p)
    yl = qy(p - K/np.sqrt(M))
    yu = qy(p + K/np.sqrt(M))
    x=x.values
    plt.plot(x, yq,linestyle=' ', marker='o', color='k');
    plt.plot(x, yl,color='k',alpha=0.4);
    plt.plot(x, yu,color='k',alpha=0.4)






# Keep only the first two columns of the tips dataset and relabel them 0 and 1.
tips = sns.load_dataset("tips").iloc[:,[0,1]];tips.columns=[0,1]


# NOTE(review): x is read here but only assigned two lines further down;
# this looks like scratch code executed out of order -- confirm the
# intended statement order.
stats.probplot(x, dist="norm", plot=plt)
x.sort_values()
x=tips[0]
# Probability plot of the log of 1000 uniform samples against a uniform
# distribution (no fitted line), then both axes switched to log scale.
_, xr = \
stats.probplot(np.log(np.random.rand(1000)), fit=False,dist='uniform', plot=plt);
plt.gca().set_xscale("log");plt.gca().set_yscale("log")



# NOTE(review): y and a are not defined anywhere in the visible code, and
# the results of the next two expressions are discarded.
stats.probplot(y, fit=False,dist='uniform')[1]
stats.probplot(y, fit=False,dist='norm')[1]
# Negated element-wise log of a 100x2 frame of uniform samples.
df=-pd.DataFrame(np.random.rand(100,2)).applymap(np.log)
plt.scatter(a[0].sort_values(ascending=False),a[1].sort_values(ascending=False))
Exemple #32
0
from seaborn import load_dataset
import pandashack  # noqa: F401
from pandas.testing import assert_frame_equal

df_iris = load_dataset('iris')


def test_simple_mutate():
    """mutate() with a plain column expression must match the equivalent assign()."""
    def ratio(frame):
        return frame.sepal_length / frame.petal_width

    via_assign = df_iris.copy().assign(new=ratio)
    via_mutate = df_iris.copy().mutate(new='sepal_length / petal_width')
    assert_frame_equal(via_assign, via_mutate)


def test_mutate_with_methods():
    """mutate() expressions may call column methods such as ``mean()``."""
    def scaled_mean(frame):
        return frame.sepal_length.mean() / frame.petal_width

    via_assign = df_iris.copy().assign(new=scaled_mean)
    via_mutate = df_iris.copy().mutate(new='sepal_length.mean() / petal_width')
    assert_frame_equal(via_assign, via_mutate)


def test_mutate_with_variables():
    """mutate() expressions may reference a local variable, with or without ``@``."""
    # The name ``a`` appears inside the expression strings below, so it has
    # to stay a local variable with exactly this name.
    a = 3
    via_assign = df_iris.copy().assign(
        new=lambda frame: frame.sepal_length * a / frame.petal_width)
    with_at_sign = df_iris.copy().mutate(new='sepal_length *@a / petal_width')
    without_at_sign = df_iris.copy().mutate(new='sepal_length *a / petal_width')
    assert_frame_equal(via_assign, with_at_sign)
    assert_frame_equal(with_at_sign, without_at_sign)

Exemple #33
0
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(style="darkgrid")

fmri = sns.load_dataset("fmri")

# One line plot per subject (5 per row), frontal region only; event is
# encoded by both colour and line style.
sns.relplot(
    data=fmri.query("region == 'frontal'"),
    kind="line",
    x="timepoint", y="signal",
    hue="event", style="event",
    col="subject", col_wrap=5,
    height=3, aspect=.75, linewidth=2.5,
)

plt.show()
Exemple #34
0
 def setUp(self):
     """Load the seaborn ``titanic`` dataset into ``self.df``."""
     titanic = sns.load_dataset('titanic')
     self.df = titanic
Exemple #35
0
sns.boxplot(data1, vert = False)

# violin
%matplotlib inline
data1 = stats.norm(0,5).rvs(100)
data2 = np.concatenate([stats.gamma(5).rvs(50)-1,-1*stats.gamma(5).rvs(50)])
sns.boxplot(data=[data1,data2],whis=np.inf)
sns.violinplot(data=[data1,data2])

sns.violinplot(data2,bw=0.01)

sns.violinplot(data1,inner='stick') #adding rugs


##### Regression plots
tips = sns.load_dataset('tips')
tips.head()

sns.lmplot('total_bill','tip',tips) #scatter+linear fit

sns.lmplot('total_bill','tip',tips,
          scatter_kws={'marker':'o','color':'indianred'},
          line_kws={'linewidth':1,'color':'blue'})

sns.lmplot('total_bill','tip',tips,order=4,   # change order of the fit
          scatter_kws={'marker':'o','color':'indianred'},
          line_kws={'linewidth':1,'color':'blue'})

sns.lmplot('total_bill','tip',tips,fit_reg=False) # no fit

tips.head()
"""
Scatterplot with continuous hues and sizes
==========================================

_thumb: .51, .44

"""
import seaborn as sns
sns.set(style="whitegrid")

# Example planets dataset shipped with seaborn
planets = sns.load_dataset("planets")

# Continuous cubehelix colormap used for the year hue
cmap = sns.cubehelix_palette(rot=-.2, as_cmap=True)
g = sns.relplot(
    x="distance", y="orbital_period",
    hue="year", palette=cmap,
    size="mass", sizes=(10, 200),
    data=planets,
)
# Log-log axes with thin minor grid lines on both axes
g.set(xscale="log", yscale="log")
for axis in (g.ax.xaxis, g.ax.yaxis):
    axis.grid(True, "minor", linewidth=.25)
g.despine(left=True, bottom=True)
# One marker style per plotted slice.
shape = ['o', '*', '^']
# Plot param_dic['boundary'] in consecutive slices of 33 values, each with
# its own marker, label and colour, plus a dashed horizontal line at the
# slice mean.
# NOTE(review): count, ind, cond, color and param_dic are defined outside
# the visible code -- presumably count and ind start at 0; confirm.
while count < 3:
    plt.plot(param_dic['boundary'][ind:ind + 33],
             shape[count],
             label=cond[count],
             color=color[count])
    plt.axhline(np.mean(param_dic['boundary'][ind:ind + 33]),
                color=color[count],
                linestyle='--')
    ind = ind + 33
    count += 1
    print(count)

import seaborn as sns
sns.set_theme(style='whitegrid')
# NOTE(review): tips is not used in the visible code below -- confirm
# whether this load is still needed.
tips = sns.load_dataset("tips")
# The same boundary values split into three groups for the violin plot.
data1 = param_dic['boundary'][0:33]
data2 = param_dic['boundary'][33:66]
data3 = param_dic['boundary'][66:]

ax = sns.violinplot(data=[data1, data2, data3], palette="muted")
ax.set_xticklabels(['easy', 'medium', 'hard'])

ax.set_ylabel('Boundary')
ax.set_xlabel('Condition')
plt.title('Boundary Estimates From Model 1')
plt.legend()

##############varsigma############

import seaborn as sns
Exemple #38
0
import seaborn as sns

from bokeh import mpl
from bokeh.plotting import output_file, show

sns.set(style="darkgrid")

# Load the long-form example gammas dataset
gammas = sns.load_dataset("gammas")

# Plot the response with standard error
# NOTE(review): sns.tsplot and bokeh.mpl are legacy APIs -- confirm they
# exist in the installed seaborn and bokeh versions before relying on this.
sns.tsplot(data=gammas, time="timepoint", unit="subject",
           condition="ROI", value="BOLD signal")

# Write the converted figure to tsplot.html.
output_file("tsplot.html", title="tsplot.py example")

show(mpl.to_bokeh())

from flask import Flask, request, jsonify
from flask_restful import Resource, Api

import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.formula.api as sm

app = Flask(__name__)
api = Api(app)

#  Prediction model: ordinary least squares on the tips dataset, with the
#  smoker/sex columns renamed to drinker/gender
tips = sns.load_dataset("tips").rename(
    columns={'smoker': 'drinker', 'sex': 'gender'}
)
formula = 'tip ~ total_bill + size + C(gender) + C(drinker) + C(day) + C(time)'
model = sm.ols(formula, data=tips)  # describe the model
results = model.fit()  # fit the model


class PredictTip(Resource):

	def post(self):
		request_data = request.get_json()

		try:
			columns = ['total_bill', 'gender', 'drinker', 'day', 'time', 'size']
			df = pd.DataFrame(request_data, columns=columns)
		except:
			return {'error': 'You posted bad data!'}

		#  Aggrigation & conversion from Numpy types to native types
Exemple #40
0
# mean, cov = [0, 1], [(1, .5), (.5, 1)]
# data = np.random.multivariate_normal(mean, cov, 200)
# df = pd.DataFrame(data, columns=['x', 'y'])
# f, ax = plt.subplots(figsize=(6, 6))
# sns.kdeplot(df.x, df.y, ax=ax)
# sns.rugplot(df.x, color='g', ax=ax)
# sns.rugplot(df.y, vertical=True, ax=ax)
# plt.show()

# mean, cov = [0, 1], [(1, .5), (.5, 1)]
# data = np.random.multivariate_normal(mean, cov, 200)
# df = pd.DataFrame(data, columns=['x', 'y'])
# f, ax = plt.subplots(figsize=(6, 6))
# cmap = sns.cubehelix_palette(as_cmap=True, dark=0, light=1, reverse=True)
# sns.kdeplot(df.x, df.y, cmap=cmap, n_levels=60, shade=True)
# plt.show()

sns.set(style='whitegrid', color_codes=True)
# Seed derived from the character codes of "categorical"
np.random.seed(sum(map(ord, "categorical")))
tips = sns.load_dataset("tips")
titanic = sns.load_dataset('titanic')
iris = sns.load_dataset('iris')
# palette: the colour palette
# Grouped box plot: the grouping factor is day, drawn at different x positions
# Within each group the factor is time, distinguished by colour
# i.e. a grouping inside a grouping
# print() call form: the Python 2 print statement is a SyntaxError on Python 3.
print(tips)
# sns.boxplot(y="total_bill", x="day", hue="time", data=tips)
# plt.show()
Exemple #41
0
def plot_color_legend():
    """Show how colors map back to the labeled cell types.

    Draws ``cluster_names_to_color`` as a palette strip and labels each
    swatch with the matching index entry.
    """
    sns.palplot(cluster_names_to_color)
    n_colors = cluster_names_to_color.shape[0]
    axis = plt.gca()
    # One tick per colour swatch, labelled with the cluster name.
    axis.set(xticklabels=cluster_names_to_color.index,
             xticks=np.arange(0, n_colors))
    axis.grid(False)


### --- Anscombe's quartet --- ###
# Convenience function that reads the data
# Equivalent to:
# anscombe = pd.read_csv("https://github.com/mwaskom/seaborn-data/raw/master/anscombe.csv")
# But is much easier to write
anscombe = sns.load_dataset('anscombe')


def explore_anscombe(summary):

    statistical = ('mean', 'var', 'std')
    grouped = anscombe.groupby('dataset')

    col = None

    if summary in statistical:
        summarized = getattr(grouped, summary)()
        tidy = summarized.unstack().reset_index()
        tidy = tidy.rename(columns={'level_0': 'variable', 0: summary})
        col = 'variable'
    else:
Exemple #42
0
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
sns.set_style("darkgrid")
titanic = sns.load_dataset("titanic")
print(titanic.head())
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() emitted an extra "None" line.
titanic.info()

#sns.jointplot(x="fare",y="age",data=titanic)
#sns.distplot(titanic["fare"],kde=False) #for single column in dataset
#sns.boxplot(x="class",y="age",data=titanic,palette="rainbow")
#sns.swarmplot(x="class",y="age",data=titanic,palette="Set2")
#sns.countplot(x="sex",data=titanic)
# cor = titanic.corr()
# sns.heatmap(cor,cmap="coolwarm")
# plt.title("titanic.corr()")

# s =  sns.FacetGrid(data=titanic,col="sex")
# s.map(plt.hist,"age")

plt.show()
Exemple #43
0
import seaborn as sns

# ---------------------------------------------------------------------------- #

# Page title and subtitle.
st.title('Integrating Seaborn and Streamlit')
st.subheader('Rebecca Weng | Dec. 2020')

# Drop-down with four plot types; the chosen label is stored in fig_type.
fig_type = st.selectbox('Plot Type',
                        ('Histogram', 'Lineplot', 'Scatterplot', 'Boxplot'))

# ---------------------------------------------------------------------------- #

# Adapted from "Stacked histogram on a log scale" from seaborn.pydata.org
if fig_type == 'Histogram':

    diamonds = sns.load_dataset("diamonds")

    # Workaround for displaying categorical variables
    st.write(diamonds.astype('object'))

    sns.set_theme(style="darkgrid")

    # Slider to determine the number of bins
    bin_num = st.slider("Number of Bins #1",
                        min_value=5,
                        max_value=100,
                        value=30,
                        step=5)

    plt = sns.histplot(diamonds[diamonds['cut'] == 'Ideal'],
                       x="price",
Exemple #44
0
import seaborn as sea
tipsDf = sea.load_dataset("tips")
# Transposed summary statistics of the numeric columns.
print(tipsDf.describe().T)
# Which day brings in more revenue
#print(sea.boxplot(x= "day", y= "total_bill",data=tipsDf))
# Which meal time brings in more revenue
#print(sea.boxplot(x= "time", y= "total_bill",data=tipsDf))
# Relationship between party size and revenue
#print(sea.boxplot(x= "size", y= "total_bill",data=tipsDf))
# Total bill per day split by sex; print() shows whatever boxplot returns.
print(sea.boxplot(x="day", y="total_bill", hue="sex", data=tipsDf))
Exemple #45
0
# NOTE(review): sb and pd are used here before the imports visible below --
# presumably an earlier notebook cell imported them; confirm.
print("'anagrams' data set from seaborn :", sb.load_dataset('anagrams'))
print()
print("car_crashes data set from seaborn :", sb.load_dataset('car_crashes'))
print()
print(pd.read_csv("student_records.csv")) # loading user data set using pandas library


# In[1]:


# Questions 5:
# Which country origin cars are a part of this dataset?

import seaborn as sb
import pandas as pd
data_set = sb.load_dataset('mpg')
print(data_set)

# Wrap the loaded data in a DataFrame and list the distinct origin values.
df = pd.DataFrame(data_set)
print(" country origin :", df.origin.unique())

# Output :
# /home/yogi/Desktop/Python_Code/venv/bin/python /home/yogi/Desktop/Python_Code/
# Lets_Upgrade_Assignments/Day4/Day4_Que5.py
#       mpg  cylinders  ...  origin                       name
# 0    18.0          8  ...     usa  chevrolet chevelle malibu
# 1    15.0          8  ...     usa          buick skylark 320
# 2    18.0          8  ...     usa         plymouth satellite
# 3    16.0          8  ...     usa              amc rebel sst
# 4    17.0          8  ...     usa                ford torino
# ..    ...        ...  ...     ...                        ...
Exemple #46
0
        return newWeights

    def addXNoughts(self, x):
        """Return ``x`` as a 2-D array with a leading column of ones.

        ``x`` is first reshaped to (rows, size of one row), so a flat array
        becomes a single column; a ``uint8`` column of ones is then stacked
        on its left.
        """
        n_rows = x.shape[0]
        as_matrix = np.reshape(x, (n_rows, np.size(x[0])))
        ones_column = np.ones((n_rows, 1), dtype=np.uint8)
        return np.hstack((ones_column, as_matrix))

    def train(self, epochs, x, y, learningRate):
        """Run ``epochs`` gradient-descent updates of ``self.w``, then print the cost.

        ``x`` is passed through ``addXNoughts`` first; every epoch replaces
        ``self.w`` with the result of ``doGradientDescent`` and prints the
        epoch number.
        """
        design = self.addXNoughts(x)
        for epoch in range(epochs):
            self.w = self.doGradientDescent(learningRate, design, y)
            print("epoch " + str(epoch))
        print("\nFinal Cost:")
        final_cost = self.getCost(design, y)
        print(final_cost)


# Training hyper-parameters
learningRate = 0.01
epochs = 200

# The four numeric iris columns as float32, rows shuffled in place
iris = sns.load_dataset('iris').to_numpy()[:, :4].astype(np.float32)
np.random.shuffle(iris)

# First three columns are the inputs, the fourth column is the target
x, y = iris[:, 0:3], iris[:, 3:4]

a = LinearRegression(x)
a.train(epochs, x, y, learningRate)
Exemple #47
0
# -*- coding: utf-8 -*-
"""
Created on Thu Jul  7 16:45:49 2016

@author: Dell
"""

import seaborn as sns; sns.set()
flights = sns.load_dataset("flights")
# Months as rows, years as columns.  Keyword arguments are used because
# pandas >= 2.0 rejects positional pivot arguments.
flights = flights.pivot(index="month", columns="year", values="passengers")
g = sns.clustermap(flights)
import seaborn as sns
import pandas as pd
import numpy as np

# Tick style with matplotlib colour shorthands remapped to the seaborn palette
sns.set(color_codes=True, style="ticks")
iris = sns.load_dataset("iris")
# Pairwise relationships between the iris columns, coloured by species
g = sns.pairplot(data=iris, hue="species", palette="husl")
Exemple #49
0
__author__ = 'Noventa'

import numpy as np
from numpy.random import randn
import pandas as pd

from scipy import stats

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

flights = sns.load_dataset('flights')
print(flights.head())

# Months as rows, years as columns.  Keyword arguments are used because
# pandas >= 2.0 rejects positional pivot arguments.
flights = flights.pivot(index='month', columns='year', values='passengers')
print(flights)

''' Backend error on windows macOS with heat maps... so sad. Will try Bokeh
sns.heatmap(flights)
plt.show()
'''

# NOTE(review): bokeh.charts is a legacy bokeh module -- confirm it exists
# in the installed bokeh version.
import bokeh as bo
from bokeh.charts import HeatMap, output_file, show

output_file('heatmap.html')
p = HeatMap(flights, title='Flights')

show(p)
# -*- coding: utf-8 -*-

# 라이브러리 불러오기
import pandas as pd
import seaborn as sns

# Build a DataFrame from five columns (age, sex, ...) of the titanic dataset
titanic = sns.load_dataset('titanic')
df = titanic.loc[:, ['age', 'sex', 'class', 'fare', 'survived']]

# Split by the class column
grouped = df.groupby(['class'])

# Standard deviation of every numeric column per group, as a DataFrame.
# numeric_only=True skips the text column 'sex'; without it pandas >= 2.0
# raises instead of silently dropping non-numeric columns.
std_all = grouped.std(numeric_only=True)
print(std_all)
print('\n')
print(type(std_all))
print('\n')

# Standard deviation of the fare column per group, as a Series
std_fare = grouped.fare.std()
print(std_fare)
print('\n')
print(type(std_fare))
print('\n')


# Apply agg() to the group object - pass a user-defined function as the argument
def min_max(x):
    """Return the spread of ``x``: its maximum minus its minimum."""
    highest = x.max()
    lowest = x.min()
    return highest - lowest
import matplotlib.backends.backend_pdf
import seaborn as sns

import numpy as np
import pandas as pd
from scipy import stats
import pymc3 as pm
import os.path

if __name__ == '__main__':

	# This script's file name with its last three characters removed
	# (presumably the ".py" suffix); used to name the output PDF.
	base_name = os.path.basename(__file__)[:-3]
	# Open the model context and a multi-page PDF for the figures together.
	with pm.Model() as comparing_groups,\
			matplotlib.backends.backend_pdf.PdfPages('%s.pdf' % base_name) as pdf_all:

		# Observed data: tip amounts, plus an integer code per row for its day.
		tips = sns.load_dataset('tips')
		y = tips['tip'].values
		idx = pd.Categorical(tips['day']).codes  # 4 days, 4 groups
		print(set(idx))
		# priors: one mean and one standard deviation per distinct group code
		means = pm.Normal('means', mu=0, sd=10, shape=len(set(idx)))
		sds = pm.HalfNormal('sds', sd=10, shape=len(set(idx)))
		# likelihood: each row uses the parameters of its own group.
		# Note that this rebinds `y` from the observed array to the model variable.
		y = pm.Normal('y', mu=means[idx], sd=sds[idx], observed=y)

		# Sample, then discard the first 100 draws (presumably as burn-in).
		trace = pm.sample(5000, njobs=1)
		chain = trace[100::]
		# NOTE(review): `plt` is not imported in the visible part of this script —
		# confirm that matplotlib.pyplot is imported as plt elsewhere.
		fig = plt.figure()
		pm.traceplot(chain)
		# Write the current figure as a page of the PDF.
		pdf_all.savefig()
		
# Plot a strip plot with seaborn.  x and y are defined earlier in the script,
# outside this excerpt.  They are passed by keyword because seaborn >= 0.12
# no longer accepts x and y positionally.
ax = sns.stripplot(x=x, y=y)

# Label the x-axis and y-axis
ax.set(xlabel='Days', ylabel='Amount_spend')

# Give the plot a title
plt.title('My first graph')

# 2. STRIP PLOT USING A DATASET THAT SHIPS WITH SEABORN

# White-grid background for the plots that follow
sns.set(style="whitegrid")

# Load the iris dataset
iris = sns.load_dataset('iris')

# Strip plot of sepal length, one strip per species
ax = sns.stripplot(data=iris, x='species', y='sepal_length')

# Title of the plot
plt.title('Graph')

# 3. SWARM PLOT

# White-grid background again
sns.set(style="whitegrid")

# Reload the iris dataset
iris = sns.load_dataset('iris')
# 2   44.5   39.3       45.1   10.4
# 3   17.2   45.9       69.3    9.3
# 4  151.5   41.3       58.5   18.5
# 5  180.8   10.8       58.4   12.9

# Plot: Sales against each of TV, Radio and Newspaper, with kind='reg'.
# `data` is defined outside this excerpt.
# NOTE(review): newer seaborn releases name the `size` argument `height` —
# confirm against the installed seaborn version.
sns.pairplot(data,
             x_vars=['TV','Radio','Newspaper'],
             y_vars='Sales',
             size=7,
             aspect=1,
             kind='reg') # kind : {‘scatter’, ‘reg’},


## IRIS data
iris = sns.load_dataset("iris")
# print() with parentheses: the Python 2 print statement is a SyntaxError on
# Python 3, while this single-argument call behaves the same on both.
print(iris.head())
#    sepal_length  sepal_width  petal_length  petal_width species
# 0           5.1          3.5           1.4          0.2  setosa
# 1           4.9          3.0           1.4          0.2  setosa
# 2           4.7          3.2           1.3          0.2  setosa
# 3           4.6          3.1           1.5          0.2  setosa
# 4           5.0          3.6           1.4          0.2  setosa


## IRIS plot1
# Draw scatterplots for joint relationships and histograms for univariate distributions
sns.pairplot(iris)

## IRIS plot2
# Show different levels of a categorical variable by the color of plot elements:
## Color palettes
## Seaborn supports color palettes for styling.
## A color palette can also be used as a Matplotlib colormap.
current_palette = sns.color_palette()
sns.palplot(current_palette)

sns.palplot(sns.color_palette("Blues"))

sns.palplot(sns.dark_palette("muted purple", input="xkcd"))


## One-dimensional distribution plots
## One-dimensional data is drawn with a real-valued distribution plot such as a
## histogram when the values are real numbers, and with a count plot when they are categorical.
iris = sns.load_dataset("iris")          # iris data
titanic = sns.load_dataset("titanic")    # Titanic data
tips = sns.load_dataset("tips")          # tips data
flights = sns.load_dataset("flights")    # airline passenger data


## One-dimensional real-valued distribution plots
## These plots describe the distribution of the data.
## Unlike Matplotlib's plain histogram, they offer kernel density and rug
## displays, multi-dimensional joint distributions, and so on.
## The one-dimensional real-valued distribution commands are rugplot, kdeplot and distplot.

# A rug plot marks each data point as a small tick (rug) on the x-axis,
# showing where the actual data values lie.
x = iris.petal_length.values
"""
Violinplot from a wide-form dataset
===================================

_thumb: .6, .45
"""
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="whitegrid")

# Brain-network correlation data: three header rows, first column as the index
df = sns.load_dataset("brain_networks", header=[0, 1, 2], index_col=0)

# Keep only the columns whose "network" level is one of the selected networks
used_networks = [1, 3, 4, 5, 6, 7, 8, 11, 12, 13, 16, 17]
network_ids = df.columns.get_level_values("network").astype(int)
used_columns = network_ids.isin(used_networks)
df = df.loc[:, used_columns]

# Correlate the columns, then average the correlations within each network
full_corr = df.corr()
corr_df = full_corr.groupby(level="network").mean()
# Integer index labels so that sorting is numeric, then transpose
corr_df.index = corr_df.index.astype(int)
corr_df = corr_df.sort_index()
corr_df = corr_df.T

# Create the figure and axes to draw on
f, ax = plt.subplots(figsize=(11, 6))

# Violin plot with a bandwidth narrower than the default
violin_options = {"palette": "Set3", "bw": .2, "cut": 1, "linewidth": 1}
sns.violinplot(data=corr_df, **violin_options)
Exemple #56
0
# print(ebola2.variable[0].split('_'))
# print(ebola2.variable[1947].split('_')[1])
# Use the str accessor for string handling and the dt (datetime) accessor for date handling.
# ebola3 = ebola2.variable.str.split('_')
# print(ebola3)
# print(ebola3.str.get(1))
# ebola2['status'] = ebola3.str.get(0)
# ebola2['country'] = ebola3.str.get(1)
# print(ebola2)
# ebola2['new'] = ebola2['status']+ebola2['country']
# print(ebola2)
# --------------------------
# print(ebola.info())
# print(ebola.head())
# ebola['Date'] = pd.to_datetime(ebola['Date'])
# print(ebola.info())
# print(ebola.head())
# print(ebola['Date'][0])
# print(ebola['Date'][0].year)
# print(ebola['Date'][0].month)
# print(ebola['Date'][0].day)
# ebola['year'] = ebola['Date'].dt.year
# ebola['month'] = ebola['Date'].dt.month
# ebola['day'] = ebola['Date'].dt.day
# print(ebola)
# Load the tips dataset and show it together with its column summary.
tips = sns.load_dataset('tips')
print(tips)
print(tips.info())

# Convert the two columns to str, in the same order as before, then show the
# column summary again.
for column_name in ('sex', 'total_bill'):
    tips[column_name] = tips[column_name].astype(str)
print(tips.info())
"""
Horizontal bar plots
====================

"""
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="whitegrid")

# Initialize the matplotlib figure
f, ax = plt.subplots(figsize=(6, 15))

# Load the example car crash dataset, ordered by total crashes (descending).
# DataFrame.sort() was removed from pandas; sort_values() is its replacement.
crashes = sns.load_dataset("car_crashes").sort_values("total", ascending=False)

# Plot the total crashes
sns.set_color_codes("pastel")
sns.barplot(x="total", y="abbrev", data=crashes,
            label="Total", color="b")

# Plot the crashes where alcohol was involved, drawn over the total bars
sns.set_color_codes("muted")
sns.barplot(x="alcohol", y="abbrev", data=crashes,
            label="Alcohol-involved", color="b")

# Add a legend and informative axis label
ax.legend(ncol=2, loc="lower right", frameon=True)
ax.set(xlim=(0, 24), ylabel="",
       xlabel="Automobile collisions per billion miles")
sns.despine(left=True, bottom=True)
Exemple #58
0
import seaborn as sns

# Load the tips dataset and take its first rows once.
df = sns.load_dataset("tips")
preview = df.head()

# Print the preview and export the same rows as an HTML table.
print(preview)
preview.to_html("Ch14_1_4.html")


Exemple #59
0
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 21 15:53:11 2016

@author: shumpei
"""

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
#import pivottablejs.pivot_ui as pvui 

df=pd.read_csv('unrestricted_hcp_freesurfer.csv')

# plot distribution
sns_dist = sns.distplot(df.FS_3rdVent_Vol)
# save a figure
# distplot returns a matplotlib Axes, which has no savefig() method, so the
# file is written through the Axes' parent Figure.
sns_dist.get_figure().savefig('distExample2.eps')

# load example small data set
iris = sns.load_dataset("iris")  # the iris dataset, familiar from R
# pairplot returns a grid object that provides savefig() itself
sns_pair = sns.pairplot(iris, hue="species")
sns_pair.savefig('pairExample.eps')
Exemple #60
0
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
flights = sns.load_dataset('flights')
tips = sns.load_dataset('tips')
# Correlation matrix of the numeric columns only.  tips also has non-numeric
# columns (such as sex); older pandas dropped them from corr() silently, but
# pandas >= 2.0 raises on them, so they are filtered out explicitly.
tc = tips.select_dtypes(include="number").corr()

# sns.heatmap(tc,annot=True)
# plt.show()
# Month-by-year table of passenger values
fp = flights.pivot_table(index="month",columns="year",values="passengers")
# print(fp)
# sns.heatmap(fp,cmap="magma",linecolor="white",linewidths=.5)
# plt.show()

#
# sns.clustermap(fp)
# plt.show()