def test_seabornw(self):
    """Smoke-test the seaborn ipython widgets on seaborn's example datasets.

    Raises ``self.SkipTest`` when ``self.has_ipywidgets()`` is false.
    Every ``snw`` wrapper is simply called; the test passes if none raises.
    """
    if not self.has_ipywidgets():
        raise self.SkipTest("This test requires ipywidgets")

    tips = sns.load_dataset("tips")
    snw.jointplot(tips)

    titanic = sns.load_dataset("titanic")
    snw.countplot(titanic)

    tips = sns.load_dataset("tips")
    snw.jointplot(tips)
    snw.swarmplot(tips)
    snw.lmplot(tips)

    exercise = sns.load_dataset("exercise")
    snw.factorplot(exercise)

    snw.violinplot(tips)
    snw.stripplot(tips)
    snw.swarmplot(tips)
    snw.pointplot(tips)
    snw.barplot(tips)

    # Fixed seed so the heat-map input is reproducible.
    np.random.seed(0)
    uniform_data = np.random.rand(10, 12)
    snw.heatmap(uniform_data)

    flights = sns.load_dataset("flights")
    # Keyword arguments: DataFrame.pivot is keyword-only from pandas 2.0 on,
    # and the keyword form works on older versions as well.
    flights = flights.pivot(index="month", columns="year", values="passengers")
    snw.clustermap(flights)
def corrplot_example():
    """
    Bird's-eye view of a dataset: draw its correlation matrix as a heat map.

    Per the original author's note, ``sns.corrplot`` also runs a permutation
    test to obtain p values; on a huge dataset that takes a while and the
    p values are not relevant.
    """
    sns.set_context(rc={"figure.figsize": (8, 8)})  # figure size

    frames = (
        sns.load_dataset("titanic").dropna(),  # dataset 1, incomplete rows dropped
        sns.load_dataset("attention"),         # dataset 2
    )
    # Optional arguments left unused by the author:
    #   sig_tail="upper"      -> only positive or only negative values
    #   cmap_range(-.3, 0)    -> colormap range
    for frame in frames:
        sns.corrplot(frame)
    plt.show()
def load_dataset(name: str) -> pd.DataFrame: """Load example dataset. If seaborn is present, its datasets can be loaded. Physt also includes some datasets in CSV format. """ # Our custom datasets: try: binary_data = pkgutil.get_data('physt', 'examples/{0}.csv'.format(name)) return pd.read_csv(io.BytesIO(binary_data)) except FileNotFoundError: pass # Seaborn datasets? try: import seaborn as sns import warnings with warnings.catch_warnings(): warnings.simplefilter("ignore") if name in sns.get_dataset_names(): return sns.load_dataset(name) except ImportError: pass # Fall through raise RuntimeError("Dataset {0} not available.".format(name))
def facetgrid_example():
    """
    Show one relationship conditioned on the levels of other variables.

    * ``FacetGrid(...)`` initialises the grid (figure and axes).
    * ``FacetGrid.map(func, *column_names)`` draws ``func`` on every facet.

    Four figures are produced from the tips data, each shown with
    ``plt.show()`` before the next one is built.
    """
    tips = sns.load_dataset("tips")
    # tips.head() looks like:
    #    total_bill   tip     sex smoker  day    time  size
    # 0       16.99  1.01    Male     No  Sun  Dinner     2

    # 1) Histogram of the tip, one column per meal time (Dinner, Lunch)
    by_time = sns.FacetGrid(tips, col="time")
    by_time.map(plt.hist, "tip")
    plt.show()

    # 2) Scatterplot split by sex, coloured by smoker
    by_sex = sns.FacetGrid(tips, col="sex", hue="smoker")
    by_sex.map(plt.scatter, "total_bill", "tip", alpha=.7)
    by_sex.add_legend()
    plt.show()

    # 3) Four-way split: smoker on rows, meal time on columns
    by_smoker_time = sns.FacetGrid(tips, row="smoker", col="time",
                                   margin_titles=True)
    by_smoker_time.map(sns.regplot, "size", "total_bill",
                       color=".3", fit_reg=False, x_jitter=.1)
    plt.show()

    # 4) Barplot of total bill by sex, one column per day
    by_day = sns.FacetGrid(tips, col="day", size=4, aspect=.5)
    by_day.map(sns.barplot, "sex", "total_bill")
    plt.show()
def scatterplot():
    '''Pairwise scatterplots of the iris data, made with the package "seaborn".

    The figure is handed to ``C2_8_mystyle.printout_plain`` under the name
    'multiScatterplot.png'.
    '''
    import seaborn as sns

    iris_frame = sns.load_dataset("iris")
    pairplot_options = {"hue": "species", "size": 2.5}
    sns.pairplot(iris_frame, **pairplot_options)
    C2_8_mystyle.printout_plain('multiScatterplot.png')
def test_pandasw(self):
    """Check that the pandas ipython widget returns a truthy value for iris.

    Raises ``self.SkipTest`` when ``self.has_ipywidgets()`` is false.
    """
    widgets_available = self.has_ipywidgets()
    if not widgets_available:
        raise self.SkipTest("This test requires ipywidgets")

    import seaborn as sns

    iris_frame = sns.load_dataset("iris")
    assert pdw.plot(iris_frame)
def scatterplot():
    """Draw the iris pair plot in seaborn's 'poster' context.

    The figure is handed to ``mystyle.printout_plain`` under the name
    'multiScatterplot.png'.
    """
    import seaborn as sns

    # Default seaborn theme first, then the larger 'poster' scaling.
    sns.set()
    sns.set_context('poster')

    iris_frame = sns.load_dataset("iris")
    pairplot_options = {"hue": "species", "size": 2.5}
    sns.pairplot(iris_frame, **pairplot_options)
    mystyle.printout_plain('multiScatterplot.png')
def demo01():
    """Print the first iris rows, then show a species-coloured pair plot."""
    import seaborn as sns
    import matplotlib.pyplot as plt

    sns.set()
    flowers = sns.load_dataset('iris')
    preview = flowers.head()
    print(preview)

    sns.pairplot(flowers, hue='species', size=1.5)
    plt.show()
def facet_grid():
    """Show histograms of the tip percentage, faceted by sex (rows) and time (columns)."""
    tips = sns.load_dataset('tips')
    tips.head()
    # Tip as a percentage of the total bill.
    tips['tip_pct'] = 100 * tips['tip'] / tips['total_bill']

    bin_edges = np.linspace(0, 40, 15)
    facets = sns.FacetGrid(tips, row="sex", col="time", margin_titles=True)
    facets.map(plt.hist, "tip_pct", bins=bin_edges)
    plt.show()
def series_tiempo():
    """Plot the planets dataset by year on a white axes style and show it."""
    planets = sns.load_dataset('planets')
    planets.head()

    with sns.axes_style('white'):
        year_grid = sns.factorplot("year", data=planets, aspect=1.5)
        # Label only every fifth tick so the years stay readable.
        year_grid.set_xticklabels(step=5)
    plt.show()
def test1():
    """Show a nested boxplot of total bill by day and sex for the tips data."""
    sns.set(style="ticks")

    # Example dataset
    tips = sns.load_dataset("tips")

    # One box per (day, sex) combination
    boxplot_options = dict(x="day", y="total_bill", hue="sex",
                           data=tips, palette="PRGn")
    sns.boxplot(**boxplot_options)

    sns.despine(offset=10, trim=True)
    plt.show()
def test2():
    """Print the tips data and draw a split violin plot of bills by day and sex.

    The figure is only drawn; nothing in this function shows or saves it.
    """
    import seaborn as sns

    sns.set(style="whitegrid", palette="pastel", color_codes=True)

    # Load the example tips dataset
    tips = sns.load_dataset("tips")
    # print() call form: valid on Python 3 and, for one argument, on Python 2.
    print(tips)

    # Draw a nested violinplot and split the violins for easier comparison
    sns.violinplot(x="day", y="total_bill", hue="sex", data=tips, split=True,
                   inner="quart", palette={"Male": "b", "Female": "y"})
    sns.despine(left=True)
def demo02():
    """Split the iris data into features and target, printing head and shape of each."""
    import seaborn as sns
    import matplotlib.pyplot as plt

    sns.set()
    flowers = sns.load_dataset('iris')
    print(flowers.head())
    print(flowers.shape)

    # Feature matrix: every column except the species label
    features = flowers.drop('species', axis=1)
    print(features.head())
    print(features.shape)

    # Target vector: the species label
    target = flowers['species']
    print(target.head())
    print(target.shape)
def plot_iris():
    """Render an xkcd-styled iris pair plot and return it as base64 PNG text.

    Returns
    -------
    str
        The PNG bytes encoded with ``base64.encodebytes`` and decoded to text.
    """
    iris = sns.load_dataset("iris")
    sns.set(style="ticks", color_codes=True)
    plt.xkcd()

    grid = sns.pairplot(
        iris,
        size=4,
        x_vars=["sepal_width", "sepal_length"],
        y_vars=["petal_width", "petal_length"],
        hue="species",
    )

    png_buffer = BytesIO()
    grid.savefig(png_buffer, format="png")
    png_bytes = png_buffer.getvalue()
    encoded = base64.encodebytes(png_bytes).decode()

    plt.clf()
    return encoded
def test():
    """Print the titanic data and plot survival against four categorical variables.

    A ``PairGrid`` with ``survived`` on the y axis gets one pointplot per
    x variable (class, sex, who, alone); the y range is fixed to [0, 1].
    The figure is only drawn; nothing in this function shows or saves it.
    """
    sns.set(style="whitegrid")

    # Load the example Titanic dataset
    titanic = sns.load_dataset("titanic")
    # print() call form: valid on Python 3 and, for one argument, on Python 2.
    print(titanic)

    # Set up a grid to plot survival probability against several variables
    g = sns.PairGrid(titanic, y_vars="survived",
                     x_vars=["class", "sex", "who", "alone"],
                     size=5, aspect=.5)

    # Draw a seaborn pointplot onto each Axes
    g.map(sns.pointplot, color=sns.xkcd_rgb["plum"])
    g.set(ylim=(0, 1))
    sns.despine(fig=g.fig, left=True)
def get_data():
    """Return the iris data projected onto two principal components, with labels.

    Returns
    -------
    numpy.ndarray
        Array with three columns: the two PCA coordinates and an integer
        label computed as ``row_index // 50``.
        NOTE(review): the labels match the species only if the dataset is
        ordered in blocks of 50 rows per species - confirm.
    """
    # Load the Iris Data
    iris = sns.load_dataset("iris")

    # Numeric measurements only
    data = iris[['sepal_length', 'sepal_width',
                 'petal_length', 'petal_width']].values
    labels = np.array([i // 50 for i in range(iris.shape[0])])

    # Two components, to make plotting easier.
    pca = PCA(n_components=2)
    pca.fit(data)
    # Rotate the data into PCA space
    data_reduced = pca.transform(data)

    # reshape((-1, 1)) follows the actual row count; the previous hard-coded
    # (150, 1) duplicated iris.shape[0] and would break for any other length.
    return np.concatenate((data_reduced, labels.reshape((-1, 1))), axis=1)
def denggaoxian():
    """Show overlaid 2-D density plots of sepal width/length for two iris species.

    Setosa is drawn with the "Reds" colormap and virginica with "Blues";
    two text labels are added before the figure is shown.
    """
    sns.set(style="darkgrid")
    iris = sns.load_dataset("iris")

    # One sub-frame per species
    setosa = iris.query("species == 'setosa'")
    virginica = iris.query("species == 'virginica'")

    # Square figure with equal axis scaling
    f, ax = plt.subplots(figsize=(8, 8))
    ax.set_aspect("equal")

    # The two density plots share everything but data and colormap
    density_style = dict(shade=True, shade_lowest=False)
    ax = sns.kdeplot(setosa.sepal_width, setosa.sepal_length,
                     cmap="Reds", **density_style)
    ax = sns.kdeplot(virginica.sepal_width, virginica.sepal_length,
                     cmap="Blues", **density_style)

    # Text labels, coloured with the second-darkest shade of each palette
    red = sns.color_palette("Reds")[-2]
    blue = sns.color_palette("Blues")[-2]
    ax.text(2.5, 8.2, "user1", size=16, color=blue)
    ax.text(3.8, 4.5, "user2", size=16, color=red)
    plt.show()
def pairgrid_pairplot_example():
    """
    Draw scatterplot matrices of the iris data with ``PairGrid``.

    In a PairGrid each row and column is assigned to a different variable,
    so every cell shows one pairwise relationship with the same plot type.

    * PairGrid gives full control over what is drawn in the cells.
    * pairplot is the quicker look at a dataset.
    """
    iris = sns.load_dataset("iris")

    # Plain relationship plot
    plain_grid = sns.PairGrid(iris)
    plain_grid.map(plt.scatter)
    plt.show()

    # Same plot, coloured by the categorical 'species' column
    coloured_grid = sns.PairGrid(iris, hue="species", palette="Set2")
    coloured_grid.map(plt.scatter)
    coloured_grid.add_legend()
    plt.show()
def download():
    """Return an iris regression plot as a downloadable SVG response.

    Returns
    -------
    http.Response
        Body is the SVG bytes; the headers mark it as an attachment named
        "graph.svg".
    """
    # Load the example iris dataset
    iris = sns.load_dataset("iris")

    # Sepal width as a function of sepal length, one fit per species
    grid = sns.lmplot(
        x="sepal_length",
        y="sepal_width",
        hue="species",
        truncate=True,
        size=5,
        data=iris,
    )
    # Use more informative axis labels than are provided by default
    grid.set_axis_labels("Sepal length (mm)", "Sepal width (mm)")

    # Render the figure into memory as SVG
    svg_buffer = BytesIO()
    grid.fig.canvas.print_figure(svg_buffer, format='svg')

    response_headers = {
        'Content-Type': 'image/svg+xml',
        'Content-Disposition': 'attachment; filename="graph.svg"',
    }
    return http.Response(svg_buffer.getvalue(), headers=response_headers)
def iris():
    """Train a Gaussian naive Bayes classifier on iris and print its accuracy.

    The data is split with ``train_test_split(..., random_state=1)``; the
    accuracy on the held-out part is printed.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20; keep it only
    # as a fallback for very old installations.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split
    import matplotlib.pyplot as plt
    import seaborn as sns

    sns.set()
    iris = sns.load_dataset('iris')
    print(iris.head())
    print(iris.shape)

    X_iris = iris.drop('species', axis=1)
    # print(X_iris.head())
    # print(X_iris.shape)
    y_iris = iris['species']
    # print(y_iris.head())
    # print(y_iris.shape)

    Xtrain, Xtest, ytrain, ytest = train_test_split(X_iris, y_iris,
                                                    random_state=1)

    from sklearn.naive_bayes import GaussianNB  # 1. choose the model class
    model = GaussianNB()                        # 2. instantiate the model
    model.fit(Xtrain, ytrain)                   # 3. fit the model to the data
    y_model = model.predict(Xtest)              # 4. predict on new data

    from sklearn.metrics import accuracy_score
    print(accuracy_score(ytest, y_model))
""" Scatterplot with categorical and numerical semantics ==================================================== _thumb: .45, .5 """ import seaborn as sns import matplotlib.pyplot as plt sns.set(style="whitegrid") # Load the example diamonds dataset diamonds = sns.load_dataset("diamonds") # Draw a scatter plot while assigning point colors and sizes to different # variables in the dataset f, ax = plt.subplots(figsize=(6.5, 6.5)) sns.despine(f, left=True, bottom=True) clarity_ranking = ["I1", "SI2", "SI1", "VS2", "VS1", "VVS2", "VVS1", "IF"] sns.scatterplot(x="carat", y="price", hue="clarity", size="depth", palette="ch:r=-.2,d=.3_r", hue_order=clarity_ranking, sizes=(1, 8), linewidth=0, data=diamonds, ax=ax)
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

# Dataset
iris_df = sns.load_dataset("iris")
print(iris_df.head())

# Features: everything but the species column
X = iris_df.drop(["species"], axis=1)

# Labels: the species column, encoded as integers
y = iris_df["species"]
le = LabelEncoder()
y = le.fit_transform(y)

# 80 % training / 20 % test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=0
)

# Scaler for the training and test data
sc = StandardScaler()
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl

titanic = sns.load_dataset('titanic')

from pdsh_pandas import display

planets = sns.load_dataset('planets')
print(planets.shape)
# head must be called: printing the bare attribute shows the bound method
# together with the whole frame, not the first rows.
print(planets.head())

# Simple aggregations on a Series
rng = np.random.RandomState(42)
ser = pd.Series(rng.rand(10))
print(ser.sum())
print(ser.mean())

# Row-wise mean of a small DataFrame
df = pd.DataFrame({'A': rng.rand(5), 'B': rng.rand((5))})
print(df.mean(axis=1))

# Summary statistics of the planets data without missing values
print(planets.dropna().describe())

# Group-by: sum of data1 per key
df = pd.DataFrame(
    {
        'key': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C', 'A'],
        'data1': rng.rand(10)
    },
    columns=['key', 'data1'])
print(df)
print(df.groupby('key').sum())
#Iris Data sorted into columns
#Hugh O'Reilly 04/03/18
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy as sc

# sns.load_dataset expects the *name* of an online example dataset, not a
# file path; "data/iris.csv" is a local CSV, so read it with pandas.
# NOTE(review): assumes the file lives at data/iris.csv relative to the
# working directory - confirm.
iris = pd.read_csv("data/iris.csv")
iris.head()
plt.axis('equal')
plt.show()

#%%
sns.set_style("white")
sns.kdeplot(x, y)

#%%
sns.kdeplot(x, y, cmap="Blues", shade=True, shade_lowest=True, n_levels=5)

#%%
# Contour plot with Iris dataset
df = sns.load_dataset('iris')

# Basic 2D contour plot
sns.set_style("white")
sns.kdeplot(df.sepal_width, df.sepal_length)

#%%
# shade=True was passed twice here, which is a SyntaxError
# ("keyword argument repeated"); it is given once now.
sns.kdeplot(df.sepal_width, df.sepal_length, cmap="Reds", shade=True, bw=.15)

#%%
d.plot()
d.plot(subplots=True)  # draw every column in its own subplot

# Bar charts
fig, axes = plt.subplots(2, 1)
s = pd.Series(rand(10).cumsum(), index=list('abcdefghij'))
s.plot(kind='bar', ax=axes[0], color='k', alpha=0.5)
s.plot(kind='barh', ax=axes[1], color='k', alpha=0.5)

d = pd.DataFrame(rand(10, 4).cumsum(0),
                 index=list('abcdefghij'),
                 columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'))
d.plot(kind='bar', alpha=0.5)

# Stacked bar chart
d.plot(kind='bar', stacked=True, alpha=0.5)

# Histogram
import seaborn
tips = seaborn.load_dataset('tips')
tips['tip_pct'] = tips['tip'] / tips['total_bill']
tips['tip_pct'].hist()

# Density plot
tips['tip_pct'].plot(kind='kde')

# Scatter-plot matrix
from sklearn import datasets
iris = datasets.load_iris()
# scatter_matrix lives in pandas.plotting; the top-level pd.scatter_matrix
# alias has been removed from pandas.
pd.plotting.scatter_matrix(pd.DataFrame(iris.data), diagonal='kde', alpha=0.5)
import warnings
warnings.simplefilter('ignore')  # silence warnings before the heavy imports

import numpy
from matplotlib import pyplot
import seaborn
import probscale

# Transparent backgrounds for axes and figure
clear_bkgd = {'axes.facecolor': 'none', 'figure.facecolor': 'none'}
seaborn.set(style='ticks', context='talk', color_codes=True, rc=clear_bkgd)

# Example data shipped with seaborn
tips = seaborn.load_dataset("tips")
iris = seaborn.load_dataset("iris")

# Plotting positions of the total bill, rescaled to percent
position, bill = probscale.plot_pos(tips['total_bill'])
position *= 100

# Percentile plot with a logarithmic y axis
fig, ax = pyplot.subplots(figsize=(6, 3))
ax.plot(position, bill, marker='.', linestyle='none', label='Bill amount')
ax.set_xlabel('Percentile')
ax.set_ylabel('Total Bill (USD)')
ax.set_yscale('log')
ax.set_ylim(bottom=1, top=100)
seaborn.despine()
# -*- coding:utf-8 -*- # __author__ = "Lyon" # Date = 2018/5/31 import matplotlib as mpl import matplotlib.pyplot as plt from matplotlib.backends.backend_agg import FigureCanvasAgg from matplotlib.figure import Figure import numpy as np import pandas as pd import seaborn as sns # ------------------------------------------------------------ # 数据可视化的重要性 Anscombe's quartet 安斯库姆四重奏 # ------------------------------------------------------------ anscombe_df = sns.load_dataset("anscombe") print(anscombe_df.head()) anscombe_df.groupby("dataset").agg([np.mean, np.var]) sns.set(style="ticks") sns.lmplot(x="x", y="y", col="dataset", hue="dataset", data=anscombe_df, col_wrap=2, ci=None, palette="muted", size=4, scatter_kws={ "s": 50,
# 导入库 import sys import seaborn as sn sn.set_style("whitegrid") import matplotlib.pyplot as plt names = sn.get_dataset_names() print(f'dataset names: {names}') #sys.exit(0) # 导入自带数据 tips = sn.load_dataset("tips") # csv 文件 # 单一分组参数 sn.lmplot(x="total_bill", y="tip", data=tips) plt.savefig("./lmplot1.png", dpi=600) # 分组的线性回归图,hue参数控制分类属性 sn.lmplot(x="total_bill", y="tip", hue="smoker", markers=["o", "*"], data=tips) plt.savefig("./lmplot2.png", dpi=600) # col+hue 双分组参数,既分组,又分子图绘制 # 同时控制axes 列数,及 size 尺寸 sn.lmplot(x="total_bill", y="tip", col="day", hue="day",
import numpy as np
import pandas as pd
from numpy.random import randn
import matplotlib as mlp
import matplotlib.pyplot as plt
import seaborn as sns

df = sns.load_dataset('flights')
# Keyword arguments: DataFrame.pivot is keyword-only from pandas 2.0 on,
# and the keyword form works on older versions as well.
df2 = df.pivot(index='year', columns='month', values='passengers')
print(df2)

#sns.clustermap(df2).savefig('cluster1.png')
#sns.clustermap(df2, col_cluster = False).savefig('cluster2.png')

# Standardization
# standard_scale = 0 means standardization by rows
sns.clustermap(df2, standard_scale=0).savefig('cluster3.png')
sns.clustermap(df2, standard_scale=1).savefig('cluster4.png')
N = m + n;M = m * n / N;K = 1.36 p = np.arange(m)/(m-1.) yq = qy(p) yl = qy(p - K/np.sqrt(M)) yu = qy(p + K/np.sqrt(M)) x=x.values plt.plot(x, yq,linestyle=' ', marker='o', color='k'); plt.plot(x, yl,color='k',alpha=0.4); plt.plot(x, yu,color='k',alpha=0.4) tips = sns.load_dataset("tips").iloc[:,[0,1]];tips.columns=[0,1] stats.probplot(x, dist="norm", plot=plt) x.sort_values() x=tips[0] _, xr = \ stats.probplot(np.log(np.random.rand(1000)), fit=False,dist='uniform', plot=plt); plt.gca().set_xscale("log");plt.gca().set_yscale("log") stats.probplot(y, fit=False,dist='uniform')[1] stats.probplot(y, fit=False,dist='norm')[1] df=-pd.DataFrame(np.random.rand(100,2)).applymap(np.log) plt.scatter(a[0].sort_values(ascending=False),a[1].sort_values(ascending=False))
from seaborn import load_dataset
import pandashack  # noqa: F401
from pandas.testing import assert_frame_equal

df_iris = load_dataset('iris')


def test_simple_mutate():
    """mutate() with a plain column expression equals the matching assign()."""
    expected = df_iris.copy().assign(
        new=lambda frame: frame.sepal_length / frame.petal_width)
    actual = df_iris.copy().mutate(new='sepal_length / petal_width')
    assert_frame_equal(expected, actual)


def test_mutate_with_methods():
    """mutate() expressions may call methods on columns."""
    expected = df_iris.copy().assign(
        new=lambda frame: frame.sepal_length.mean() / frame.petal_width)
    actual = df_iris.copy().mutate(new='sepal_length.mean() / petal_width')
    assert_frame_equal(expected, actual)


def test_mutate_with_variables():
    """mutate() resolves a local variable written as ``@a`` or as bare ``a``."""
    # The name `a` is referenced from inside the expression strings below,
    # so it must stay exactly `a`.
    a = 3
    expected = df_iris.copy().assign(
        new=lambda frame: frame.sepal_length * a / frame.petal_width)
    with_at_prefix = df_iris.copy().mutate(new='sepal_length *@a / petal_width')
    with_bare_name = df_iris.copy().mutate(new='sepal_length *a / petal_width')
    assert_frame_equal(expected, with_at_prefix)
    assert_frame_equal(with_at_prefix, with_bare_name)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_theme(style="darkgrid")

fmri = sns.load_dataset("fmri")
# Only the rows of the frontal region are plotted
frontal = fmri.query("region == 'frontal'")

# One line panel per subject, wrapped after five columns
sns.relplot(
    x="timepoint",
    y="signal",
    hue="event",
    style="event",
    col="subject",
    col_wrap=5,
    height=3,
    aspect=.75,
    linewidth=2.5,
    kind="line",
    data=frontal,
)
plt.show()
def setUp(self):
    """Load seaborn's titanic example dataset and keep it on ``self.df``."""
    titanic = sns.load_dataset('titanic')
    self.df = titanic
sns.boxplot(data1, vert = False) # violin %matplotlib inline data1 = stats.norm(0,5).rvs(100) data2 = np.concatenate([stats.gamma(5).rvs(50)-1,-1*stats.gamma(5).rvs(50)]) sns.boxplot(data=[data1,data2],whis=np.inf) sns.violinplot(data=[data1,data2]) sns.violinplot(data2,bw=0.01) sns.violinplot(data1,inner='stick') #adding rugs ##### Regression plots tips = sns.load_dataset('tips') tips.head() sns.lmplot('total_bill','tip',tips) #scatter+linear fit sns.lmplot('total_bill','tip',tips, scatter_kws={'marker':'o','color':'indianred'}, line_kws={'linewidth':1,'color':'blue'}) sns.lmplot('total_bill','tip',tips,order=4, # change order of the fit scatter_kws={'marker':'o','color':'indianred'}, line_kws={'linewidth':1,'color':'blue'}) sns.lmplot('total_bill','tip',tips,fit_reg=False) # no fit tips.head()
""" Scatterplot with continuous hues and sizes ========================================== _thumb: .51, .44 """ import seaborn as sns sns.set(style="whitegrid") # Load the example planets dataset planets = sns.load_dataset("planets") cmap = sns.cubehelix_palette(rot=-.2, as_cmap=True) g = sns.relplot( data=planets, x="distance", y="orbital_period", hue="year", size="mass", palette=cmap, sizes=(10, 200), ) g.set(xscale="log", yscale="log") g.ax.xaxis.grid(True, "minor", linewidth=.25) g.ax.yaxis.grid(True, "minor", linewidth=.25) g.despine(left=True, bottom=True)
shape = ['o', '*', '^'] while count < 3: plt.plot(param_dic['boundary'][ind:ind + 33], shape[count], label=cond[count], color=color[count]) plt.axhline(np.mean(param_dic['boundary'][ind:ind + 33]), color=color[count], linestyle='--') ind = ind + 33 count += 1 print(count) import seaborn as sns sns.set_theme(style='whitegrid') tips = sns.load_dataset("tips") data1 = param_dic['boundary'][0:33] data2 = param_dic['boundary'][33:66] data3 = param_dic['boundary'][66:] ax = sns.violinplot(data=[data1, data2, data3], palette="muted") ax.set_xticklabels(['easy', 'medium', 'hard']) ax.set_ylabel('Boundary') ax.set_xlabel('Condition') plt.title('Boundary Estimates From Model 1') plt.legend() ##############varsigma############ import seaborn as sns
import seaborn as sns
from bokeh import mpl
from bokeh.plotting import output_file, show

sns.set(style="darkgrid")

# Long-form example dataset
gammas = sns.load_dataset("gammas")

# Time-series plot of the BOLD signal, one condition per ROI
sns.tsplot(
    data=gammas,
    time="timepoint",
    unit="subject",
    condition="ROI",
    value="BOLD signal",
)

# Convert the current matplotlib figure and show it through bokeh
output_file("tsplot.html", title="tsplot.py example")
show(mpl.to_bokeh())
from flask import Flask, request, jsonify from flask_restful import Resource, Api import numpy as np import pandas as pd import seaborn as sns import statsmodels.formula.api as sm app = Flask(__name__) api = Api(app) # Setup our Prediction model tips = sns.load_dataset("tips") tips.rename(columns={'smoker': 'drinker', 'sex': 'gender'}, inplace=True) formula = 'tip ~ total_bill + size + C(gender) + C(drinker) + C(day) + C(time)' model = sm.ols(formula, data=tips) # Describe model results = model.fit() # Fit model class PredictTip(Resource): def post(self): request_data = request.get_json() try: columns = ['total_bill', 'gender', 'drinker', 'day', 'time', 'size'] df = pd.DataFrame(request_data, columns=columns) except: return {'error': 'You posted bad data!'} # Aggrigation & conversion from Numpy types to native types
# mean, cov = [0, 1], [(1, .5), (.5, 1)]
# data = np.random.multivariate_normal(mean, cov, 200)
# df = pd.DataFrame(data, columns=['x', 'y'])
# f, ax = plt.subplots(figsize=(6, 6))
# sns.kdeplot(df.x, df.y, ax=ax)
# sns.rugplot(df.x, color='g', ax=ax)
# sns.rugplot(df.y, vertical=True, ax=ax)
# plt.show()

# mean, cov = [0, 1], [(1, .5), (.5, 1)]
# data = np.random.multivariate_normal(mean, cov, 200)
# df = pd.DataFrame(data, columns=['x', 'y'])
# f, ax = plt.subplots(figsize=(6, 6))
# cmap = sns.cubehelix_palette(as_cmap=True, dark=0, light=1, reverse=True)
# sns.kdeplot(df.x, df.y, cmap=cmap, n_levels=60, shade=True)
# plt.show()

sns.set(style='whitegrid', color_codes=True)
# Seed derived from the character codes of "categorical"
np.random.seed(sum(map(ord, "categorical")))
tips = sns.load_dataset("tips")
titanic = sns.load_dataset('titanic')
iris = sns.load_dataset('iris')

# palette: the colour palette
# Grouped boxplots: the grouping factor is day, drawn at different x positions
# Grouped boxplots: the sub-grouping factor is time, its levels differ by colour
# This amounts to grouping within a grouping
# print() call form: valid on Python 3 and, for one argument, on Python 2.
print(tips)
# sns.boxplot(y="total_bill", x="day", hue="time", data=tips)
# plt.show()
def plot_color_legend(): """Show how colors map back to the labeled cell types""" sns.palplot(cluster_names_to_color) ax = plt.gca() xticks = np.arange(0, cluster_names_to_color.shape[0]) ax.set(xticklabels=cluster_names_to_color.index, xticks=xticks) ax.grid(False) # plt.show() ### --- Anscombe's quartet --- ### # Convienence function that reads the data # Equivalent to: # anscombe = pd.read_csv("https://github.com/mwaskom/seaborn-data/raw/master/anscombe.csv") # But is much easier to write anscombe = sns.load_dataset('anscombe') def explore_anscombe(summary): statistical = ('mean', 'var', 'std') grouped = anscombe.groupby('dataset') col = None if summary in statistical: summarized = getattr(grouped, summary)() tidy = summarized.unstack().reset_index() tidy = tidy.rename(columns={'level_0': 'variable', 0: summary}) col = 'variable' else:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

sns.set_style("darkgrid")

# Titanic example data: first rows, then column summary
titanic = sns.load_dataset("titanic")
print(titanic.head())
print(titanic.info())

# Plots tried out on this data (enable one at a time):
#sns.jointplot(x="fare",y="age",data=titanic)
#sns.distplot(titanic["fare"],kde=False) #for single column in dataset
#sns.boxplot(x="class",y="age",data=titanic,palette="rainbow")
#sns.swarmplot(x="class",y="age",data=titanic,palette="Set2")
#sns.countplot(x="sex",data=titanic)

# cor = titanic.corr()
# sns.heatmap(cor,cmap="coolwarm")
# plt.title("titanic.corr()")

# s = sns.FacetGrid(data=titanic,col="sex")
# s.map(plt.hist,"age")

plt.show()
import seaborn as sns # ---------------------------------------------------------------------------- # st.title('Integrating Seaborn and Streamlit') st.subheader('Rebecca Weng | Dec. 2020') fig_type = st.selectbox('Plot Type', ('Histogram', 'Lineplot', 'Scatterplot', 'Boxplot')) # ---------------------------------------------------------------------------- # # Adapted from "Stacked histogram on a log scale" from seaborn.pydata.org if fig_type == 'Histogram': diamonds = sns.load_dataset("diamonds") # Workaround for displaying categorical variables st.write(diamonds.astype('object')) sns.set_theme(style="darkgrid") # Slider to determine the number of bins bin_num = st.slider("Number of Bins #1", min_value=5, max_value=100, value=30, step=5) plt = sns.histplot(diamonds[diamonds['cut'] == 'Ideal'], x="price",
import seaborn as sea

tipsDf = sea.load_dataset("tips")

# Transposed summary statistics of the numeric columns
summary = tipsDf.describe().T
print(summary)

# Which day earns more
#print(sea.boxplot(x= "day", y= "total_bill",data=tipsDf))

# Which meal earns more
#print(sea.boxplot(x= "time", y= "total_bill",data=tipsDf))

# Party size versus earnings
#print(sea.boxplot(x= "size", y= "total_bill",data=tipsDf))

# Total bill per day, split by sex
print(sea.boxplot(x="day", y="total_bill", hue="sex", data=tipsDf))
print("'anagrams' data set from seaborn :", sb.load_dataset('anagrams'))
print()
print("car_crashes data set from seaborn :", sb.load_dataset('car_crashes'))
print()
# A user-supplied data set is loaded with the pandas library
print(pd.read_csv("student_records.csv"))

# In[1]:

# Question 5:
# Which country origin cars are a part of this dataset?
import seaborn as sb
import pandas as pd

data_set = sb.load_dataset('mpg')
print(data_set)

df = pd.DataFrame(data_set)
origins = df.origin.unique()
print(" country origin :", origins)

# Output :
# /home/yogi/Desktop/Python_Code/venv/bin/python /home/yogi/Desktop/Python_Code/
# Lets_Upgrade_Assignments/Day4/Day4_Que5.py
# mpg cylinders ... origin name
# 0 18.0 8 ... usa chevrolet chevelle malibu
# 1 15.0 8 ... usa buick skylark 320
# 2 18.0 8 ... usa plymouth satellite
# 3 16.0 8 ... usa amc rebel sst
# 4 17.0 8 ... usa ford torino
# .. ... ... ... ... ...
return newWeights def addXNoughts(self, x): x = np.reshape(x, ((x.shape[0], np.size( x[0])))) # reshapes flat array or 2d array into transposed version x0 = np.ones((x.shape[0], 1), dtype=np.uint8) x = np.hstack((x0, x)) return x def train(self, epochs, x, y, learningRate): x = self.addXNoughts(x) for i in range(epochs): self.w = self.doGradientDescent(learningRate, x, y) print("epoch " + str(i)) #print(self.getCost(x,y)) # shows progress print("\nFinal Cost:") print(self.getCost(x, y)) learningRate = 0.01 epochs = 200 iris = sns.load_dataset('iris').to_numpy()[:, :4].astype(np.float32) np.random.shuffle(iris) x = iris[:, 0:3] y = iris[:, 3:4] a = LinearRegression(x) a.train(epochs, x, y, learningRate)
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 7 16:45:49 2016

@author: Dell
"""
import seaborn as sns; sns.set()

flights = sns.load_dataset("flights")
# Keyword arguments: DataFrame.pivot is keyword-only from pandas 2.0 on,
# and the keyword form works on older versions as well.
flights = flights.pivot(index="month", columns="year", values="passengers")

# Hierarchically clustered heat map of passengers (month x year)
g = sns.clustermap(flights)
import seaborn as sns
import pandas as pd
import numpy as np

sns.set(style="ticks", color_codes=True)

# Pair plot of the iris data, one "husl" colour per species
iris = sns.load_dataset("iris")
g = sns.pairplot(
    iris,
    hue="species",
    palette="husl",
)
__author__ = 'Noventa'

import numpy as np
from numpy.random import randn
import pandas as pd
from scipy import stats
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

flights = sns.load_dataset('flights')
print(flights.head())

# Keyword arguments: DataFrame.pivot is keyword-only from pandas 2.0 on,
# and the keyword form works on older versions as well.
flights = flights.pivot(index='month', columns='year', values='passengers')
print(flights)

'''
Backend error on windows macOS with heat maps... so sad. Will try Bokeh
sns.heatmap(flights)
plt.show()
'''

# Render the pivoted table as a bokeh heat map in an HTML file
import bokeh as bo
from bokeh.charts import HeatMap, output_file, show

output_file('heatmap.html')
p = HeatMap(flights, title='Flights')
show(p)
# -*- coding: utf-8 -*-

# Import libraries
import pandas as pd
import seaborn as sns

# Build a DataFrame from five columns (age, sex, ...) of the titanic dataset
titanic = sns.load_dataset('titanic')
selected_columns = ['age', 'sex', 'class', 'fare', 'survived']
df = titanic.loc[:, selected_columns]

# Split on the class column
grouped = df.groupby(['class'])

# Standard deviation of every column per group, returned as a DataFrame
std_all = grouped.std()
print(std_all)
print('\n')
print(type(std_all))
print('\n')

# Standard deviation of the fare column per group, returned as a Series
std_fare = grouped.fare.std()
print(std_fare)
print('\n')
print(type(std_fare))
print('\n')


# For use with the group object's agg() method, which accepts a
# user-defined function as its argument
def min_max(x):
    """Return the spread of ``x``: its maximum minus its minimum."""
    return x.max() - x.min()
import matplotlib.backends.backend_pdf
# pyplot was used below (plt.figure) without ever being imported, which
# raised NameError at run time.
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from scipy import stats
import pymc3 as pm
import os.path

if __name__ == '__main__':
    # Name the PDF after this script (file name without the ".py" suffix).
    base_name = os.path.basename(__file__)[:-3]
    with pm.Model() as comparing_groups,\
            matplotlib.backends.backend_pdf.PdfPages('%s.pdf' % base_name) as pdf_all:
        tips = sns.load_dataset('tips')
        y = tips['tip'].values
        idx = pd.Categorical(tips['day']).codes  # 4 days, 4 groups
        print(set(idx))

        # priors: one mean and one standard deviation per group
        means = pm.Normal('means', mu=0, sd=10, shape=len(set(idx)))
        sds = pm.HalfNormal('sds', sd=10, shape=len(set(idx)))

        # likelihood: each tip is drawn from its own group's distribution
        y = pm.Normal('y', mu=means[idx], sd=sds[idx], observed=y)

        trace = pm.sample(5000, njobs=1)
        # Drop the first 100 samples
        chain = trace[100::]

        fig = plt.figure()
        pm.traceplot(chain)
        pdf_all.savefig()
# Strip plot of the x/y data defined earlier in the script
ax = sns.stripplot(x, y)

# Axis labels and title
ax.set(xlabel='Days', ylabel='Amount_spend')
plt.title('My first graph')

# 2. STRIPPLOT USING INBUILT DATA-SET GIVEN IN SEABORN

# Background style of the plot
sns.set(style="whitegrid")

# Built-in iris data-set
iris = sns.load_dataset('iris')

# Strip plot of sepal length for every species
ax = sns.stripplot(x='species', y='sepal_length', data=iris)

# Title of the plot
plt.title('Graph')

# 3. SWARMPLOT

# Background style of the plot
sns.set(style="whitegrid")

# Built-in iris data-set
iris = sns.load_dataset('iris')
# 2 44.5 39.3 45.1 10.4
# 3 17.2 45.9 69.3 9.3
# 4 151.5 41.3 58.5 18.5
# 5 180.8 10.8 58.4 12.9

# Plot:
sns.pairplot(data, x_vars=['TV','Radio','Newspaper'], y_vars='Sales', size=7, aspect=1, kind='reg')
# kind : {‘scatter’, ‘reg’},

## IRIS data
iris = sns.load_dataset("iris")
# print() call form: valid on Python 3 and, for one argument, on Python 2.
print(iris.head())
# sepal_length sepal_width petal_length petal_width species
# 0 5.1 3.5 1.4 0.2 setosa
# 1 4.9 3.0 1.4 0.2 setosa
# 2 4.7 3.2 1.3 0.2 setosa
# 3 4.6 3.1 1.5 0.2 setosa
# 4 5.0 3.6 1.4 0.2 setosa

## IRIS plot1
# Draw scatterplots for joint relationships and histograms for univariate distributions
sns.pairplot(iris)

## IRIS plot2
# Show different levels of a categorical variable by the color of plot elements:
## Color palettes
## Seaborn supports color palettes for styling.
## A color palette can also be used as a Matplotlib colormap.
current_palette = sns.color_palette()
sns.palplot(current_palette)

blues = sns.color_palette("Blues")
sns.palplot(blues)

dark_purple = sns.dark_palette("muted purple", input="xkcd")
sns.palplot(dark_purple)

## One-dimensional distribution plots
## Real-valued 1-D data is shown with a real-valued distribution plot such as
## a histogram; categorical 1-D data is shown with a count plot.
iris = sns.load_dataset("iris")        # iris flower data
titanic = sns.load_dataset("titanic")  # Titanic data
tips = sns.load_dataset("tips")        # tips data
flights = sns.load_dataset("flights")  # passenger transport data

## One-dimensional real-valued distribution plots
## These plots describe the distribution of the data. Unlike Matplotlib's
## plain histogram they offer kernel density and rug display as well as
## multi-dimensional joint distributions.
## The 1-D real-valued distribution plot commands are rugplot, kdeplot and distplot.

# A rug plot marks each data position with a small tick (rug) on the x axis,
# showing where the actual observations are.
x = iris.petal_length.values
""" Violinplot from a wide-form dataset =================================== _thumb: .6, .45 """ import seaborn as sns import matplotlib.pyplot as plt sns.set(style="whitegrid") # Load the example dataset of brain network correlations df = sns.load_dataset("brain_networks", header=[0, 1, 2], index_col=0) # Pull out a specific subset of networks used_networks = [1, 3, 4, 5, 6, 7, 8, 11, 12, 13, 16, 17] used_columns = (df.columns.get_level_values("network") .astype(int) .isin(used_networks)) df = df.loc[:, used_columns] # Compute the correlation matrix and average over networks corr_df = df.corr().groupby(level="network").mean() corr_df.index = corr_df.index.astype(int) corr_df = corr_df.sort_index().T # Set up the matplotlib figure f, ax = plt.subplots(figsize=(11, 6)) # Draw a violinplot with a narrower bandwidth than the default sns.violinplot(data=corr_df, palette="Set3", bw=.2, cut=1, linewidth=1)
# print(ebola2.variable[0].split('_'))
# print(ebola2.variable[1947].split('_')[1])

# Use the str accessor for string handling and the datetime accessor for dates.
# ebola3 = ebola2.variable.str.split('_')
# print(ebola3)
# print(ebola3.str.get(1))
# ebola2['status'] = ebola3.str.get(0)
# ebola2['country'] = ebola3.str.get(1)
# print(ebola2)
# ebola2['new'] = ebola2['status']+ebola2['country']
# print(ebola2)
# --------------------------
# print(ebola.info())
# print(ebola.head())
# ebola['Date'] = pd.to_datetime(ebola['Date'])
# print(ebola.info())
# print(ebola.head())
# print(ebola['Date'][0])
# print(ebola['Date'][0].year)
# print(ebola['Date'][0].month)
# print(ebola['Date'][0].day)
# ebola['year'] = ebola['Date'].dt.year
# ebola['month'] = ebola['Date'].dt.month
# ebola['day'] = ebola['Date'].dt.day
# print(ebola)

# Tips data: show the frame and its column types
tips = sns.load_dataset('tips')
print(tips)
print(tips.info())

# Convert two columns to strings and show the column types again
for column in ('sex', 'total_bill'):
    tips[column] = tips[column].astype(str)
print(tips.info())
""" Horizontal bar plots ==================== """ import seaborn as sns import matplotlib.pyplot as plt sns.set(style="whitegrid") # Initialize the matplotlib figure f, ax = plt.subplots(figsize=(6, 15)) # Load the example car crash dataset crashes = sns.load_dataset("car_crashes").sort("total", ascending=False) # Plot the total crashes sns.set_color_codes("pastel") sns.barplot(x="total", y="abbrev", data=crashes, label="Total", color="b") # Plot the crashes where alcohol was involved sns.set_color_codes("muted") sns.barplot(x="alcohol", y="abbrev", data=crashes, label="Alcohol-involved", color="b") # Add a legend and informative axis label ax.legend(ncol=2, loc="lower right", frameon=True) ax.set(xlim=(0, 24), ylabel="", xlabel="Automobile collisions per billion miles") sns.despine(left=True, bottom=True)
import seaborn as sns

df = sns.load_dataset("tips")

# Show the first rows on the console
print(df.head())

# Write the first rows to an HTML table as well
df.head().to_html("Ch14_1_4.html")
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 21 15:53:11 2016

@author: shumpei
"""
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
#import pivottablejs.pivot_ui as pvui

df = pd.read_csv('unrestricted_hcp_freesurfer.csv')

# plot distribution
sns_dist = sns.distplot(df.FS_3rdVent_Vol)

# save a figure
# distplot returns a matplotlib Axes, which has no savefig(); save through
# the Figure that owns the Axes instead.
sns_dist.get_figure().savefig('distExample2.eps')

# load example small data set
iris = sns.load_dataset("iris")  # the iris statistics data set familiar from R
sns_pair = sns.pairplot(iris, hue="species")
# pairplot returns a PairGrid, which provides savefig() itself
sns_pair.savefig('pairExample.eps')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

flights = sns.load_dataset('flights')
tips = sns.load_dataset('tips')

# Correlation matrix of the tips data
tc = tips.corr()
# sns.heatmap(tc,annot=True)
# plt.show()

# Passengers as a month x year table
fp = flights.pivot_table(
    index="month",
    columns="year",
    values="passengers",
)
# print(fp)
# sns.heatmap(fp,cmap="magma",linecolor="white",linewidths=.5)
# plt.show()
#
# sns.clustermap(fp)
# plt.show()