def firmsOneRandSeedPlot(df, rSeed, varsToDraw, firmGroup, argsPlot, cols):
    tmpDF = df[df.randomSeed == rSeed[0]].loc[:, ['tick', firmGroup] + varsToDraw]
    tmpDF = tmpDF.melt(id_vars=['tick', firmGroup], value_vars=varsToDraw)
    cols = min(cols, len(varsToDraw))
    argsPlot.update({'data': tmpDF, 'col_wrap': cols})
    sns.relplot(**argsPlot)
    return
def firmsMultiPlot(df, rSeeds, varsToDraw, firmGroup, argsPlot, cols, rows):
    # Truncate randomSeeds and vars to the proper size
    rSeeds = rSeeds[:cols]
    varsToDraw = varsToDraw[:rows]
    tmpDF = df.loc[df.randomSeed.isin(rSeeds),
                   ['randomSeed', 'tick', firmGroup] + varsToDraw]
    tmpDF = tmpDF.melt(id_vars=['randomSeed', 'tick', firmGroup], value_vars=varsToDraw)
    argsPlot.update({'col': 'randomSeed', 'row': 'variable', 'data': tmpDF})
    sns.relplot(**argsPlot)
    return
def firmsOneVarPlot(df, rSeeds, varsToDraw, firmGroup, argsPlot, cols, rows):
    # Split the seeds into pages of cols * rows panels each and plot one page at a time
    n = cols * rows
    pagsOfSeeds = [rSeeds[i:i + n] for i in range(0, len(rSeeds), n)]
    for pagOfSeeds in pagsOfSeeds:
        tmpDF = df.loc[df.randomSeed.isin(pagOfSeeds),
                       ['randomSeed', 'tick', firmGroup] + varsToDraw]
        argsPlot.update({'y': varsToDraw[0], 'col': 'randomSeed',
                         'data': tmpDF, 'col_wrap': cols})
        sns.relplot(**argsPlot)
    return
pvalue_dict = dict(
    zip(factor_names + ['runksum', 'comparison', 'pvalue'],
        list(factors_values) + [[p_value_mock], [pvalue_type], [pvalue]]))
# print(pvalue_dict)
p_values_df = p_values_df.append(pd.DataFrame(pvalue_dict), ignore_index=True)

# sns.set(rc={'figure.figsize': (2, 2)})
# sns.set(font_scale=2)
g = sns.relplot('threshold_factor', 'runksum', 'fb_type', col='metric_type',
                data=p_values_df, kind='line', row='comparison',
                col_order=['magnitude', 'n_spindles', 'duration', 'amplitude'],
                row_order=pvalue_types, height=2.5,
                palette=['#3CB4E8', '#438BA8', '#002A3B', '#FE4A49'])
# g.axes[0][0].semilogy()
# g.fig.set_size_inches(10, 10)

# Mark the +/-1.65 significance thresholds on every facet
[[ax.axhline(p, color='k', linestyle='--') for ax in g.axes.flatten()] for p in [-1.65, 1.65]]
for color, axes in zip(['#3CB4E8', '#438BA8', '#002A3B', '#FE4A49'], g.axes):
    [ax.axhspan(-1.65, 1.65, color=color, alpha=0.2) for ax in axes.flatten()]
# plt.tight_layout()
[ax.set_title('') for ax in g.axes.flatten()]
[
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.model_selection import GridSearchCV

data = pd.read_csv('1.csv', header=None)
data.columns = ['score', 'date']
features = ['score', 'date']
X = data[features]
x = data['date']
y = data['score']

plt.title("B00SKQFT4I")
plt.xlabel("date")
plt.ylabel("score")
plt.style.use('fivethirtyeight')
plt.plot(x, y)
plt.gca().xaxis.set_major_locator(ticker.MultipleLocator(20))
plt.show()

# NOTE: a 'star' column is not among the columns named above; it must exist in
# the CSV for the two relplot calls below to run
sns.relplot(x="star", y="score", data=data)

km = KMeans(6)
km.fit(X)
data['cluster_k6'] = km.predict(X)
sns.relplot(x="star", y="score", hue="cluster_k6", palette="Set1", data=data)

# Sweep n_clusters and plot the negated mean CV score for each k
param_test1 = {'n_clusters': np.arange(2, 11, 1)}
gsearch1 = GridSearchCV(estimator=KMeans(), param_grid=param_test1, cv=5)
gsearch1.fit(X)
score_list = -pd.DataFrame(gsearch1.cv_results_)['mean_test_score']
sns.lineplot(x=range(2, 11), y=score_list)
# weekmask = multiyear_df.index % 7 == 0  # Doesn't exactly work with groups.
# Do it based on day of week instead.
multiyear_df.date = pd.to_datetime(multiyear_df.date)
weekmask = multiyear_df.date.dt.dayofweek == 0
multiyear_week_df = multiyear_df[weekmask]

# train_test_mask = multiyear_week_df['snowyear'] % 2 == 0
# stats.describe(train_test_mask)  # A frequency table would be better, but, eh
# multiyear_train = multiyear_week_df[train_test_mask]
# multiyear_test = multiyear_week_df[~train_test_mask]

sns.relplot(x='date', y='value', hue='stationtriplet', kind='line',
            data=multiyear_week_df)  # Egads, this is damn slow on the full dataset.
sns.relplot(x='date', y='value', hue='stationtriplet_codes', kind='line',
            data=multiyear_week_df)  # Egads, this is damn slow on the full dataset.

# Convert "date" to "days since mindate"
datemin = min(multiyear_week_df.date)
multiyear_week_df['dayssincemindate'] = (
    (multiyear_week_df.date - datemin).dt.days) / 365.

# For coregionalization, form a 2-column input/output with the data in col 1
# and the "dimension" (label) in col 2, as sketched below.
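# A minimal sketch of that 2-column layout, assuming a GPy-style coregionalized
# model where the last input column is an integer output index; using
# 'stationtriplet_codes' as that index is an assumption for illustration.
import numpy as np

X = np.column_stack([
    multiyear_week_df['dayssincemindate'].to_numpy(),      # col 1: the input data
    multiyear_week_df['stationtriplet_codes'].to_numpy(),  # col 2: "dimension" (label)
])
Y = multiyear_week_df['value'].to_numpy().reshape(-1, 1)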
sns.kdeplot(dfMaster1Aug.EVI)
sns.kdeplot(dfMaster1Sep.EVI)

dfMasterApril.loc[(dfMasterApril.Latitude < 36) & (dfMasterApril.Latitude > 35)].NDVI
sns.kdeplot(dfMasterMarch.NDVI.loc[(dfMasterMarch.Latitude < 32) & (dfMasterMarch.Latitude > 30)])
sns.kdeplot(dfMasterApril.loc[(dfMasterApril.Latitude < 36) & (dfMasterApril.Latitude > 35)].NDVI)
# dflowlat = dfMaster.loc[(dfMaster.Latitude < 37.5)]

# dfMaster is the current, large, low-NDVI dataset
# dfMaster1 is the old, small, high-NDVI-value dataset
sns.relplot(x='NDVI', y='EVI', hue='year', data=dfMasterApril, legend='full')
sns.relplot(x='NDVI', y='EVI', hue='year', data=dfMaster1April, legend='full')

dfMaster.index.unique()
dfMaster1.index.unique()
dfMaster['cropland_mode'].value_counts()
dfMaster1['CDL'].value_counts()
dfMasterApril['cropland_mode'].value_counts()
dfMaster1April['CDL'].value_counts()
dfMaster.year.value_counts()
dfMaster1.year.value_counts()
dfMasterApril.loc[dfMasterApril.cropland_mode == 24]
def plot(df, output_path, name, selectors, grid=[], hue=None, style=None, folder=None):
    w = selector(df, selectors)
    title = ' | '.join(f"{k}:{v}" for k, v in selectors.items() if not isinstance(v, list))
    print(f'start plotting {name}')
    dfs = df[w].copy()

    if (hue is not None) and pd.api.types.is_numeric_dtype(dfs[hue].dtype):
        dfs[hue] = "#" + dfs[hue].astype(str)

    for col, iscat in (dfs.dtypes == 'category').items():
        if iscat:
            dfs[col] = dfs[col].cat.remove_unused_categories()

    # Assert individuality: every (grid, hue, style, episode) combination must be unique
    groupby = [c for c in (grid + [hue, style, 'episode']) if c is not None]
    dfs = dfs.dropna(subset=groupby)
    df_dup = dfs.duplicated(subset=groupby)
    if df_dup.any():
        print(dfs[df_dup].sort_values(groupby)[groupby])
        raise ValueError('There are duplicates.')

    # Assign the facet dimension with more levels to the columns
    grid.sort(key=lambda l: dfs[l].nunique())
    grid = {n: g for g, n in zip(grid[::-1], ['col', 'row'])}
    grid_order = {
        f'{k}_order': sorted([n for n in dfs[v].unique() if not pd.isnull(n)])
        for k, v in grid.items()
    }

    g = sns.relplot(data=dfs, x='episode', y='value', **grid, **grid_order,
                    hue=hue, style=style, kind="line", ci=None)
    plt.subplots_adjust(top=0.9)
    g.fig.suptitle(title)
    g.fig.patch.set_facecolor('white')

    if folder:
        ensure_directory(os.path.join(output_path, folder))
        filename = os.path.join(output_path, folder, f"{name}.png")
    else:
        filename = os.path.join(output_path, f"{name}.png")
    plt.savefig(filename)
    print(f'Saved {filename}')
    plt.close()
df_array = []
for approach, representation in zip(approaches, representations):
    print((approach, representation))
    for query_type in query_types:
        if approach == "unsupervised_lm" and (representation == "" or query_type == "triggers"):
            continue
        result_path = os.path.join(data_directory, trg_lang, "results", "data", src_lang,
                                   approach, representation, query_type, "output.res")
        print(result_path)
        with open(result_path) as result_file:
            result_strings = result_file.readlines()
        data = []
        for result_string in result_strings:
            if representation != "":
                result = convert_result_string(result_string, approach + "_" + representation, query_type)
            else:
                result = convert_result_string(result_string, approach, query_type)
            data.append(result)
        df = pd.DataFrame(data, columns=["#examples", "p5", "p10", "p20", "mAP", "method", "query_type"])
        df_array.append(df)
        # all_df = pd.concat([all_df, df], ignore_index=True)

# all_df = df_array[0]
# for df in df_array[1:]:
#     all_df = all_df.append(df, ignore_index=True)
all_df = pd.concat(df_array)
print(all_df)

sns_plot = sns.relplot(x="#examples", y="p5", col="query_type", row="method",
                       height=3, kind="line", estimator=None, data=all_df)
sns_plot.savefig(os.path.join(data_directory, trg_lang, "results", "figures", "result.pdf"))
# sns.barplot(data=df_last, x='연도', y='평당분양가격', ci=None)
sns.catplot(data=df_last, x='연도', y='평당분양가격', kind='bar',
            col='지역명', col_wrap=4, ci=None)

# Draw price-per-pyeong by year with a lineplot
plt.figure(figsize=(10, 5))
sns.lineplot(data=df_last, x='연도', y='평당분양가격', hue='지역명')
plt.legend(bbox_to_anchor=(1.02, 1), loc=2, borderaxespad=0)

# relplot
# Instead of overlaying the hue values on one graph, it draws a separate graph per value.
sns.relplot(data=df_last, x='연도', y='평당분양가격', kind='line',
            col='지역명', col_wrap=4, ci=None)

#### Drawing a histogram of numeric data
# A graph that accurately shows the distribution of numeric data.
# Only a single numeric variable may be passed in.
# bins: cut the variable into n bins.
b = df_last['평당분양가격'].hist(bins=10)

# Drawing a histogram with distplot
# distplot raises an error if there are missing values.
price = df_last.loc[df_last['평당분양가격'].notnull(), '평당분양가격']  # also specify which column to use
price
sns.distplot(price)  # kde, rug
def graficoPersonasMuertas(self):
    # Flatten the register of deaths into a 1-D array and plot deaths over time
    arr = np.reshape(self.RegistroMuertos, (len(self.RegistroMuertos), 1)).T[0]
    data = pd.DataFrame({'horas': range(0, len(self.RegistroMuertos)),
                         'personas_muertas': arr})
    sns.relplot(x="horas", y="personas_muertas", kind="line", data=data)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.collections import LineCollection
from matplotlib.colors import ListedColormap, BoundaryNorm

data = pd.read_csv("Tampen.txt", delimiter='\\t', engine='python')
data.to_csv('Tampen1.csv')

df = data[['YEAR', 'TOTALHS', 'TOTALTP']]
x = df['TOTALHS']
y = df['TOTALTP']
z = df['YEAR']

sns.set(style="darkgrid", rc={'figure.figsize': (12, 8)})
sns.relplot(x="YEAR", y="TOTALHS", kind="line", data=df)
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split as split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn import metrics

'''load the data set'''
df_c = pd.read_csv('ctrucks_clean.csv').set_index('supplier').fillna(0.00)
df_c.columns
df_c['supplier'] = df_c.index
df_c.drop('Unnamed: 0', axis=1, inplace=True)

# relplot only accepts kind="scatter" or kind="line"; "dist" is not valid,
# and "line" matches the dashes/markers arguments used here
sns.relplot(x="supplier", y="nbags", col="year",
            dashes=False, markers=True, kind="line", data=df_c)

g = sns.catplot("supplier", col="year", col_wrap=1, data=df_c,
                kind="count", height=3, aspect=6)
g.set_xticklabels(rotation=90)
plt.savefig('suppyear')
plt.figure(figsize=(10, 10))
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

df = pd.read_csv("air_df_mean.csv")
print(df.head(30))

sns.relplot(x="hour", y="NO_2_mean", data=df, kind="line",
            style="location", hue="location", markers=True, dashes=False)
plt.show()
    final_data.loc[:, 'hour'] = np.tile(range(0, 8760), 2)
    final_data.loc[:, 'solar_tech'] = solar_tech
    return final_data

# Make all the data
package_temps = pd.concat(
    [analyze_temp(st) for st in ['swh', 'ptc_notes', 'ptc_tes']],
    axis=0, ignore_index=True)

# Plot the results
palette = dict(
    zip(['min_mean_temp', 'max_mean_temp'], sns.color_palette("rocket_r", 6)))

# Plot the lines on two facets
sns.relplot(x='hour', y="Temp_C", hue="min_or_max", col="solar_tech",
            palette=palette, height=5, aspect=.75,
            facet_kws=dict(sharex=True, sharey=True),
            kind="line", legend="full", data=package_temps)
plot1.set_xticklabels(plot1.get_xticklabels(), rotation=45, ha='right',
                      rotation_mode='anchor', size=14)
plot1.set_yticklabels(plot1.get_yticks(), size=14)
ax.yaxis.set_major_formatter(ticker.ScalarFormatter())
plot1.set_xlabel('')
plot1.set_ylabel('Conversion (%)', size=16)
plt.savefig('conversion_time1.png', format='png', dpi=500, bbox_inches='tight')

dfl[0]['a'] = 'a'
count = 0
for i in dfl:
    print(dfl[count])
    fig = plt.figure(figsize=(11, 7))
    ax = fig.add_subplot(1, 1, 1)
    plot = sns.relplot(x='Date', y='λ (%) CpG', ci=None, col='a', hue='Date',
                       col_wrap=1, kind='scatter', linewidth=0.2, x_jitter=100000,
                       height=3, aspect=1.5, s=30, legend=False, data=dfl[count])
    for ax in plot.axes.flat:
        # ax.set(xlim=(70, 100))
        # ax.xaxis.set_major_locator(ticker.MultipleLocator(10))
        # ax.xaxis.set_major_formatter(ticker.ScalarFormatter())
        ax.set(ylim=(None, 101))
        ax.yaxis.set_major_locator(ticker.MultipleLocator(5))
        ax.yaxis.set_major_formatter(ticker.ScalarFormatter())
        # ax.grid(b=True, which='major')
        # ax.plot([0, 100], [0, 100], linestyle='--', linewidth=1, zorder=0.9, color='grey')
    rowperneuron = int(len(runresult['v']) / N)  # this needs to be an integer
    runresult['t_stim'] = tile(np.linspace(0, runtime, rowperneuron), N)  # need to add n neurons here
    capresult.append(runresult)

# %%
# tidy and plot
capresult = pd.concat(capresult)

# %%
sns.relplot(
    x='t_stim',
    y='v',
    kind='line',
    # hue='sweep',
    row='sweep',
    height=2,
    aspect=8,
    # row='neuron',
    hue='neuron',
    data=capresult)

# %% [markdown]
# I played with the stimulation voltage; it seems like spikes mostly go to the full
# 50 mV amplitude with the given conductances, even for small injections.
#
# That is encouraging: the output is tidy, and now we can try to include the
# per-neuron parameters, to summarise more.
#
# We can do this with a SpikeMonitor object.

# %%
start_scope()
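# %% [markdown]
# A minimal Brian2 sketch of the SpikeMonitor idea above. This is an
# illustration under assumptions, not this notebook's actual model: the group
# `neurons`, its equations, and the run length are invented for the example.

# %%
from brian2 import *  # likely already in scope in this notebook

neurons = NeuronGroup(10, 'dv/dt = (1.1 - v) / (10*ms) : 1',
                      threshold='v > 1', reset='v = 0', method='exact')
spikemon = SpikeMonitor(neurons)
run(100*ms)
# spikemon.i holds the index of the neuron that spiked and spikemon.t the spike
# times; per-neuron spike counts come straight from spikemon.count
print(spikemon.count[:])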
    cond_names, norm_list, model=model, adjust_time=False)

# =============================================================================
# same plot but now facet for readouts
# =============================================================================
figname = "pscan_models"
g = sns.relplot(x="xnorm", y="ylog", kind="line", data=df_new, hue="readout",
                col="pname", height=4,
                facet_kws={"margin_titles": True, "sharex": True, "sharey": True},
                legend="full")

ylim = (None, None)
g.set(ylim=ylim, ylabel="log2FC")
for ax in g.axes.flat:
    ax.set_xscale("log")
    ax.xaxis.set_major_locator(ticker.LogLocator(base=10.0, numticks=100))
[plt.setp(ax.texts, text="") for ax in g.axes.flat]
g.set_titles(row_template='{row_name}', col_template='{col_name}')
alldata = pd.concat([train, test], axis=0).reset_index(drop=True)

alldata["Open Date"] = pd.to_datetime(alldata["Open Date"])
alldata["Year"] = alldata["Open Date"].apply(lambda x: x.year)
alldata["Month"] = alldata["Open Date"].apply(lambda x: x.month)
alldata["Day"] = alldata["Open Date"].apply(lambda x: x.day)
alldata["kijun"] = "2015-04-27"
alldata["kijun"] = pd.to_datetime(alldata["kijun"])
alldata["BusinessPeriod"] = (alldata["kijun"] - alldata["Open Date"]).apply(lambda x: x.days)
alldata = alldata.drop('Open Date', axis=1)
alldata = alldata.drop('kijun', axis=1)

#%%
sb.relplot(x="Open Date", y="revenue", col="City Group", data=train)

#%%
# List the training-data feature columns
cat_cols = alldata.dtypes[alldata.dtypes == 'object'].index.tolist()
num_cols = alldata.dtypes[alldata.dtypes != 'object'].index.tolist()
other_cols = ['Id', 'WhatIsData']
# Remove the extra elements from the lists
cat_cols.remove('WhatIsData')  # drop the train/test indicator flag
num_cols.remove('Id')  # drop Id
# One-hot encode the categorical variables
cat = pd.get_dummies(alldata[cat_cols])
cond_names2 = ["beta"+str(val) for val in arr] # ============================================================================= # run experiment # ============================================================================= exp = multi_exp(time, cond_list, cond_names, cond_names2) norm = matplotlib.colors.Normalize( vmin=np.min(arr), vmax=np.max(arr)) # choose a colormap cm = matplotlib.cm.Blues # create a ScalarMappable and initialize a data structure sm = matplotlib.cm.ScalarMappable(cmap=cm, norm=norm) sm.set_array([]) g = sns.relplot(x = "time", y = "value", kind = "line", data = exp, hue = "cond2", col = "cond", palette = "Blues", height = 5,legend = False) g.set(ylim = (0, 30), xlim = (0,time[-1])) ax = g.axes[0][0] ax.set_ylabel("cell dens. norm.") g.set_titles("{col_name}") cbar = g.fig.colorbar(sm, ax = g.axes, ticks = [1,5,10]) cbar.set_label(r"$\beta$") g.savefig("../figures/prolif_tc_beta.pdf")
# What is pylint? Didn't I already have that?
# I messed with that 'base':conda when I was working with the flask stuff over the weekend

# Use the iris dataset to answer the following questions:
iris = sns.load_dataset('iris')

# What does the distribution of petal lengths look like?
sns.boxplot(data=iris, y='petal_length')
sns.distplot(iris.petal_length)

# Is there a correlation between petal length and petal width?
sns.relplot(x='petal_length', y='petal_width', data=iris)

# Would it be reasonable to predict species based on sepal width and sepal length?
sns.relplot(x='sepal_length', y='sepal_width', col='species', data=iris)

# Which features would be best used to predict species?
sns.relplot(x='petal_length', y='petal_width', col='species', hue='species', data=iris)
# The data is much more tightly clustered together when plotting the petal measurements.

# 1
# Using the lesson as an example, use seaborn's load_dataset function to load the anscombe
# dataset. Use pandas to group the data by the dataset column, and calculate summary
# statistics for each dataset. What do you notice?
anscombe = sns.load_dataset('anscombe')
## Seaborn
# A more modern plotting library built on matplotlib
import seaborn as sns

tips = sns.load_dataset("tips")
dots = sns.load_dataset("dots")
diamonds = sns.load_dataset("diamonds")

sns.relplot(
    data=dots,
    x="time",
    y="firing_rate",
    col="align",
    hue="coherence",
    size="choice",
    kind="line",
    palette=sns.color_palette("rocket_r")
)

### Scatter
sns.set_theme(style="whitegrid", context='paper')
sns.scatterplot(data=diamonds,
                x="carat", y="price",
                hue="clarity", size="depth")

### Boxplot
print("Avg Of Kms Driven",avg_kms_driven) #"KMs Driven" UsedCarData["KMs Driven"].replace(np.nan,avg_kms_driven,inplace=True) #Replace Null values with avg km driven UsedCarData.dropna(subset={"Brand","Condition","Fuel","Model","Registered City","Transaction Type"},axis=0,inplace=True)#Delete the NUll data UsedCarData.reset_index(drop=True,inplace=True) #Reset index UsedCarData.replace("",np.nan,inplace=True) print(UsedCarData) #***************PHASE-II*************# #***Grpah Brand Vs Price**** print(UsedCarData['Fuel'].value_counts()) print(UsedCarData['Registered City'].value_counts()) #****Graph Condition Vs Price*** fig=plt.figure() sns.barplot(x='Condition',y='Price',hue="Condition",data=UsedCarData.loc[:,('Condition','Price')]) #****Graph Fuel Vs Price*** sns.relplot(x='Fuel',y='Price',kind="line",data=UsedCarData.loc[:,('Fuel','Price')]) #****Graph Year Vs Price*** sns.relplot(x='Year',y='Price',kind="line",data=UsedCarData.loc[:,('Year','Price')]) #****Graph Condition Vs Year*** var=UsedCarData.groupby('Condition').Year.sum() fig=plt.figure() ax1 = fig.add_subplot(1,1,1) ax1.set_xlabel('Condition') ax1.set_ylabel('Year') ax1.set_title("Condition Vs Year") var.plot(kind='bar') fig=plt.figure() #****Graph Brand Vs Price*** g=sns.barplot(x='Brand',y='Price',data=UsedCarData.loc[:,('Brand','Price')]) for item in g.get_xticklabels(): item.set_rotation(90) plt.show()
steps = range(500, 10000, 500)
for n_obs in steps:
    texts, labels = DataReader(0.5, dataset=DATASET).take(n_obs)
    s_train = set(texts)
    print(f"train/test overlaps: {len(s_train & s_test)}")  # todo: ensure 0 overlaps
    dataset = [simple_preprocess(t) for t in texts]
    svm_model = tfidf_svm(dataset, labels)
    predicted = svm_model.predict(test_dataset)
    acc = accuracy_score(test_labels, predicted)
    print(f"accuracy with {n_obs} training examples. svm: {acc}")
    accuracy_per_step_svm.append(acc)

df_plot = pd.DataFrame(data={
    'svm': accuracy_per_step_svm,
    'training_examples': list(steps)
})
sns.set()
sns_plot = sns.relplot(data=pd.melt(df_plot, ['training_examples']),
                       x='training_examples', y='value', hue='variable', kind="line")
sns_plot.savefig("baseline_per_examples_polluted.png")
## All of this code chunk needs to be run at one time, otherwise you get weird errors. This
## is true for many plotting commands which are composed of multiple commands.

# %% [markdown]
# The `FacetGrid` tells `seaborn` that we're going to layer graphs, with layers based on
# `hue` and the hues being determined by values of `kind`. Notice that we can add a few
# more details like the aspect ratio of the plot and so on. The documentation for
# [FacetGrid](https://seaborn.pydata.org/generated/seaborn.FacetGrid.html), which we will
# also use for facets below, may be helpful in finding all the options you can control.

# %% [markdown]
# We can also show more than one kind of layer on a single graph; a sketch follows the
# examples below.

# %%
fmri = sns.load_dataset('fmri')

# %%
plt.style.use('seaborn-notebook')
sns.relplot(x='timepoint', y='signal', data=fmri)

# %%
sns.relplot(x='timepoint', y='signal', data=fmri, kind='line')

# %%
sns.relplot(x='timepoint', y='signal', data=fmri, kind='line', hue='event')

# %%
sns.relplot(x='timepoint', y='signal', data=fmri, hue='region', style='event', kind='line')
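# %% [markdown]
# A minimal sketch of the `FacetGrid` layering described above: two plot functions
# mapped onto the same grid, a transparent scatter of the raw observations underneath
# and the mean signal as a line on top. The column and hue choices here are just an
# illustration, not part of the original lesson.

# %%
g = sns.FacetGrid(fmri, col='region', hue='event', aspect=1.2)
g.map(sns.scatterplot, 'timepoint', 'signal', alpha=0.3)
g.map(sns.lineplot, 'timepoint', 'signal')
g.add_legend()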
        acc = 0
        total = np.sum(acc_data.values[:, 3])
        for row in acc_data.values:
            acc += row[1] * row[3] / total
        for j, stat in enumerate([acc, model, num_feats, train, test, attribute]):
            master_df.values[i, j] = stat
        if i == 0:
            title_string = "{} predictor trained on {}, tested on {}".format(
                attribute, train, test)

master_df['feats'] = pd.to_numeric(master_df['feats'])
master_df['acc'] = pd.to_numeric(master_df['acc'])
print(master_df)
print(master_df.dtypes)

# fmri = sns.load_dataset("fmri")
# print(fmri)
# idk = sns.relplot(x="timepoint", y="signal", hue="region", style="event", kind="line", data=fmri)
idk = sns.relplot(x="feats", y="acc", hue="model", kind="line", data=master_df)
# idk = sns.relplot(x="feats", y="acc", kind="line", data=master_df)
plt.title(title_string)
plt.ylim(0, 1)
plt.tight_layout()
plt.savefig('figures/' + title_string.replace(" ", "") + '.png')
# plt.show()
### 6-3. Adding the col option draws a separate graph per value
# You can also use `col_wrap` to set how many columns appear per row.
sns.lmplot(x='total_bill', y='tip', hue='smoker', col='day', col_wrap=2, height=6, data=tips)
plt.show()

## 7. relplot
# Shows the relationship between two columns, but unlike `lmplot` it does not draw a fitted line.
# [relplot documentation](https://seaborn.pydata.org/generated/seaborn.relplot.html?highlight=relplot#seaborn.relplot)

### 7-1. Basic relplot
sns.relplot(x="total_bill", y="tip", hue="day", data=tips)
plt.show()

### 7-2. Splitting the graph with the col option
sns.relplot(x="total_bill", y="tip", hue="day", col="time", data=tips)
plt.show()

### 7-3. Choosing which data columns appear on the rows and columns
sns.relplot(x="total_bill", y="tip", hue="day", row="sex", col="time", data=tips)
plt.show()

### 7-4. Applying a color palette
sns.relplot(x="total_bill", y="tip", hue="day", row="sex", col="time",
            palette='CMRmap_r', data=tips)
# st.area_chart(data_tx)
# st.area_chart(data_daily)
# 'COVID-19 daily data for Texas'

# Sidebar
st.sidebar.title("Data-Set Selector")
st.sidebar.markdown("Select the Charts/Plots accordingly:")

if st.sidebar.checkbox('Show Me The Data - "CSV Data-Set"'):
    '### Data for Texas on a historical basis'
    st.dataframe(data_tx, width=3000, height=700)

if st.sidebar.checkbox('Positive to Recovered Cases Texas'):
    '### Positive to Recovered Cases Texas'
    'As we look at our scatter plot, we can see that as the number of cases rises, recovered cases rise as well'
    'Looking at the "hue" change, we can see the breaking point for this data around the middle of 2020-06'
    ax = sns.relplot(x='datetime', y='positive', hue='recovered', data=data_tx)
    ax = ax.set_xticklabels(rotation=30)
    ax = ax.set_ylabels('Positive Cases in hundred thousands')
    st.pyplot(ax)

if st.sidebar.checkbox("'Deaths' To 'Hospitalized Currently' Cases In Texas"):
    "### 'Deaths' To 'Hospitalized Currently' Cases In Texas"
    'When we look at the "deaths", we see a spike in "deaths" almost 1 month after the initial spike of "positive" cases.'
    ax1 = sns.relplot(x='datetime', y='death', hue='hospitalizedCurrently', data=data_tx)
    ax1 = ax1.set_xticklabels(rotation=30)
    ax1 = ax1.set_ylabels('Current Death Counts')
    st.pyplot(ax1)

# st.vega_lite_chart(data_daily)
""" plota um gráfico de dispersão @params hue define cor de plotagem variando pela coluna "finalizado" """ sns.scatterplot(x="horas_esperadas", y="preco", hue="finalizado", data=df_dados) """ plota um gráfico de relação separado os gráficos pela coluna "finalizado" @params hue define cor de plotagem variando pela coluna "finalizado" @params col define a separação dos gráficos pela coluna "finalizado" """ sns.relplot(x="horas_esperadas", y="preco", hue="finalizado", col="finalizado", data=df_dados) SEED = 20 # se setarmos aqui podemos tirar o random_state das chamadas quando estas usam o np.random np.random.seed(SEED) """ Separação dos dados e treino do modelo. """ x = df_dados[["horas_esperadas", "preco"]] y = df_dados["finalizado"] # método para separar dados from sklearn.model_selection import train_test_split treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.20, # 20% da massa para teste
        0.952, 0.960, 0.964, 0.965, 0.968, 0.969, 0.970, 0.970, 0.971, 0.972,
        0.972, 0.972, 0.973, 0.973, 0.973, 0.973, 0.973, 0.974, 0.974, 0.974
    ],
    'hF': [
        0.948, 0.958, 0.962, 0.964, 0.967, 0.967, 0.968, 0.969, 0.970, 0.970,
        0.970, 0.971, 0.971, 0.972, 0.972, 0.972, 0.972, 0.973, 0.973, 0.973
    ],
}

# df = pd.DataFrame(data)
df = pd.DataFrame(dataes)
ax = sns.relplot(
    data=df,
    kind="line",
    x="size%",
    y="Acc",
    facet_kws=dict(sharex=False),
)
ax.set(xlabel='% dataset', ylabel='Precisión')
plt.savefig('learning_curve_es.svg')
# col="align", hue="choice", size="coherence", style="choice",

'''
Learning curve
\begin{table}[htbp]
\centering
\begin{tabular}{cccccc}
\toprule
\multicolumn{2}{c}{Resources}&\\
\cline{1-2}
'''In this exercise, we'll explore Seaborn's mpg dataset, which contains one row
per car model and includes information such as the year the car was made, the
number of miles per gallon ("M.P.G.") it achieves, the power of its engine
(measured in "horsepower"), and its country of origin.

What is the relationship between the power of a car's engine ("horsepower") and
its fuel efficiency ("mpg")? And how does this relationship vary by the number
of cylinders ("cylinders") the car has? Let's find out.

Let's continue to use relplot() instead of scatterplot() since it offers more
flexibility.'''

# Import Matplotlib and Seaborn
import matplotlib.pyplot as plt
import seaborn as sns

# Create scatter plot of horsepower vs. mpg
sns.relplot(x="horsepower", y="mpg", data=mpg, kind="scatter",
            size="cylinders", hue="cylinders")

# Show plot
plt.show()
    return df


if __name__ == "__main__":
    df = makeUrl()
    df.dropna(inplace=True)
    df["newTests"] = df["newTests"].astype(int)
    df["positive Test Percentage"] = 2
    df["positive Test Percentage"] = df["positive Test Percentage"].astype(float)
    df['positive Test Percentage'] = list(
        map(lambda x, y: x / y, df['newCases'], df['newTests']))
    df["positive Test Percentage"] = df["positive Test Percentage"] * 100
    df['date'] = pd.to_datetime(df['date'], dayfirst=False, yearfirst=False)
    df.sort_values(by=["date"], inplace=True, ascending=True)
    print(df)

    style.use('ggplot')
    g = sns.relplot(
        x="date",
        y="positive Test Percentage",
        kind="line",
        data=df,
    )
    g.fig.autofmt_xdate()
    plt.show()
Year_imdb.rename(columns={"IMDb Mean": "IMDb_Mean"}, inplace=True)
Year_imdb.rename(columns={"Number of Shows": "Number_series"}, inplace=True)

fig, ax = plt.subplots(figsize=(16, 9))
ax.plot(Year_imdb.sort_index()["IMDb_Mean"], color="red")
plt.xlabel("Years", color="black", fontsize=20)
plt.ylabel("IMDb Ratings", color="black", fontsize=20)
plt.yticks(fontsize=20)
plt.xticks(fontsize=20)
plt.show()

# Plot05
fig, ax = plt.subplots()
fig.set_size_inches(11.7, 8.27)
sns.relplot(x="IMDb_Mean", y="Number_series", data=Year_imdb)
plt.title("Distribution of IMDb Ratings with Respect to Number of Series")
plt.xlabel("IMDb Ratings", fontsize=15)
plt.ylabel("Number of Series", fontsize=15)
plt.yticks(fontsize=15)
plt.xticks(fontsize=15)
plt.show()

# Plot06
vistvdata["Year"] = vistvdata["Year"].astype("int32")
vistvdata['Year_Cut'] = pd.qcut(vistvdata['Year'], q=4)
vistvdata["Year_Cut"].cat.categories
sns.set_style("whitegrid")
fig, ax = plt.subplots()
fig.set_size_inches(11.7, 8.27)
""" Line plots on multiple facets ============================= _thumb: .45, .42 """ import matplotlib.pyplot as plt import seaborn as sns sns.set(style="ticks") dots = sns.load_dataset("dots") help(sns.load_dataset) print dots.head(10) # Define a palette to ensure that colors will be # shared across the facets palette = dict(zip(dots.coherence.unique(), sns.color_palette("rocket_r", 6))) # Plot the lines on two facets sns.relplot(x="time", y="firing_rate", hue="coherence", size="choice", col="align", size_order=["T1", "T2"], palette=palette, height=5, aspect=.75, facet_kws=dict(sharex=False), kind="line", legend="full", data=dots) plt.show()
""" Scatterplot with varying point sizes and hues ============================================== _thumb: .45, .5 """ import seaborn as sns import matplotlib.pyplot as plt sns.set(style="white") # Load the example mpg dataset, mpg is pandas.DataFrame mpg = sns.load_dataset("mpg") print mpg.columns print mpg.head(10) # Plot miles per gallon against horsepower with other semantics sns.relplot(x="horsepower", y="mpg", hue="origin", size="weight", sizes=(40, 400), alpha=.5, palette="muted", height=6, data=mpg) #plt.show()