def plot_paired_genes_facet(table, output=None):
    """Draw one hexbin panel of aCGH (x) vs. RNA (y) per segment-size class.

    Panels are laid out in the column order '>50MB', '5-50MB', '<5MB' and
    share one square axis range spanning the smallest and largest value
    found in either the 'RNA' or 'aCGH' column, padded by 0.3. Every panel
    gets a light-gray 1:1 diagonal, a text box with the Pearson r and the
    row count, and log-scaled hex bins.

    Parameters
    ----------
    table
        DataFrame-like object with 'RNA', 'aCGH' and 'Size' columns.
    output
        When truthy, the figure is saved there as PDF and a message is
        printed to stderr; otherwise the figure is shown interactively.
    """
    value_columns = ('RNA', 'aCGH')
    lowest = min(min(table[name]) for name in value_columns)
    highest = max(max(table[name]) for name in value_columns)
    margin = 0.3
    axis_range = (lowest - margin, highest + margin)
    hex_gridsize = 50
    panel_names = ['>50MB', '5-50MB', '<5MB']

    facets = seaborn.FacetGrid(
        table,
        col='Size',
        col_order=panel_names,
        xlim=axis_range,
        ylim=axis_range,
        margin_titles=True,
    )

    # The panels are filled by hand rather than through facets.map(plt.hexbin,
    # ...), because the mapped version showed blue lines around the hex bins.
    for panel, panel_name in zip(facets.axes.flat, panel_names):
        rows = table[table['Size'] == panel_name]
        r_value, _ = corr_stats(rows['aCGH'], rows['RNA'])
        caption = "Pearson r = {:.3f}\nN = {}".format(r_value, len(rows))
        # Summary statistics, anchored one unit inside the top-left corner.
        panel.text(axis_range[0] + 1, axis_range[1] - 1, caption,
                   fontsize='x-small', verticalalignment='top')
        # 1:1 diagonal, pushed behind the bins with a negative z-order.
        panel.plot(axis_range, axis_range,
                   color='lightgray', linestyle='-', linewidth=1, zorder=-1)
        panel.hexbin(rows['aCGH'], rows['RNA'],
                     bins='log', gridsize=hex_gridsize, mincnt=1)
        panel.set_xlabel("aCGH")
        panel.set_title(panel_name)
    facets.axes.flat[0].set_ylabel("RNA")

    if not output:
        plt.show()
        return
    plt.savefig(output, format='pdf', bbox_inches='tight')
    print("Wrote", output, file=sys.stderr)
# Evaluate predictions meas, pred = ys_test[test].values, lm.predict(xs_test.ix[test]) rsquared = r2_score(meas, pred) cor, pval = pearsonr(meas, pred) # Store results lm_res.append((ion, condition, cor, pval, rsquared, lm)) lm_res = DataFrame(lm_res, columns=['ion', 'condition', 'cor', 'pval', 'rsquared', 'lm']) print lm_res.sort('rsquared') # Plot General Linear regression boxplots sns.set(style='ticks', font_scale=.75, context='paper', rc={'axes.linewidth': .3, 'xtick.major.width': .3, 'ytick.major.width': .3}) g = sns.FacetGrid(lm_res, legend_out=True, aspect=1., size=1.5, sharex=True, sharey=False) g.map(sns.boxplot, 'cor', 'condition', palette=palette, sym='', linewidth=.3, order=label_order, orient='h') g.map(sns.stripplot, 'cor', 'condition', palette=palette, jitter=True, size=2, split=True, edgecolor='white', linewidth=.3, order=label_order, orient='h') g.map(plt.axvline, x=0, ls='-', lw=.1, c='gray') plt.xlim([-1, 1]) g.set_axis_labels('Pearson correlation\n(measured ~ predicted)', '') g.set_titles(row_template='{row_name}') g.fig.subplots_adjust(wspace=.05, hspace=.2) sns.despine(trim=True) plt.savefig('%s/reports/lm_dynamic_boxplots_tfs_gsea.pdf' % wd, bbox_inches='tight') plt.close('all') print '[INFO] Plot done' # Top predicted metabolites boxplots # lm_res[(lm_res['cor'] > 0) & (lm_res['pval'] < .05)]
as_index=False).mean().sort_values(by='pregnant', ascending=True)) # import matplotlib.pyplot as plt import seaborn as sns # plt.figure(figsize=(12, 12)) sns.heatmap(df.corr(), linewidths=0.1, vmax=0.5, cmap=plt.cm.gist_heat, linecolor='white', annot=True) plt.show() grid = sns.FacetGrid(df, col='class') grid.map(plt.hist, 'plasma', bins=10) plt.show() from tensorflow.keras.models import Sequential from tensorflow.keras.layers import Dense import numpy as np import tensorflow as tf # seed 값 생성 np.random.seed(3) tf.random.set_seed(3) # 데이터 로드 dataset = np.loadtxt('./deeplearning/dataset/pima-indians-diabetes.csv', delimiter=",")
'Title']).Age.transform('median') print(df_train["Age"].isnull().sum()) # In[ ]: #Let's see the result of the inputation sns.distplot(df_train["Age"], bins=24) plt.title("Distribuition and density by Age") plt.xlabel("Age") plt.show() # In[ ]: #separate by survivors or not g = sns.FacetGrid(df_train, col='Survived', size=5) g = g.map(sns.distplot, "Age") plt.show() # Now let's categorize them # In[ ]: #df_train.Age = df_train.Age.fillna(-0.5) interval = (0, 5, 12, 18, 25, 35, 60, 120) cats = ['babies', 'Children', 'Teen', 'Student', 'Young', 'Adult', 'Senior'] df_train["Age_cat"] = pd.cut(df_train.Age, interval, labels=cats) df_train["Age_cat"].head()
stock_returns.mean() stock_returns.cov() #%% some plot cum_returns = np.cumprod(1 + stock_returns) - 1 cum_returns.index = stock_returns.index cum_returns.plot() plt.legend(loc="upper left") plt.ylabel("Return of $1 on first date") plt.gca().yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1)) meltt = stock_returns.melt(var_name="column") sns.displot(meltt, x='value', hue='column', bins="sqrt", kde=True) g = sns.FacetGrid(meltt, col='column', col_wrap=2) g.map(sns.histplot, 'value', bins="sqrt", kde=True) #%% model mdl_data = { "N": len(stock_returns), "N_stocks": len(stock_names), "observations": stock_returns.values } modelfile = "stocks.stan" with open(modelfile, "w") as file: file.write(""" data { // avoid putting data in matrix except for linear algebra int<lower=0> N; int<lower=0> N_stocks;
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

test = pd.read_csv('test.csv')
train = pd.read_csv('train.csv')
print(train.describe())

# numeric_only=True: recent pandas raises on string columns instead of
# silently dropping them when averaging a groupby.
df = train.groupby("Embarked").mean(numeric_only=True)
print(df)

# One row of the grid per port of embarkation. `height` is the current name of
# the per-facet size argument (it used to be called `size`).
grid = sns.FacetGrid(train, row='Embarked', height=2.2, aspect=1.6)

# A categorical plot mapped over facets derives its category order from each
# facet's subset unless told otherwise, so the same class or sex could land
# at different positions/colors in different rows. Pin both orders from the
# full table so every facet agrees.
pclass_order = sorted(train['Pclass'].dropna().unique())
sex_order = sorted(train['Sex'].dropna().unique())
grid.map(sns.pointplot, 'Pclass', 'Survived', 'Sex', palette='deep',
         order=pclass_order, hue_order=sex_order)
grid.add_legend()
plt.show()
train.head() # In[14]: train.drop("Name",axis=1,inplace=True) test.drop("Name",axis=1,inplace=True) # In[15]: facet=sns.FacetGrid(train,hue="Survived",aspect=4) facet.map(sns.kdeplot,"Age",shade=True) facet.set(xlim=(0,train["Age"].max())) facet.add_legend() plt.show() # In[16]: facet=sns.FacetGrid(train,hue="Survived",aspect=4) facet.map(sns.kdeplot,"Age",shade=True) facet.set(xlim=(0,train["Age"].max())) facet.add_legend() plt.xlim(20,30)
## 4. Generating A Kernel Density Plot ## sns.kdeplot(titanic['Age'], shade=True) plt.xlabel("Age") plt.show() ## 5. Modifying The Appearance Of The Plots ## sns.set_style("white") sns.kdeplot(titanic["Age"],shade=True) plt.xlabel("Age") sns.despine(left=True, bottom=True) ## 6. Conditional Distributions Using A Single Condition ## g=sns.FacetGrid(titanic,col="Pclass",size=6) g.map(sns.kdeplot,"Age",shade=True) sns.despine(left=True,bottom=True) plt.show() ## 7. Creating Conditional Plots Using Two Conditions ## g = sns.FacetGrid(titanic, col="Survived", row="Pclass") g.map(sns.kdeplot, "Age", shade=True) sns.despine(left=True, bottom=True) plt.show() ## 8. Creating Conditional Plots Using Three Conditions ## g = sns.FacetGrid(titanic, col="Survived", row="Pclass",hue="Sex",size=3) g.map(sns.kdeplot, "Age", shade=True)
def draw_example(example, myo_data):
    """Plot every channel of one example as a cumulative trace, then its spectrum.

    Two figures are shown, one after the other:

    1. A single-column facet grid with one panel per channel, each showing
       the cumulative sum of that channel over time. All axis labels, tick
       labels and titles are blanked.
    2. A stem plot of the FFT magnitude of the first channel (``example[0]``).

    Parameters
    ----------
    example
        2-D array indexed as ``example[channel, sample]``.
    myo_data
        When truthy, the time index advances by 5 per sample and the
        spectrum is scaled with a sampling frequency of 200; otherwise the
        step is 1 and the frequency 1000.
    """
    import seaborn as sns
    import matplotlib.pyplot as plt
    import pandas as pd
    from scipy import fftpack

    print(np.shape(example))

    n_samples = len(example[0])
    step = 5 if myo_data else 1
    indexes = pd.Series(range(1, step * n_samples + 1, step), name="Time")

    # Channels are labelled "1", "2", ... in row order.
    channel_names = [str(i + 1) for i in range(len(example))]
    data = pd.DataFrame(data=np.swapaxes(example, 1, 0),
                        columns=pd.Series(channel_names, name="Channel"),
                        index=indexes)
    # Long format: one row per (Time, Channel) with the running sum in "val".
    data = data.cumsum(axis=0).stack().reset_index(name="val")
    print(data.keys())

    sns.set(style="whitegrid", font_scale=4)
    # The original call passed both height=3.5 and the legacy size=16. Older
    # seaborn let `size` override `height` (with a warning); current releases
    # reject `size` outright. Keep the value that actually took effect.
    g = sns.FacetGrid(data, col="Channel", col_wrap=1, height=16, despine=True)

    def signal_plot(x, y, **kwargs):
        # map_dataframe hands the facet's rows over as the `data` keyword.
        ax = plt.gca()
        facet_data = kwargs.pop("data")
        facet_data.plot(x=x, y=y, sharex=True, sharey=True, ax=ax,
                        linewidth=1, grid=False, **kwargs)

    g.map_dataframe(signal_plot, "Time", "val")
    g.set_ylabels("")
    g.set_xlabels("")
    g.set_xticklabels("")
    g.set_yticklabels("")
    g.set_titles("")
    plt.show()

    frequency = 200 if myo_data else 1000

    X = fftpack.fft(example[0])
    freqs = fftpack.fftfreq(n_samples) * frequency
    fig, ax = plt.subplots()
    ax.stem(freqs, np.abs(X))
    ax.set_xlabel('Frequency in Hertz [Hz]')
    ax.set_ylabel('Frequency Domain (Spectrum) Magnitude')
    ax.set_xlim(-frequency / 2, frequency / 2)
    plt.show()
def viz_cat_cont_density(df, features, target):
    """Draw KDE plots of ``target``, one facet grid per feature.

    For each name in ``features`` a new ``FacetGrid`` is created with one row
    per level of that feature, and the density of ``target`` is drawn in
    every row.

    Parameters
    ----------
    df
        Data passed to ``sns.FacetGrid``.
    features
        Iterable of column names used, one at a time, as the row facet.
    target
        Column whose density is plotted.
    """
    for feature in features:
        # `height` replaces the legacy `size` keyword, which current seaborn
        # no longer accepts.
        grid = sns.FacetGrid(df, row=feature, height=8)
        grid.map(sns.kdeplot, target).add_legend()
        # Acts on pyplot's current axes only.
        plt.xticks(rotation=45)
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Histogram: one panel per level of "time", showing the distribution of "tip".
""" 画直方图 """
tips = sns.load_dataset("tips")
g = sns.FacetGrid(tips, col='time')
g.map(plt.hist, "tip")

# Scatter plot: one panel per level of "sex", points colored by "smoker".
""" 画散点图 """
g = sns.FacetGrid(tips, col='sex', hue='smoker')  # hue draws each smoker level in its own color
g.map(plt.scatter, "total_bill", "tip", alpha=0.7)  # alpha sets marker transparency (not marker size)
g.add_legend()  # add a legend for the hue levels
# Let's try the factorplot again! sns.catplot(x='Pclass', kind='count', data=titanic_df, hue='person') # In[27]: # Getting a distribution of ages titanic_df['Age'].hist(bins=70) # In[29]: #Getting a quick comparison of male, female, child titanic_df['person'].value_counts() # In[33]: fig = sns.FacetGrid(titanic_df, hue='Sex', aspect=4) fig.map(sns.kdeplot, 'Age', shade=True) # Set the x max limit by the oldest passenger oldest = titanic_df['Age'].max() #Since we know no one can be negative years old set the x lower limit at 0 fig.set(xlim=(0, oldest)) #Finally add a legend fig.add_legend() # In[35]: fig = sns.FacetGrid(titanic_df, hue='person', aspect=4)
palette="muted") g_sibsp = g_sibsp.set_ylabels("survival probability") plt.show() # Parch和survived之间的关系 g_parch = sns.factorplot(x="Parch", y="Survived", data=train, kind="bar", size=6, palette="muted") g_parch = g_parch.set_ylabels("survival probabitlity") plt.show() # Age和survived的关系 g_age = sns.FacetGrid(train, col='Survived') g_age = g_age.map(sns.distplot, "Age") plt.show() # Age曲线分布 g = sns.kdeplot(train["Age"][(train["Survived"] == 0) & (train["Age"].notnull())], color="Red", shade=True) g = sns.kdeplot(train["Age"][(train["Survived"] == 1) & (train["Age"].notnull())], ax=g, color="Blue", shade=True) g.set_xlabel("Age") g.set_ylabel("Frequency")
# Rotate tick marks for visibility plt.yticks(rotation=0) plt.xticks(rotation=90) # Show the plot plt.show() plt.clf() #Create a FacetGrid that shows a point plot of the Average SAT scores SAT_AVG_ALL. #Use row_order to control the display order of the degree types. # Create FacetGrid with Degree_Type and specify the order of the rows using row_order g2 = sns.FacetGrid(df, row="Degree_Type", row_order=['Graduate', 'Bachelors', 'Associates', 'Certificate']) # Map a pointplot of SAT_AVG_ALL onto the grid g2.map(sns.pointplot, 'SAT_AVG_ALL') # Show the plot plt.show() plt.clf() #Create a factorplot() that contains a boxplot (box) of Tuition values varying by Degree_Type across rows. # Create a factor plot that contains boxplots of Tuition values
social_trial_by_trial = trial_by_trial[trial_by_trial['treatment'] == 3] results = social_trial_by_trial.groupby('sid').apply(linear_decomp) # Normaize or not #normalized = results.apply(lambda row: row/np.sum(row), axis = 1) normalized = results normalized.reset_index(inplace=True) normalized.rename(columns={'level_1': 'age_group'}, inplace=True) normalized_long = normalized.set_index(['sid', 'age_group']).stack().reset_index() normalized_long.rename(columns={'level_2': 'norm', 0: 'beta'}, inplace=True) #normaized_long = normaized.stack([['greedy', 'equality', 'socialmax', 'other']]) #%%plot all betas def tdc_factor(data, **kwargs): sb.pointplot(x='age_group', y='beta', data=data) #group['treatment_name'] = [treatments[x] for x in group.treatment] #plt.figure() g = sb.FacetGrid(normalized_long, col='norm', col_wrap=2) g = g.map_dataframe(tdc_factor) #plt.subplots_adjust(top=0.9) #g.fig.suptitle('Group '+ str(name)) #%% #sb.factorplot(x='level_1', y = 'greedy', data = normaized, kind="point")
#!/usr/bin/env python # coding: utf-8 # In[1]: import seaborn as sns tips = sns.load_dataset("tips") tips # In[4]: empty = sns.FacetGrid(tips) # In[5]: one = sns.FacetGrid(tips, col="time", row="smoker") # In[6]: import matplotlib.pyplot as plt # In[ ]: sns.FacetGrid(tips)
# ## Imports # # **Import the data visualization libraries if you haven't done so already.** # In[101]: import matplotlib.pyplot as plt import seaborn as sns sns.set_style('white') get_ipython().run_line_magic('matplotlib', 'inline') # **Use FacetGrid from the seaborn library to create a grid of 5 histograms of text length based off of the star ratings. Reference the seaborn documentation for hints on this** # In[102]: g = sns.FacetGrid(yelp, col='stars') g.map(plt.hist, 'text length') # **Create a boxplot of text length for each star category.** # In[103]: sns.boxplot(x='stars', y='text length', data=yelp, palette='rainbow') # **Create a countplot of the number of occurrences for each type of star rating.** # In[104]: sns.countplot(x='stars', data=yelp, palette='rainbow') # ** Use groupby to get the mean values of the numerical columns, you should be able to create this dataframe with the operation:**
# fill NaN values in Age column with random values generated titanic_df["Age"][np.isnan(titanic_df["Age"])] = rand_1 test_df["Age"][np.isnan(test_df["Age"])] = rand_2 # convert from float to int titanic_df['Age'] = titanic_df['Age'].astype(int) test_df['Age'] = test_df['Age'].astype(int) # plot new Age Values titanic_df['Age'].hist(bins=70, ax=axis2) # test_df['Age'].hist(bins=70, ax=axis4) # .... continue with plot Age column # peaks for survived/not survived passengers by their age facet = sns.FacetGrid(titanic_df, hue="Survived",aspect=4) facet.map(sns.kdeplot,'Age',shade= True) facet.set(xlim=(0, titanic_df['Age'].max())) facet.add_legend() # average survived passengers by age fig, axis1 = plt.subplots(1,1,figsize=(18,4)) average_age = titanic_df[["Age", "Survived"]].groupby(['Age'],as_index=False).mean() sns.barplot(x='Age', y='Survived', data=average_age) # Cabin # It has a lot of NaN values, so it won't cause a remarkable impact on prediction titanic_df.drop("Cabin",axis=1,inplace=True) test_df.drop("Cabin",axis=1,inplace=True) # Family
def display_way(vec_names, values, annotation, words, steps_between,
                x_label="instance", y_label="activation",
                model_path="modelXXX", pretrained=False):
    """Plot a sequence of values as a connected, annotated path and save it.

    The values are drawn as dark-red "x" markers joined by a thin line. Every
    x tick label is rotated by 90 degrees (first and last in normal weight,
    the rest light), and each point carries its entry of ``annotation`` as a
    rotated label above it. The figure is written to
    ``<model_path>/vector_way_<y_label>_<words[0]>_<words[1]>_<model_path>.eps``
    and then shown.

    ``steps_between`` and ``pretrained`` are only referenced by the
    commented-out CSV export below.
    """
    frame = pd.DataFrame({
        x_label: vec_names,
        y_label: values,
        'concept': annotation
    })

    # if pretrained:
    #     write_to_csv(
    #         df, "{}/continuous_activation_pretrained_{}_{}_k{}_"
    #         "{}.csv".format(model_path, words[0], words[1], steps_between,
    #                         model_path))
    # else:
    #     write_to_csv(
    #         df, "{}/continuous_activation_{}_{}_k{}_{}.csv".format(
    #             model_path, words[0], words[1], steps_between, model_path))

    grid = sns.FacetGrid(frame, height=7)

    def draw_path(xs, ys, **kwargs):
        # Markers come from the full frame; the connecting line from the
        # series that FacetGrid.map passes in.
        axes = sns.regplot(data=frame, x=x_label, y=y_label,
                           fit_reg=False, marker="x", color="darkred")
        plt.plot(xs, ys, linewidth=1, color="darkred")

        labels = axes.get_xticklabels()
        last = len(labels) - 1
        for pos, label in enumerate(labels):
            label.set_rotation(90)
            # Only the two end labels keep normal weight.
            label.set_weight("normal" if pos in (0, last) else "light")

        for idx, y_value in enumerate(ys.values):
            plt.annotate(annotation[idx],
                         xy=(idx, y_value),
                         fontsize=8,
                         xytext=(0, 50),
                         textcoords="offset points",
                         rotation=90)

    grid.map(draw_path, x_label, y_label)

    target = "{}/vector_way_{}_{}_{}_{}.eps".format(
        model_path, y_label, words[0], words[1], model_path)
    plt.savefig(target)
    plt.show()
import seaborn as sns
import glob
import re
import matplotlib.pyplot as plt
import pandas as pd

# Collect every training log under data/ and tag each one with the
# configuration encoded in its file name.
files = glob.glob('data/*.csv')
frames = []
for path in files:
    df = pd.read_csv(path)

    channel = re.search('avg|diff|none', path)
    if channel is None:
        # Fail with the offending path rather than an AttributeError on None.
        raise ValueError(
            "cannot infer channel (avg|diff|none) from file name %r" % path)
    df['channel'] = channel.group()

    has_norm = 'norm' in path
    has_angle = 'angle' in path
    if has_norm and has_angle:
        df['options'] = 'both'
    elif has_norm:
        df['options'] = 'norm'
    elif has_angle:
        df['options'] = 'angle'
    else:
        df['options'] = 'none'
    frames.append(df)

# `combined` rather than `all`, which would shadow the builtin.
combined = pd.concat(frames)
# The first column holds the epoch. Rename it through the public API;
# writing into `columns.values` mutates the index behind pandas' back.
combined = combined.rename(columns={combined.columns[0]: 'epoch'})

# One panel per (options, channel): validation loss in blue, training loss in red.
g = sns.FacetGrid(combined, col='channel', row='options')
g = g.map(plt.plot, 'epoch', 'val_loss', color='blue')
g = g.map(plt.plot, 'epoch', 'loss', color='red')
g = g.set(ylim=(0, 1))
g.savefig('data/plot.png')
layout = go.Layout(margin=dict(l=0, r=0), scene=Scene, height=1000, width=1000) data = [trace] fig = go.Figure(data=data, layout=layout) fig.show() cluster_tsne_profile = pd.merge(X11, clusters_tsne_scale['tsne_clusters'], left_index=True, right_index=True) cluster_pca_profile = pd.merge(X11, clusters_pca_scale['pca_clusters'], left_index=True, right_index=True) for c in cluster_pca_profile: grid = sns.FacetGrid(cluster_pca_profile, col='pca_clusters') grid.map(plt.hist, c) for c in cluster_tsne_profile: grid = sns.FacetGrid(cluster_tsne_profile, col='tsne_clusters') grid.map(plt.hist, c) plt.figure(figsize=(15, 10)) fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 15)) sns.scatterplot( data=cluster_pca_profile, x='Debt ratio %', y='Working Capital to Total Assets', hue='pca_clusters', s=85, alpha=0.4,
# -*- coding: utf-8 -*- """ Created on Mon Aug 5 15:41:26 2019 @author: Gareth """ import seaborn as sns sns.set() # Load the example iris dataset #planets = sns.load_dataset("ucdp_") #cmap = sns.cubehelix_palette(rot=-.2, as_cmap=True) #ax = sns.scatterplot(x="deaths_a", y="deaths_b", # hue="type_of_violence", size="deaths_civilians", # sizes=(10, 200), # data=africa) g = sns.FacetGrid(africa, hue="latitude", subplot_kws=dict(projection='polar'), height=4.5, sharex=False, sharey=False, despine=False) # Draw a scatterplot onto each axes in the grid g.map(sns.scatterplot, "longitude", "best")
iris = sb.load_dataset('iris') iris.head() iris['species'].unique() sb.pairplot(data=iris) # PairGrid sb.PairGrid(data=iris) g = sb.PairGrid(data=iris).map(func=plt.scatter) g = sb.PairGrid(data=iris).map_diag(func=sb.distplot) g = sb.PairGrid(data=iris).map_diag(func=sb.distplot).map_upper( func=plt.scatter) g = sb.PairGrid(data=iris).map_diag(func=sb.distplot).map_upper( func=plt.scatter).map_lower(func=sb.kdeplot) # FacetGrid g = sb.FacetGrid(data=tips, col='time', row='smoker').map(sb.distplot, 'total_bill') g = sb.FacetGrid(data=tips, col='time', row='smoker').map(sb.scatterplot, 'total_bill', 'tip') # Regression Plots ----------------------------------------------------------- tips = sb.load_dataset('tips') tips.head() # Linear Model Plots sb.lmplot(x='total_bill', y='tip', data=tips) sb.lmplot(x='total_bill', y='tip', data=tips, hue='sex') sb.lmplot(x='total_bill', y='tip', data=tips, hue='sex', markers=['^', '1']) sb.lmplot(x='total_bill', y='tip', data=tips,
Created on Thu Sep 12 00:07:02 2019 @author: Hermii """ import pandas as pd import seaborn as sns path = "C:/Users/Hermii/Desktop/Data Challenge 3/repo/jbg060/code/" #csv you get from temp.py comb = pd.read_csv(path + "both_pumps.csv") #Specify only "flow data" and 1 pump flow = comb[(comb["measurementType"] == "Debietmeting.Q") & (comb["City.PumpType"] == "GBS_DB.RG8150")] flow["TimeStamp"] = pd.to_datetime(flow["TimeStamp"]) flow["day"] = flow["TimeStamp"].dt.day_name() flow["hour"] = flow["TimeStamp"].dt.hour g = sns.FacetGrid(data=flow.groupby( ["day", "hour"]).hour.count().to_frame(name='day_hour_count').reset_index(), col='day', col_order=[ 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday' ], col_wrap=3) g.map(sns.barplot, "hour", "day_hour_count")
#This simple analysis confirms our assumptions as decisions for subsequent workflow stages. #이작업은 후속 작업을 가정 선택하는데 도움을 준다. #We should consider Age (our assumption classifying #2) in our model training. #나이를 변수로 고려 #Complete the Age feature for null values (completing #1). #나이에 널값을 채워라 #We should band age groups (creating #3). # 나이를 구간화해라 # In[ ]: g = sns.FacetGrid(train_df, col='Survived') g.map(plt.hist, 'Age', bins=20) #구간을 20씩 잡고 나이-생존율 히스토그램 생성 # In[ ]: #Correlating numerical and ordinal features #We can combine multiple features for identifying correlations using a single plot. #This can be done with numerical and categorical features which have numeric values. #숫자형 변수와 순서형 변수의 상관관계 분석 #한개의 그래프에서 상관관계 분석을 위해서 여러개의 변수를 동시에 나타낼 수 있다. #이것은 숫자형 변수와 숫자값을 포함하는 범주형간에 작업을 할 수 있다. #Observations.
df_unroll["LOOP_TYPE"] = "UNROLLED" df_roll["LOOP_TYPE"] = "ROLLED" df = df_roll.append(df_unroll) filtered_df = df[(df.PHI_JET_SIZE == df.ETA_JET_SIZE) & (df.ETA_GRID_SIZE >= df.ETA_JET_SIZE) & (df.PHI_GRID_SIZE >= df.PHI_JET_SIZE) & (df.NUMBER_OF_SEEDS == 128)] #filtered_df = df[["PHI_JET_SIZE", "ETA_GRID_SIZE", "PHI_GRID_SIZE", "NUMBER_OF_SEEDS", "maxOverallLatency"]] sns.set_style("whitegrid") facet = sns.FacetGrid(filtered_df, row="ETA_GRID_SIZE", col="PHI_JET_SIZE", hue="LOOP_TYPE") facet = facet.map(sns.lineplot, "PHI_GRID_SIZE", "minOverallLatency", ci=None).add_legend() facet.fig.set_size_inches(16, 9) facet.savefig(saveFolder + "/minOverallLatency.pdf") facet = sns.FacetGrid(filtered_df, row="ETA_GRID_SIZE", col="PHI_JET_SIZE", hue="LOOP_TYPE") facet = facet.map(sns.lineplot, "PHI_GRID_SIZE", "maxOverallLatency", ci=None).add_legend() facet.fig.set_size_inches(16, 9) facet.savefig(saveFolder + "/maxOverallLatency.pdf")
df_data['Dimension'] = df_data.apply( lambda x: x['Dimension Type'] + " " + str(x['Dimension']), axis=1) print("Data crunched!") g = sns.FacetGrid( df_data, col='Metric', row='Dimension', hue='Dimension Count', margin_titles=True, sharey=False, col_order=( sorted([x for x in df_data['Metric'].unique() if 'Inverse' in x]) + sorted([x for x in df_data['Metric'].unique() if 'Inverse' not in x], )), row_order=( sorted( [x for x in df_data['Dimension'].unique() if 'Mean' in x], key=lambda str: next(int(s) for s in str.split() if s.isdigit())) + sorted( [x for x in df_data['Dimension'].unique() if 'Minimum' in x], key=lambda str: next(int(s) for s in str.split() if s.isdigit())) + sorted( [x for x in df_data['Dimension'].unique() if 'Euclidean' in x], key=lambda str: next(int(s) for s in str.split() if s.isdigit())))) g.map(sns.distplot, name, hist=False, rug=True) outfile = kn.pack({ 'title': kn.unpack(dataframe_filename)['title'],
def plot_gc_landscape():
    """Plot segment-length distributions per GC-content and save them as PDF.

    Reads the files listed in ``snakemake.input.data_files``, draws a facet
    grid (rows: "hf+canon", columns: "GC-content", hue: "canonicity") with
    three layers per panel -- bars of "prob", black scatter points of
    "Expected Probability", and a black line of "Expected Prob Line" -- on a
    log-scaled y axis, and writes the figure to
    ``snakemake.output.gc_landscape_pdf``.

    Takes no arguments and returns nothing; input and output paths come from
    the module-level ``snakemake`` object.
    """
    df, w, q = df_from_files(snakemake.input.data_files)
    # replace NaNs introduced by the outer merge with the prediction with zeros
    # NaNs occur, when segment lengths above K occur
    df = df.fillna(0)
    df = filter_combinations(df)
    # split values above w and below (including) w
    # for the values below w sum up all values above w
    # into one entry at w + 5
    # (`above_w` is not used again in this function.)
    summed, above_w = sum_and_scatter(df, w)
    # Keep the expected probability only up to w (NaN beyond it), so the line
    # layer drawn below stops at w.
    summed["Expected Prob Line"] = summed["Expected Probability"].where(
        summed["Segment Length"] <= w)

    # plot empiric distributions
    # Facet height per supported w; any other w raises KeyError below.
    heights = {
        30: 4,
        50: 5,
        100: 6,
    }
    sns.set(
        font="DejaVu Sans",
        style=sns.axes_style("whitegrid", {'grid.linestyle': '--'}),
        font_scale=1.6,
    )
    g = sns.FacetGrid(
        summed,
        row="hf+canon",
        col="GC-content",
        height=heights[w],
        aspect=1.1,
        margin_titles=True,
        hue="canonicity",
    )
    g.fig.suptitle(f"Segment Length Distributions for $w={w}$, $q={q}$",
                   y=1.01)
    g1 = g.map(plt.bar, "Segment Length", "prob")

    # fix broken z-order
    # save lower layer (layer 1, barplot)
    # so that later layers can be put above
    backgroundartists = []
    for ax in g1.axes.flat:
        for li in ax.lines + ax.collections:
            li.set_zorder(1)
            backgroundartists.append(li)
        # NOTE(review): picks the second-to-last `Rect` child of the axes as
        # the bar at the dummy "beyond w" position; this presumably relies on
        # matplotlib's child ordering -- confirm before touching.
        beyond_w_bar = [
            rect for rect in ax.get_children() if isinstance(rect, Rect)
        ][-2]
        # take the color of the bar and make it darker
        (col_r, col_g, col_b, col_a) = beyond_w_bar.get_fc()
        col_h, col_l, col_s = colorsys.rgb_to_hls(col_r, col_g, col_b)
        col_r, col_g, col_b = colorsys.hls_to_rgb(col_h, col_l - 0.1, col_s)
        beyond_w_bar.set_color((col_r, col_g, col_b, col_a))

    # plot predicted points and manually place them on a higher layer than the bars
    g2 = g.map(sns.scatterplot,
               "Segment Length",
               "Expected Probability",
               color="black")
    for ax in g2.axes.flat:
        for li in ax.lines + ax.collections:
            # Anything not recorded during the bar pass was added by the
            # scatter layer.
            if li not in backgroundartists:
                li.set_zorder(5)

    g.map(
        sns.lineplot,
        "Segment Length",
        "Expected Prob Line",
        color="black",
        alpha=0.7,
        palette=sns.color_palette("Set2_r"),
    )

    # Adjust ticks so that the summed values at the dummy offset are labeled
    # correctly; currently the dummy value is at w + 5
    for ax in g2.axes.flat:
        labels = [
            item.get_text() if item.get_text() != f"{w+5}" else ">w"
            for item in ax.get_xticklabels()
        ]
        ax.set_xticklabels(labels)
        # The last mapped layer leaves its column name as the y label;
        # replace it with a readable one.
        label = ax.get_ylabel()
        ax.set_ylabel(
            label if label != "Expected Prob Line" else "Probability")

    g.set(yscale="log")
    sns.despine()
    plt.savefig(snakemake.output.gc_landscape_pdf, bbox_inches='tight')
# Column names are Japanese data keys and are left untouched; they appear to
# mean 家賃 = rent, 大きさ = size, 近さ = proximity, 方角 = direction.
#
# x/y are passed by keyword throughout: current seaborn takes `data` as the
# first positional argument of these functions, so the old positional
# (x, y, data=df) form fails there. The keyword form works on old and new
# releases alike.
sns.jointplot(x='家賃', y='大きさ', data=df)
plt.show()

sns.pairplot(df)
plt.show()

# Box plot
sns.boxplot(x='近さ', y='家賃', data=df)
plt.show()

# Histograms and scatter plots, colored by 近さ
sns.pairplot(df, hue='近さ')
plt.show()

# Several plots side by side
g = sns.FacetGrid(df, col='近さ')
g.map(plt.hist, '家賃')
plt.show()

g = sns.FacetGrid(df, col='方角', hue='近さ', col_wrap=4)
g.map(plt.scatter, '大きさ', '家賃')
plt.show()

# Check how the distribution of 家賃 changes with 近さ:
# t-test between groups 'A' and 'B'
print(stats.ttest_ind(df[df['近さ'] == 'A']['家賃'],
                      df[df['近さ'] == 'B']['家賃']))

# Relationship between 大きさ and 家賃:
# linear regression
print(stats.linregress(df['大きさ'], df['家賃']))
sns.lmplot(x='大きさ', y='家賃', data=df)
def plot_input_tstep(self,
                     epoch,
                     save=False,
                     outpath="data/plots/",
                     what="activation"):
    """Tabulate and plot per-timestep activation and harmony for one epoch.

    For every input in ``self.inputNames`` and every timestep, the state
    ``self.data['S_trace'][input, epoch, timestep]`` is summed into a single
    activation value and paired with
    ``self.data['Harmony_trace'][input, epoch, timestep]``.

    Parameters
    ----------
    epoch
        Index of the training epoch to read from both traces.
    save
        When true, the figure is also written to ``outpath`` (file stem
        depends on ``what``, suffixed with ``_<epoch>``).
    outpath
        Prefix prepended verbatim to the output file stem.
    what
        Which plot to draw: "activation", "harmony", "regplot_facet",
        "regplot", "harm_dist_inp", "act_dist_inp" or "harmony_dev".
        Any other value draws nothing.

    Returns
    -------
    pandas.DataFrame
        One row per (input, timestep) with columns "input", "tstep",
        "activation", "harmony" and "inpName".

    Notes
    -----
    Every plot here is a seaborn ``FacetGrid`` (``relplot`` and ``displot``
    return one too), which has no ``get_figure()``; figures are therefore
    saved through ``FacetGrid.savefig``. The two distribution plots
    previously carried a copy-pasted "Harmony vs. Activation" title and are
    now titled after what they show.
    """
    states = self.data['S_trace']
    n_inputs = len(self.inputNames)
    n_steps = states.shape[2]

    # Last axis: input index, timestep, summed activation, harmony.
    states_sum = torch.zeros(n_inputs, n_steps, 4)
    for i in range(n_inputs):
        harmonies = self.data['Harmony_trace'][i, epoch, :]
        s = states[i, epoch, :, :, :]
        for tstep in range(s.shape[0]):
            states_sum[i, tstep, 0] = i
            states_sum[i, tstep, 1] = tstep
            states_sum[i, tstep, 2] = s[tstep, :, :].sum()
            states_sum[i, tstep, 3] = harmonies[tstep]

    # Flatten to one row per (input, timestep); rows stay grouped by input.
    collapsed = states_sum.view((n_inputs * n_steps, 4)).numpy()
    df = pd.DataFrame(collapsed,
                      columns=["input", "tstep", "activation", "harmony"])
    # Repeat each input name once per timestep to match the row order above.
    df['inpName'] = [
        name for name in self.inputNames for _ in range(n_steps)
    ]

    def _regplot_facets():
        grid = sns.FacetGrid(data=df,
                             hue="inpName",
                             col="inpName",
                             palette="deep")
        grid.map(sns.regplot, "harmony", "activation")
        return grid

    def _harmony_facets():
        grid = sns.FacetGrid(df, col="inpName", height=2)
        grid.map(sns.distplot, "harmony")
        return grid

    # what -> (grid factory, title or None, output file stem).
    # Factories are only called for the selected plot.
    plots = {
        "activation": (
            lambda: sns.relplot(data=df, y="activation", x="tstep",
                                hue="inpName", kind="line",
                                palette="viridis"),
            "Activation over time",
            "all_activations",
        ),
        "harmony": (
            lambda: sns.relplot(data=df, y="harmony", x="tstep",
                                hue="inpName", kind="line",
                                palette="viridis"),
            "Harmony over time",
            "all_harmonies",
        ),
        "regplot_facet": (
            _regplot_facets,
            "Harmony vs. Activation",
            "harmony_activation",
        ),
        "regplot": (
            lambda: sns.relplot(data=df, x="activation", y="harmony",
                                hue="inpName", palette="deep", kind="line"),
            "Harmony vs. Activation",
            "harmony_activation",
        ),
        "harm_dist_inp": (
            lambda: sns.displot(data=df, x="harmony", hue="inpName",
                                palette="deep", multiple="dodge"),
            "Harmony distribution",
            "harmony_dist",
        ),
        "act_dist_inp": (
            lambda: sns.displot(data=df, x="activation", hue="inpName",
                                palette="deep", multiple="dodge"),
            "Activation distribution",
            "activation_dist",
        ),
        # Progressive harmony; this plot never had a title.
        "harmony_dev": (
            _harmony_facets,
            None,
            "harmony_distribution",
        ),
    }

    selected = plots.get(what)
    if selected is not None:
        make_grid, title, stem = selected
        grid = make_grid()
        if title is not None:
            grid.set(title=f"{title} (epoch {epoch})")
        # Save before showing so the file never depends on window state.
        if save:
            grid.savefig(outpath + f"{stem}_{epoch}")
        plt.show()

    return df