def plot_paired_genes_facet(table, output=None):
    """Draw one hexbin panel of aCGH vs. RNA log2 values per segment size.

    Panels are laid out left to right for the 'Size' classes '>50MB',
    '5-50MB' and '<5MB'. Every panel shows a log-scaled hexbin density of
    'aCGH' (x) against 'RNA' (y), a light gray 1:1 diagonal, and a text note
    with the Pearson r and the number of rows in that size class. Both axes
    of every panel use the same limits: the overall data range of the two
    columns, padded by 0.3 on each side.

    If `output` is truthy the current figure is written there as a PDF and a
    message is printed to stderr; otherwise the figure is shown.
    """
    margin = 0.3
    value_columns = ('RNA', 'aCGH')
    lowest = min(min(table[col]) for col in value_columns)
    highest = max(max(table[col]) for col in value_columns)
    xy_limits = (lowest - margin, highest + margin)

    size_labels = ['>50MB', '5-50MB', '<5MB']
    grid = seaborn.FacetGrid(table,
                             col='Size',
                             col_order=size_labels,
                             xlim=xy_limits,
                             ylim=xy_limits,
                             margin_titles=True)

    # The panels are filled by hand instead of via grid.map(plt.hexbin, ...)
    # because that route showed blue lines around the hex bins.
    diagonal_style = dict(color='lightgray',
                          linestyle='-',
                          linewidth=1,
                          zorder=-1)
    hexbin_style = dict(bins='log', gridsize=50, mincnt=1)
    for panel, label in zip(grid.axes.flat, size_labels):
        rows = table[table['Size'] == label]
        x_vals = rows['aCGH']
        y_vals = rows['RNA']
        pearson_r, _p = corr_stats(x_vals, y_vals)
        note = "Pearson r = {:.3f}\nN = {}".format(pearson_r, len(rows))
        # Summary stats as plain text, one data unit in from the top-left
        panel.text(xy_limits[0] + 1,
                   xy_limits[1] - 1,
                   note,
                   fontsize='x-small',
                   verticalalignment='top')
        # 1:1 diagonal, pushed behind everything else by its z-order
        panel.plot(xy_limits, xy_limits, **diagonal_style)
        panel.hexbin(x_vals, y_vals, **hexbin_style)
        panel.set_xlabel("aCGH")
        panel.set_title(label)
    grid.axes.flat[0].set_ylabel("RNA")

    if output:
        plt.savefig(output, format='pdf', bbox_inches='tight')
        print("Wrote", output, file=sys.stderr)
    else:
        plt.show()
        # Evaluate predictions
        meas, pred = ys_test[test].values, lm.predict(xs_test.ix[test])

        rsquared = r2_score(meas, pred)
        cor, pval = pearsonr(meas, pred)

        # Store results
        lm_res.append((ion, condition, cor, pval, rsquared, lm))

# Turn the accumulated result tuples into a table with named columns.
lm_res = DataFrame(lm_res, columns=['ion', 'condition', 'cor', 'pval', 'rsquared', 'lm'])
# NOTE(review): Python 2 print statement and the legacy DataFrame.sort API.
print lm_res.sort('rsquared')

# Plot General Linear regression boxplots
sns.set(style='ticks', font_scale=.75, context='paper', rc={'axes.linewidth': .3, 'xtick.major.width': .3, 'ytick.major.width': .3})
# No row/col/hue variable is given, so the grid holds a single facet.
g = sns.FacetGrid(lm_res, legend_out=True, aspect=1., size=1.5, sharex=True, sharey=False)
# Horizontal boxplots of 'cor' per 'condition' with jittered points on top.
g.map(sns.boxplot, 'cor', 'condition', palette=palette, sym='', linewidth=.3, order=label_order, orient='h')
g.map(sns.stripplot, 'cor', 'condition', palette=palette, jitter=True, size=2, split=True, edgecolor='white', linewidth=.3, order=label_order, orient='h')
# Vertical reference line at a correlation of zero.
g.map(plt.axvline, x=0, ls='-', lw=.1, c='gray')
plt.xlim([-1, 1])
g.set_axis_labels('Pearson correlation\n(measured ~ predicted)', '')
g.set_titles(row_template='{row_name}')
g.fig.subplots_adjust(wspace=.05, hspace=.2)
sns.despine(trim=True)
# `wd`, `palette` and `label_order` are defined outside this excerpt.
plt.savefig('%s/reports/lm_dynamic_boxplots_tfs_gsea.pdf' % wd, bbox_inches='tight')
plt.close('all')
print '[INFO] Plot done'


# Top predicted metabolites boxplots
# lm_res[(lm_res['cor'] > 0) & (lm_res['pval'] < .05)]
Example #3
0
                            as_index=False).mean().sort_values(by='pregnant',
                                                               ascending=True))
#
import matplotlib.pyplot as plt
import seaborn as sns
#
# Annotated heatmap of the pairwise correlations between the columns of `df`
# (`df` is defined outside this excerpt).
plt.figure(figsize=(12, 12))
sns.heatmap(df.corr(),
            linewidths=0.1,
            vmax=0.5,
            cmap=plt.cm.gist_heat,
            linecolor='white',
            annot=True)
plt.show()

# One 10-bin histogram of 'plasma' per value of 'class'.
grid = sns.FacetGrid(df, col='class')
grid.map(plt.hist, 'plasma', bins=10)
plt.show()

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import numpy as np
import tensorflow as tf

# Fix the NumPy and TensorFlow random seeds
np.random.seed(3)
tf.random.set_seed(3)

# Load the data from a comma-separated text file
dataset = np.loadtxt('./deeplearning/dataset/pima-indians-diabetes.csv',
                     delimiter=",")
Example #4
0
                                        'Title']).Age.transform('median')

# Number of rows whose Age is still missing
print(df_train["Age"].isnull().sum())

# In[ ]:

# Let's see the result of the imputation
sns.distplot(df_train["Age"], bins=24)
plt.title("Distribuition and density by Age")
plt.xlabel("Age")
plt.show()

# In[ ]:

# Separate by survivors or not: one Age distribution per 'Survived' value
# NOTE(review): `size` is the pre-0.9 seaborn name for `height` -- confirm
# the seaborn version this is meant to run on.
g = sns.FacetGrid(df_train, col='Survived', size=5)
g = g.map(sns.distplot, "Age")
plt.show()

# Now let's categorize them

# In[ ]:

#df_train.Age = df_train.Age.fillna(-0.5)

# Bin edges and the label for each of the seven resulting age bands.
# pd.cut uses right-closed bins by default, so an Age of exactly 0 falls
# outside the first band.
interval = (0, 5, 12, 18, 25, 35, 60, 120)
cats = ['babies', 'Children', 'Teen', 'Student', 'Young', 'Adult', 'Senior']

df_train["Age_cat"] = pd.cut(df_train.Age, interval, labels=cats)

df_train["Age_cat"].head()
Example #5
0
# Per-column mean and covariance (results are only displayed in an
# interactive session; nothing is stored)
stock_returns.mean()
stock_returns.cov()

#%% some plot

# Cumulative return: running product of (1 + return), minus 1
cum_returns = np.cumprod(1 + stock_returns) - 1
cum_returns.index = stock_returns.index
cum_returns.plot()
plt.legend(loc="upper left")
plt.ylabel("Return of $1 on first date")
# Show the y axis as percentages, where a value of 1 means 100%
plt.gca().yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1))

# Long format: one row per value, with the source column name in 'column'
meltt = stock_returns.melt(var_name="column")
sns.displot(meltt, x='value', hue='column', bins="sqrt", kde=True)
# Same distributions again, one facet per column, two facets per row
g = sns.FacetGrid(meltt, col='column', col_wrap=2)
g.map(sns.histplot, 'value', bins="sqrt", kde=True)

#%% model

# Inputs for the model; `stock_names` is defined outside this excerpt
mdl_data = {
    "N": len(stock_returns),
    "N_stocks": len(stock_names),
    "observations": stock_returns.values
}
modelfile = "stocks.stan"
with open(modelfile, "w") as file:
    file.write("""
	data { // avoid putting data in matrix except for linear algebra
		int<lower=0> N;
		int<lower=0> N_stocks;
Example #6
0
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Both files are read from the current working directory
test = pd.read_csv('test.csv')
train = pd.read_csv('train.csv')

print(train.describe())
# Column means per 'Embarked' value
# NOTE(review): recent pandas raises here if `train` has non-numeric
# columns -- confirm the pandas version or pass numeric_only=True.
df = train.groupby("Embarked").mean()
print(df)

# One row of the grid per 'Embarked' value; each facet plots 'Survived'
# against 'Pclass' with 'Sex' as the hue variable
grid = sns.FacetGrid(train, row='Embarked', size=2.2, aspect=1.6)
grid.map(sns.pointplot, 'Pclass', 'Survived', 'Sex', palette='deep')
grid.add_legend()
plt.show()
Example #7
0

train.head()


# In[14]:


# Drop the 'Name' column from both frames in place
train.drop("Name",axis=1,inplace=True)
test.drop("Name",axis=1,inplace=True)


# In[15]:


# Shaded Age density, one curve per 'Survived' value, x axis from 0 to the
# largest Age in `train`
facet=sns.FacetGrid(train,hue="Survived",aspect=4)
facet.map(sns.kdeplot,"Age",shade=True)
facet.set(xlim=(0,train["Age"].max()))
facet.add_legend()
plt.show()


# In[16]:


# Same plot as the previous cell, then zoomed in to ages 20-30
facet=sns.FacetGrid(train,hue="Survived",aspect=4)
facet.map(sns.kdeplot,"Age",shade=True)
facet.set(xlim=(0,train["Age"].max()))
facet.add_legend()
plt.xlim(20,30)
Example #8
0
## 4. Generating A Kernel Density Plot ##

# Shaded kernel density estimate of the 'Age' column
sns.kdeplot(titanic['Age'], shade=True)
plt.xlabel("Age")
plt.show()

## 5. Modifying The Appearance Of The Plots ##

# Same plot on the "white" style, with the left and bottom spines removed
# as well as the default top and right ones
sns.set_style("white")
sns.kdeplot(titanic["Age"],shade=True)
plt.xlabel("Age")
sns.despine(left=True, bottom=True)

## 6. Conditional Distributions Using A Single Condition ##

# One Age density per passenger class, laid out in columns
g=sns.FacetGrid(titanic,col="Pclass",size=6)
g.map(sns.kdeplot,"Age",shade=True)
sns.despine(left=True,bottom=True)
plt.show()

## 7. Creating Conditional Plots Using Two Conditions ##

# Columns split on 'Survived', rows on 'Pclass'
g = sns.FacetGrid(titanic, col="Survived", row="Pclass")
g.map(sns.kdeplot, "Age", shade=True)
sns.despine(left=True, bottom=True)
plt.show()

## 8. Creating Conditional Plots Using Three Conditions ##

# As above, with 'Sex' added as the hue variable inside each facet
g = sns.FacetGrid(titanic, col="Survived", row="Pclass",hue="Sex",size=3)
g.map(sns.kdeplot, "Age", shade=True)
Example #9
0
def draw_example(example, myo_data):
    """Plot every channel of one recording, then the spectrum of channel 0.

    Two figures are shown in sequence:

    1. A one-column grid with one facet per channel, each drawing the
       cumulative sum of that channel over time. Axis labels, tick labels
       and facet titles are blanked.
    2. A stem plot of the FFT magnitude of the first channel.

    Args:
        example: 2-D array with one row per channel and one column per
            sample (it is indexed as ``example[0]`` and transposed with
            ``np.swapaxes(example, 1, 0)``).
        myo_data: If truthy, the time index advances by 5 per sample and a
            sampling frequency of 200 is used for the spectrum; otherwise
            the step is 1 and the frequency 1000. (Presumably 200 Hz vs.
            1 kHz recordings -- confirm against the data source.)
    """
    import seaborn as sns
    import matplotlib.pyplot as plt
    import pandas as pd

    print(np.shape(example))

    n_samples = len(example[0])
    time_step = 5 if myo_data else 1
    indexes = pd.Series(range(1, time_step * n_samples + 1, time_step),
                        name="Time")
    # Channels are labelled "1", "2", ... in row order
    channel_names = [str(i + 1) for i in range(len(example))]

    data = pd.DataFrame(data=np.swapaxes(example, 1, 0),
                        columns=pd.Series(channel_names, name="Channel"),
                        index=indexes)
    # Long format with columns Time / Channel / val, holding the running
    # sum of each channel
    data = data.cumsum(axis=0).stack().reset_index(name="val")
    print(data.keys())
    sns.set(style="whitegrid", font_scale=4)
    # The original call passed both height=3.5 and the deprecated size=16.
    # On seaborn versions that still accept `size` it overrides `height`,
    # and on later versions it raises TypeError, so 16 is the only facet
    # height this code ever produced.
    g = sns.FacetGrid(data,
                      col="Channel",
                      col_wrap=1,
                      height=16,
                      despine=True)

    def signal_plot(x, y, **kwargs):
        """Draw one facet's subset as a thin line on the current axes."""
        ax = plt.gca()
        data = kwargs.pop("data")
        data.plot(x=x,
                  y=y,
                  sharex=True,
                  sharey=True,
                  ax=ax,
                  linewidth=1,
                  grid=False,
                  **kwargs)

    g.map_dataframe(signal_plot, "Time", "val")
    g.set_ylabels("")
    g.set_xlabels("")
    g.set_xticklabels("")
    g.set_yticklabels("")
    g.set_titles("")
    plt.show()

    frequency = 200 if myo_data else 1000
    from scipy import fftpack
    X = fftpack.fft(example[0])
    # fftfreq returns cycles per sample; scale to the sampling frequency
    freqs = fftpack.fftfreq(len(example[0])) * frequency

    fig, ax = plt.subplots()

    ax.stem(freqs, np.abs(X))
    ax.set_xlabel('Frequency in Hertz [Hz]')
    ax.set_ylabel('Frequency Domain (Spectrum) Magnitude')
    ax.set_xlim(-frequency / 2, frequency / 2)
    plt.show()
Example #10
0
def viz_cat_cont_density(df, features, target):
    """Draw a KDE of `target` for every level of each feature in `features`.

    For each feature a new FacetGrid is created with one row per level of
    that feature, a kernel density estimate of the `target` column is
    mapped onto it, and a legend is added. The x tick labels of the current
    axes are then rotated by 45 degrees. Nothing is returned or shown.
    """
    for feature in features:
        grid = sns.FacetGrid(df, row=feature, size=8)
        grid = grid.map(sns.kdeplot, target)
        grid.add_legend()
        plt.xticks(rotation=45)
Example #11
0
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# The bare string below reads "draw a histogram"
"""
 画直方图
"""
tips = sns.load_dataset("tips")
# One histogram of 'tip' per value of 'time'
g = sns.FacetGrid(tips, col='time')
g.map(plt.hist, "tip")
# The bare string below reads "draw a scatter plot"
"""
 画散点图
"""
g = sns.FacetGrid(tips, col='sex', hue='smoker')  # hue colors the points by 'smoker' group
g.map(plt.scatter, "total_bill", "tip", alpha=0.7)  # alpha sets the marker transparency
g.add_legend()  # add the legend
# Let's try the factorplot again!
# Count of passengers per class, split by the 'person' column
sns.catplot(x='Pclass', kind='count', data=titanic_df, hue='person')

# In[27]:

# Getting a distribution of ages
titanic_df['Age'].hist(bins=70)

# In[29]:

# Getting a quick comparison of male, female, child
titanic_df['person'].value_counts()

# In[33]:

# NOTE(review): `fig` holds a seaborn FacetGrid here, not a matplotlib Figure
fig = sns.FacetGrid(titanic_df, hue='Sex', aspect=4)

# Shaded Age density, one curve per 'Sex' value
fig.map(sns.kdeplot, 'Age', shade=True)

# Set the x max limit by the oldest passenger
oldest = titanic_df['Age'].max()

# Since we know no one can be negative years old set the x lower limit at 0
fig.set(xlim=(0, oldest))

# Finally add a legend
fig.add_legend()

# In[35]:

# Same grid set-up, this time colored by the 'person' column
fig = sns.FacetGrid(titanic_df, hue='person', aspect=4)
Example #13
0
                         palette="muted")
g_sibsp = g_sibsp.set_ylabels("survival probability")
plt.show()

# Relationship between Parch and Survived
g_parch = sns.factorplot(x="Parch",
                         y="Survived",
                         data=train,
                         kind="bar",
                         size=6,
                         palette="muted")
# NOTE(review): the y label below contains a typo ("probabitlity")
g_parch = g_parch.set_ylabels("survival probabitlity")
plt.show()

# Relationship between Age and Survived
g_age = sns.FacetGrid(train, col='Survived')
g_age = g_age.map(sns.distplot, "Age")
plt.show()

# Age density curves: Survived == 0 in red and Survived == 1 in blue, drawn
# on the same axes, using only rows with a known Age
g = sns.kdeplot(train["Age"][(train["Survived"] == 0)
                             & (train["Age"].notnull())],
                color="Red",
                shade=True)
g = sns.kdeplot(train["Age"][(train["Survived"] == 1)
                             & (train["Age"].notnull())],
                ax=g,
                color="Blue",
                shade=True)
g.set_xlabel("Age")
# NOTE(review): a KDE curve shows density rather than frequency
g.set_ylabel("Frequency")
Example #14
0
# Rotate tick labels for visibility (y labels horizontal, x labels vertical)
plt.yticks(rotation=0)
plt.xticks(rotation=90)

# Show the plot, then clear the current figure
plt.show()
plt.clf()




# Create a FacetGrid that shows a point plot of the Average SAT scores SAT_AVG_ALL.
# Use row_order to control the display order of the degree types.
# Create FacetGrid with Degree_Type and specify the order of the rows using row_order
g2 = sns.FacetGrid(df,
                   row="Degree_Type",
                   row_order=['Graduate', 'Bachelors', 'Associates', 'Certificate'])

# Map a pointplot of SAT_AVG_ALL onto the grid
g2.map(sns.pointplot, 'SAT_AVG_ALL')

# Show the plot, then clear the current figure
plt.show()
plt.clf()





#Create a factorplot() that contains a boxplot (box) of Tuition values varying by Degree_Type across rows.
# Create a factor plot that contains boxplots of Tuition values
Example #15
0

# Keep only the rows with treatment == 3 and run `linear_decomp` (defined
# outside this excerpt) once per 'sid' group
social_trial_by_trial = trial_by_trial[trial_by_trial['treatment'] == 3]
results = social_trial_by_trial.groupby('sid').apply(linear_decomp)

# Normalize or not
#normalized = results.apply(lambda row: row/np.sum(row), axis = 1)
# Normalization is currently switched off. `normalized` is the same object
# as `results`, so the in-place calls below modify `results` as well.
normalized = results

normalized.reset_index(inplace=True)
normalized.rename(columns={'level_1': 'age_group'}, inplace=True)
# Long format: one row per (sid, age_group, norm) with its value in 'beta'
normalized_long = normalized.set_index(['sid',
                                        'age_group']).stack().reset_index()
normalized_long.rename(columns={'level_2': 'norm', 0: 'beta'}, inplace=True)


#normaized_long = normaized.stack([['greedy', 'equality', 'socialmax', 'other']])
#%%plot all betas
def tdc_factor(data, **kwargs):
    """Point plot of 'beta' against 'age_group' for one facet's data.

    Written for use with FacetGrid.map_dataframe. Extra keyword arguments
    are accepted but not forwarded to the plotting call.
    """
    plot_args = {'x': 'age_group', 'y': 'beta', 'data': data}
    sb.pointplot(**plot_args)


#group['treatment_name'] = [treatments[x] for x in group.treatment]
#plt.figure()
# One facet per 'norm' value, two facets per row; each facet is drawn by
# tdc_factor from that facet's subset of the data
g = sb.FacetGrid(normalized_long, col='norm', col_wrap=2)
g = g.map_dataframe(tdc_factor)
#plt.subplots_adjust(top=0.9)
#g.fig.suptitle('Group '+ str(name))

#%%
#sb.factorplot(x='level_1', y = 'greedy', data = normaized, kind="point")
#!/usr/bin/env python
# coding: utf-8

# In[1]:

import seaborn as sns
tips = sns.load_dataset("tips")
# Bare expression: displays the frame when run as a notebook cell
tips

# In[4]:

# A grid with no row/col variable: a single facet with nothing drawn on it
empty = sns.FacetGrid(tips)

# In[5]:

# Facets for every combination of 'time' (columns) and 'smoker' (rows)
one = sns.FacetGrid(tips, col="time", row="smoker")

# In[6]:

import matplotlib.pyplot as plt

# In[ ]:

sns.FacetGrid(tips)
Example #17
0
# ## Imports
#
# **Import the data visualization libraries if you haven't done so already.**

# In[101]:

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('white')
# Exported form of the `%matplotlib inline` notebook magic; only works
# inside IPython, where get_ipython() exists
get_ipython().run_line_magic('matplotlib', 'inline')

# **Use FacetGrid from the seaborn library to create a grid of 5 histograms of text length based off of the star ratings. Reference the seaborn documentation for hints on this**

# In[102]:

# One histogram of 'text length' per value of 'stars'
g = sns.FacetGrid(yelp, col='stars')
g.map(plt.hist, 'text length')

# **Create a boxplot of text length for each star category.**

# In[103]:

sns.boxplot(x='stars', y='text length', data=yelp, palette='rainbow')

# **Create a countplot of the number of occurrences for each type of star rating.**

# In[104]:

sns.countplot(x='stars', data=yelp, palette='rainbow')

# ** Use groupby to get the mean values of the numerical columns, you should be able to create this dataframe with the operation:**
# fill NaN values in Age column with random values generated
# (`rand_1` / `rand_2` are defined outside this excerpt). Assigning through
# .loc with a boolean mask writes into the frame itself; the chained form
# df["Age"][mask] = ... may write to a temporary copy instead.
titanic_df.loc[np.isnan(titanic_df["Age"]), "Age"] = rand_1
test_df.loc[np.isnan(test_df["Age"]), "Age"] = rand_2

# convert from float to int
titanic_df['Age'] = titanic_df['Age'].astype(int)
test_df['Age']    = test_df['Age'].astype(int)

# plot new Age Values (`axis2` is an axes object created outside this excerpt)
titanic_df['Age'].hist(bins=70, ax=axis2)
# test_df['Age'].hist(bins=70, ax=axis4)

# .... continue with plot Age column

# peaks for survived/not survived passengers by their age
facet = sns.FacetGrid(titanic_df, hue="Survived",aspect=4)
facet.map(sns.kdeplot,'Age',shade= True)
facet.set(xlim=(0, titanic_df['Age'].max()))
facet.add_legend()

# average survived passengers by age
fig, axis1 = plt.subplots(1,1,figsize=(18,4))
average_age = titanic_df[["Age", "Survived"]].groupby(['Age'],as_index=False).mean()
sns.barplot(x='Age', y='Survived', data=average_age)

# Cabin
# It has a lot of NaN values, so it won't cause a remarkable impact on prediction
titanic_df.drop("Cabin",axis=1,inplace=True)
test_df.drop("Cabin",axis=1,inplace=True)

# Family
Example #19
0
def display_way(vec_names,
                values,
                annotation,
                words,
                steps_between,
                x_label="instance",
                y_label="activation",
                model_path="modelXXX",
                pretrained=False):
    """Plot a sequence of values as an annotated line and save it as EPS.

    The points (`vec_names` on x, `values` on y) are drawn as dark red
    crosses joined by a line. Every point is annotated with the matching
    entry of `annotation`, written vertically above it. The x tick labels
    are rotated by 90 degrees; the first and last use normal font weight,
    all others light.

    The figure is saved to
    ``{model_path}/vector_way_{y_label}_{words[0]}_{words[1]}_{model_path}.eps``
    and then shown. `steps_between` and `pretrained` are accepted but not
    used by the current implementation.
    """
    frame = pd.DataFrame({
        x_label: vec_names,
        y_label: values,
        'concept': annotation
    })

    def _draw(x, y, **kwargs):
        # Markers only (no regression fit), taken from the full frame
        axes = sns.regplot(data=frame,
                           x=x_label,
                           y=y_label,
                           fit_reg=False,
                           marker="x",
                           color="darkred")
        # Connect the points with a thin line
        plt.plot(x, y, linewidth=1, color="darkred")

        ticks = axes.get_xticklabels()
        last = len(ticks) - 1
        for pos, tick in enumerate(ticks):
            tick.set_rotation(90)
            tick.set_weight("normal" if pos in (0, last) else "light")

        # Vertical label 50 points above each point
        for idx in range(len(x)):
            plt.annotate(annotation[idx],
                         xy=(idx, y.values[idx]),
                         fontsize=8,
                         xytext=(0, 50),
                         textcoords="offset points",
                         rotation=90)

    grid = sns.FacetGrid(frame, height=7)
    grid.map(_draw, x_label, y_label)

    file_name = "{}/vector_way_{}_{}_{}_{}.eps".format(model_path, y_label,
                                                       words[0], words[1],
                                                       model_path)
    plt.savefig(file_name)

    plt.show()
Example #20
0
import glob
import re

import matplotlib.pyplot as plt
import pandas as pd  # pd.read_csv / pd.concat are used below
import seaborn as sns

# Every CSV under data/ is one training log. The run's configuration is
# read from the file path.
files = glob.glob('data/*.csv')

dfs = []
for a in files:
    df = pd.read_csv(a)

    channel = re.search('avg|diff|none', a)
    if channel is None:
        # Fail with the offending path instead of an opaque AttributeError
        raise ValueError(
            "cannot tell the channel (avg/diff/none) from file name: " + a)
    df['channel'] = channel.group()

    has_norm = 'norm' in a
    has_angle = 'angle' in a
    if has_norm and has_angle:
        df['options'] = 'both'
    elif has_norm:
        df['options'] = 'norm'
    elif has_angle:
        df['options'] = 'angle'
    else:
        df['options'] = 'none'
    dfs.append(df)

# Named `all_runs` rather than `all` so the builtin all() stays usable.
all_runs = pd.concat(dfs)
# The first CSV column is the epoch. rename() builds a proper new column
# index instead of mutating `columns.values` in place.
all_runs = all_runs.rename(columns={all_runs.columns[0]: 'epoch'})

# One facet per (options, channel) pair: validation loss in blue, training
# loss in red, y axis fixed to [0, 1]
g = sns.FacetGrid(all_runs, col='channel', row='options')
g = g.map(plt.plot, 'epoch', 'val_loss', color='blue')
g = g.map(plt.plot, 'epoch', 'loss', color='red')
g = g.set(ylim=(0, 1))
g.savefig('data/plot.png')
# Show a single trace in a 1000x1000 figure with no left/right margin
# (`go`, `Scene` and `trace` are defined outside this excerpt)
layout = go.Layout(margin=dict(l=0, r=0), scene=Scene, height=1000, width=1000)
data = [trace]
fig = go.Figure(data=data, layout=layout)
fig.show()

# Attach each clustering's label column to the feature table X11, matching
# rows on the index
cluster_tsne_profile = pd.merge(X11,
                                clusters_tsne_scale['tsne_clusters'],
                                left_index=True,
                                right_index=True)
cluster_pca_profile = pd.merge(X11,
                               clusters_pca_scale['pca_clusters'],
                               left_index=True,
                               right_index=True)

# For every column, one histogram per cluster.
# NOTE(review): iterating the frame also yields the cluster label column
# itself, so a grid is drawn for it too.
for c in cluster_pca_profile:
    grid = sns.FacetGrid(cluster_pca_profile, col='pca_clusters')
    grid.map(plt.hist, c)

for c in cluster_tsne_profile:
    grid = sns.FacetGrid(cluster_tsne_profile, col='tsne_clusters')
    grid.map(plt.hist, c)

# NOTE(review): plt.subplots() creates its own figure, so the figure opened
# by plt.figure() on the line before it stays empty.
plt.figure(figsize=(15, 10))
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 15))
sns.scatterplot(
    data=cluster_pca_profile,
    x='Debt ratio %',
    y='Working Capital to Total Assets',
    hue='pca_clusters',
    s=85,
    alpha=0.4,
# -*- coding: utf-8 -*-
"""
Created on Mon Aug  5 15:41:26 2019

@author: Gareth
"""

import seaborn as sns
sns.set()

# Disabled loading/plotting code kept from an earlier version of the script
#planets = sns.load_dataset("ucdp_")

#cmap = sns.cubehelix_palette(rot=-.2, as_cmap=True)
#ax = sns.scatterplot(x="deaths_a", y="deaths_b",
#                     hue="type_of_violence", size="deaths_civilians",
#                     sizes=(10, 200),
#                     data=africa)

# Single polar-projection facet, colored by 'latitude'
# (`africa` is defined outside this excerpt)
g = sns.FacetGrid(africa,
                  hue="latitude",
                  subplot_kws=dict(projection='polar'),
                  height=4.5,
                  sharex=False,
                  sharey=False,
                  despine=False)

# Draw a scatterplot onto each axes in the grid
g.map(sns.scatterplot, "longitude", "best")
Example #23
0
iris = sb.load_dataset('iris')
iris.head()
iris['species'].unique()
sb.pairplot(data=iris)

# PairGrid
# Each statement builds a fresh grid and adds one more layer than the one
# before it: scatter everywhere, distributions on the diagonal, scatter in
# the upper triangle, KDE in the lower triangle
sb.PairGrid(data=iris)
g = sb.PairGrid(data=iris).map(func=plt.scatter)
g = sb.PairGrid(data=iris).map_diag(func=sb.distplot)
g = sb.PairGrid(data=iris).map_diag(func=sb.distplot).map_upper(
    func=plt.scatter)
g = sb.PairGrid(data=iris).map_diag(func=sb.distplot).map_upper(
    func=plt.scatter).map_lower(func=sb.kdeplot)

# FacetGrid
# NOTE(review): `tips` is used here but only loaded further down in this
# excerpt -- confirm it is defined earlier in the full script.
g = sb.FacetGrid(data=tips, col='time',
                 row='smoker').map(sb.distplot, 'total_bill')
g = sb.FacetGrid(data=tips, col='time',
                 row='smoker').map(sb.scatterplot, 'total_bill', 'tip')

# Regression Plots -----------------------------------------------------------

tips = sb.load_dataset('tips')
tips.head()

# Linear Model Plots
# Plain fit, then split by 'sex', then split by 'sex' with custom markers
sb.lmplot(x='total_bill', y='tip', data=tips)
sb.lmplot(x='total_bill', y='tip', data=tips, hue='sex')
sb.lmplot(x='total_bill', y='tip', data=tips, hue='sex', markers=['^', '1'])
sb.lmplot(x='total_bill',
          y='tip',
          data=tips,
Example #24
0
Created on Thu Sep 12 00:07:02 2019

@author: Hermii
"""
import pandas as pd
import seaborn as sns

path = "C:/Users/Hermii/Desktop/Data Challenge 3/repo/jbg060/code/"
#csv you get from temp.py
comb = pd.read_csv(path + "both_pumps.csv")

#Specify only "flow data" and 1 pump
# .copy() makes `flow` an independent frame, so the column assignments
# below do not act on a slice of `comb` (SettingWithCopyWarning).
flow = comb[(comb["measurementType"] == "Debietmeting.Q")
            & (comb["City.PumpType"] == "GBS_DB.RG8150")].copy()
flow["TimeStamp"] = pd.to_datetime(flow["TimeStamp"])

flow["day"] = flow["TimeStamp"].dt.day_name()
flow["hour"] = flow["TimeStamp"].dt.hour

# Number of measurements per (weekday, hour of day)
day_hour_counts = flow.groupby(
    ["day",
     "hour"]).hour.count().to_frame(name='day_hour_count').reset_index()

g = sns.FacetGrid(data=day_hour_counts,
                  col='day',
                  col_order=[
                      'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday',
                      'Friday', 'Saturday'
                  ],
                  col_wrap=3)

# Pass an explicit category order: without it each facet orders its bars by
# the hours present in that facet's subset, so the same x position can mean
# different hours on different days.
g.map(sns.barplot,
      "hour",
      "day_hour_count",
      order=sorted(day_hour_counts["hour"].unique()))

#This simple analysis confirms our assumptions as decisions for subsequent workflow stages.
# (This step helps choose the assumptions used in the following steps.)
#We should consider Age (our assumption classifying #2) in our model training.
# (Consider Age as a model variable.)
#Complete the Age feature for null values (completing #1).
# (Fill in the null values of Age.)
#We should band age groups (creating #3).
# (Group Age into bands.)


# In[ ]:


g = sns.FacetGrid(train_df, col='Survived')
g.map(plt.hist, 'Age', bins=20)
# Histogram of Age with 20 bins, one panel per 'Survived' value


# In[ ]:


#Correlating numerical and ordinal features
#We can combine multiple features for identifying correlations using a single plot. 
#This can be done with numerical and categorical features which have numeric values.
# (Correlation analysis between numerical and ordinal variables.)
# (Several variables can be shown at once in a single graph for this.)
# (This works between numerical variables and categorical variables that hold numeric values.)

#Observations.
Example #26
0
# Tag each table with its loop variant, then stack them into one frame
df_unroll["LOOP_TYPE"] = "UNROLLED"
df_roll["LOOP_TYPE"] = "ROLLED"
# NOTE(review): DataFrame.append was removed in pandas 2.0; pd.concat is the
# replacement -- confirm the pandas version this runs on.
df = df_roll.append(df_unroll)

# Keep square jets that fit inside the grid in both directions, for the
# 128-seed configuration only
filtered_df = df[(df.PHI_JET_SIZE == df.ETA_JET_SIZE)
                 & (df.ETA_GRID_SIZE >= df.ETA_JET_SIZE) &
                 (df.PHI_GRID_SIZE >= df.PHI_JET_SIZE) &
                 (df.NUMBER_OF_SEEDS == 128)]

#filtered_df = df[["PHI_JET_SIZE", "ETA_GRID_SIZE", "PHI_GRID_SIZE", "NUMBER_OF_SEEDS", "maxOverallLatency"]]

sns.set_style("whitegrid")

# Minimum latency against PHI_GRID_SIZE: rows by ETA_GRID_SIZE, columns by
# PHI_JET_SIZE, one line per loop type, no confidence band
facet = sns.FacetGrid(filtered_df,
                      row="ETA_GRID_SIZE",
                      col="PHI_JET_SIZE",
                      hue="LOOP_TYPE")
facet = facet.map(sns.lineplot, "PHI_GRID_SIZE", "minOverallLatency",
                  ci=None).add_legend()
facet.fig.set_size_inches(16, 9)
# `saveFolder` is defined outside this excerpt
facet.savefig(saveFolder + "/minOverallLatency.pdf")

# Same layout for the maximum latency
facet = sns.FacetGrid(filtered_df,
                      row="ETA_GRID_SIZE",
                      col="PHI_JET_SIZE",
                      hue="LOOP_TYPE")
facet = facet.map(sns.lineplot, "PHI_GRID_SIZE", "maxOverallLatency",
                  ci=None).add_legend()
facet.fig.set_size_inches(16, 9)
facet.savefig(saveFolder + "/maxOverallLatency.pdf")
# Replace 'Dimension' by the label "<Dimension Type> <Dimension>"
df_data['Dimension'] = df_data.apply(
    lambda x: x['Dimension Type'] + " " + str(x['Dimension']), axis=1)
print("Data crunched!")

# Columns: metrics containing 'Inverse' first, then the rest, each group in
# alphabetical order.
# Rows: 'Mean', then 'Minimum', then 'Euclidean' dimensions, each group
# sorted by the first all-digit word of its label.
# NOTE(review): the sort-key lambdas name their parameter `str`, which hides
# the builtin inside the lambda.
g = sns.FacetGrid(
    df_data,
    col='Metric',
    row='Dimension',
    hue='Dimension Count',
    margin_titles=True,
    sharey=False,
    col_order=(
        sorted([x for x in df_data['Metric'].unique() if 'Inverse' in x]) +
        sorted([x
                for x in df_data['Metric'].unique() if 'Inverse' not in x], )),
    row_order=(
        sorted(
            [x for x in df_data['Dimension'].unique() if 'Mean' in x],
            key=lambda str: next(int(s) for s in str.split() if s.isdigit())) +
        sorted(
            [x for x in df_data['Dimension'].unique() if 'Minimum' in x],
            key=lambda str: next(int(s) for s in str.split() if s.isdigit())) +
        sorted(
            [x for x in df_data['Dimension'].unique() if 'Euclidean' in x],
            key=lambda str: next(int(s) for s in str.split() if s.isdigit()))))
# Density curve with rug marks (no histogram) of the column named by `name`,
# which is defined outside this excerpt
g.map(sns.distplot, name, hist=False, rug=True)

outfile = kn.pack({
    'title':
    kn.unpack(dataframe_filename)['title'],
Example #28
0
def plot_gc_landscape():
    """Plot segment-length distributions as a grid and save them to a file.

    Input files come from ``snakemake.input.data_files`` and the figure is
    written to ``snakemake.output.gc_landscape_pdf``. The grid has one row
    per "hf+canon" value and one column per "GC-content" value, colored by
    "canonicity". Each facet shows bars of "prob", scatter points of
    "Expected Probability", and a line of the expected probability for
    segment lengths up to `w`. The y axis is logarithmic.

    The facet height is looked up by `w`, so `w` must be 30, 50 or 100
    (any other value raises KeyError).
    """

    df, w, q = df_from_files(snakemake.input.data_files)

    # Replace the NaNs introduced by the outer merge with the prediction by
    # zeros. NaNs occur when segment lengths above K occur.
    df = df.fillna(0)

    df = filter_combinations(df)

    # Split the values into those above w and those up to and including w.
    # All values above w are summed up into one entry at w + 5.
    # (behavior of sum_and_scatter as described by the original author; the
    # helper is defined outside this excerpt)
    summed, above_w = sum_and_scatter(df, w)

    # Expected probability restricted to lengths <= w (NaN elsewhere), used
    # for the line plot further down
    summed["Expected Prob Line"] = summed["Expected Probability"].where(
        summed["Segment Length"] <= w)
    # plot empirical distributions
    # Facet height per supported value of w
    heights = {
        30: 4,
        50: 5,
        100: 6,
    }
    sns.set(
        font="DejaVu Sans",
        style=sns.axes_style("whitegrid", {'grid.linestyle': '--'}),
        font_scale=1.6,
    )

    g = sns.FacetGrid(
        summed,
        row="hf+canon",
        col="GC-content",
        height=heights[w],
        aspect=1.1,
        margin_titles=True,
        hue="canonicity",
    )
    g.fig.suptitle(f"Segment Length Distributions for $w={w}$, $q={q}$",
                   y=1.01)
    g1 = g.map(plt.bar, "Segment Length", "prob")

    # Fix broken z-order:
    # remember the artists of the lower layer (layer 1, bar plot) so that
    # later layers can be put above them
    backgroundartists = []
    for ax in g1.axes.flat:
        for li in ax.lines + ax.collections:
            li.set_zorder(1)
            backgroundartists.append(li)
        # Second-to-last `Rect` child of the axes; presumably the bar at the
        # w + 5 dummy position -- TODO confirm (`Rect` is imported outside
        # this excerpt)
        beyond_w_bar = [
            rect for rect in ax.get_children() if isinstance(rect, Rect)
        ][-2]
        # take the color of the bar and make it darker (lightness - 0.1 in
        # HLS space, alpha unchanged)
        (col_r, col_g, col_b, col_a) = beyond_w_bar.get_fc()
        col_h, col_l, col_s = colorsys.rgb_to_hls(col_r, col_g, col_b)
        col_r, col_g, col_b = colorsys.hls_to_rgb(col_h, col_l - 0.1, col_s)
        beyond_w_bar.set_color((col_r, col_g, col_b, col_a))

    # plot predicted points and manually place them on a higher layer than the bars
    g2 = g.map(sns.scatterplot,
               "Segment Length",
               "Expected Probability",
               color="black")
    for ax in g2.axes.flat:
        for li in ax.lines + ax.collections:
            # everything that was not recorded as background is new
            if li not in backgroundartists:
                li.set_zorder(5)
    g.map(
        sns.lineplot,
        "Segment Length",
        "Expected Prob Line",
        color="black",
        alpha=0.7,
        palette=sns.color_palette("Set2_r"),
    )
    # Adjust the ticks so that the summed values at the dummy offset are
    # labeled correctly. Currently the dummy value is at w + 5.
    for ax in g2.axes.flat:
        labels = [
            item.get_text() if item.get_text() != f"{w+5}" else ">w"
            for item in ax.get_xticklabels()
        ]
        ax.set_xticklabels(labels)

        # The last g.map call set the y label to its column name; show a
        # readable label instead
        label = ax.get_ylabel()
        ax.set_ylabel(
            label if label != "Expected Prob Line" else "Probability")

    g.set(yscale="log")
    sns.despine()
    plt.savefig(snakemake.output.gc_landscape_pdf, bbox_inches='tight')
Example #29
0
# Column names: '家賃' = rent, '大きさ' = size, '近さ' = proximity,
# '方角' = orientation.
# x and y are passed by keyword throughout: newer seaborn releases no longer
# accept them positionally, and the keyword form works on older ones too.

# Joint distribution of rent and size
sns.jointplot(x='家賃', y='大きさ', data=df)
plt.show()

sns.pairplot(df)
plt.show()

# Box-and-whisker plot of rent per proximity class
sns.boxplot(x='近さ', y='家賃', data=df)
plt.show()

# Histograms and scatter plots, colored by proximity
sns.pairplot(df, hue='近さ')
plt.show()

# Lay out several plots side by side
g = sns.FacetGrid(df, col='近さ')
g.map(plt.hist, '家賃')
plt.show()

g = sns.FacetGrid(df, col='方角', hue='近さ', col_wrap=4)
g.map(plt.scatter, '大きさ', '家賃')
plt.show()

# Examine how the rent distribution changes with proximity
# t-test between proximity classes 'A' and 'B'
print(stats.ttest_ind(df[df['近さ'] == 'A']['家賃'], df[df['近さ'] == 'B']['家賃']))

# Relationship between size and rent
# Linear regression
print(stats.linregress(df['大きさ'], df['家賃']))
sns.lmplot(x='大きさ', y='家賃', data=df)
Example #30
0
    def plot_input_tstep(self,
                         epoch,
                         save=False,
                         outpath="data/plots/",
                         what="activation"):
        """Plot summed activation and harmony per timestep for every input.

        For the given training `epoch`, the state of every input at every
        timestep is reduced to a single number (the sum over all entries of
        that state) and paired with the harmony recorded at that timestep.
        The resulting table is plotted according to `what` and returned.

        Args:
            epoch: Index along the epoch axis of ``self.data['S_trace']``
                and ``self.data['Harmony_trace']``.
            save: If true, the figure is also written to a file whose name
                is `outpath` followed by a stem that depends on `what` and
                `epoch`.
            outpath: Prefix prepended verbatim to the output file name.
            what: Which figure to draw. One of "activation", "harmony",
                "regplot_facet", "regplot", "harm_dist_inp", "act_dist_inp"
                or "harmony_dev". Any other value draws nothing.

        Returns:
            A DataFrame with one row per (input, timestep) and the columns
            "input", "tstep", "activation", "harmony" and "inpName".
        """
        states = self.data['S_trace']
        n_inputs = len(self.inputNames)
        n_tsteps = states.shape[2]

        # states_sum[i, t] = (input number, timestep, summed activation,
        # harmony)
        states_sum = torch.zeros(n_inputs, n_tsteps, 4)
        for i in range(n_inputs):
            harmonies = self.data['Harmony_trace'][i, epoch, :]
            s = states[i, epoch, :, :, :]
            for tstep in range(s.shape[0]):
                states_sum[i, tstep, 0] = i
                states_sum[i, tstep, 1] = tstep
                states_sum[i, tstep, 2] = s[tstep, :, :].sum()
                states_sum[i, tstep, 3] = harmonies[tstep]

        # Flatten to one row per (input, timestep), input-major
        collapsed = states_sum.view((n_inputs * n_tsteps, 4)).numpy()

        # Each input name repeated once per timestep, in the same row order
        stim_names = [
            name for name in self.inputNames for _ in range(n_tsteps)
        ]

        df = pd.DataFrame(collapsed,
                          columns=["input", "tstep", "activation", "harmony"])
        df['inpName'] = stim_names

        def finish(grid, stem, title=None):
            """Set the title, optionally save, then show one seaborn grid."""
            if title is not None:
                grid.set(title=title)
            if save:
                # relplot, displot and FacetGrid all yield a FacetGrid,
                # which offers savefig() but has no get_figure(). Saving
                # happens before show() so the file is written while the
                # figure is still open.
                grid.savefig(outpath + stem)
            plt.show()

        if what == "activation":
            p = sns.relplot(data=df,
                            y="activation",
                            x="tstep",
                            hue="inpName",
                            kind="line",
                            palette="viridis")
            finish(p, f"all_activations_{epoch}",
                   f"Activation over time (epoch {epoch})")

        elif what == "harmony":
            p = sns.relplot(data=df,
                            y="harmony",
                            x="tstep",
                            hue="inpName",
                            kind="line",
                            palette="viridis")
            finish(p, f"all_harmonies_{epoch}",
                   f"Harmony over time (epoch {epoch})")

        elif what == "regplot_facet":
            g = sns.FacetGrid(data=df,
                              hue="inpName",
                              col="inpName",
                              palette="deep")
            g.map(sns.regplot, "harmony", "activation")
            finish(g, f"harmony_activation_{epoch}",
                   f"Harmony vs. Activation (epoch {epoch})")

        elif what == "regplot":
            g = sns.relplot(data=df,
                            x="activation",
                            y="harmony",
                            hue="inpName",
                            palette="deep",
                            kind="line")
            finish(g, f"harmony_activation_{epoch}",
                   f"Harmony vs. Activation (epoch {epoch})")

        elif what == "harm_dist_inp":
            g = sns.displot(data=df,
                            x="harmony",
                            hue="inpName",
                            palette="deep",
                            multiple="dodge")
            # Title previously read "Harmony vs. Activation", copied from
            # the regression plots
            finish(g, f"harmony_dist_{epoch}",
                   f"Harmony distribution (epoch {epoch})")

        elif what == "act_dist_inp":
            g = sns.displot(data=df,
                            x="activation",
                            hue="inpName",
                            palette="deep",
                            multiple="dodge")
            # Title previously read "Harmony vs. Activation", copied from
            # the regression plots
            finish(g, f"activation_dist_{epoch}",
                   f"Activation distribution (epoch {epoch})")

        elif what == "harmony_dev":
            # Progressive harmony: one harmony distribution per input
            g = sns.FacetGrid(df, col="inpName", height=2)
            g.map(sns.distplot, "harmony")
            finish(g, f"harmony_distribution_{epoch}")

        return df