Esempio n. 1
0
def plot_multimodal_bars(PP):
    """Plot log likelihood per visual-feature layer and save it as a PDF.

    ``PP`` is passed to seaborn as the data source; it must provide the
    ``perception``, ``logLikelihood`` and ``pragmatics`` columns.  Bars are
    drawn for the ``pool1``, ``conv42`` and ``fc6`` perception levels (in that
    order), coloured by ``pragmatics``, and the figure is written to
    ``./plots/loglikelihood_models_multimodal.pdf``.
    """
    layer_order = ['pool1', 'conv42', 'fc6']
    sns.catplot(x='perception',
                y='logLikelihood',
                hue='pragmatics',
                order=layer_order,
                data=PP,
                kind='bar',
                palette='Paired',
                legend=False,
                ci=None)
    plt.xlabel('visual features')
    plt.ylabel('log likelihood')
    # Replace the raw layer names on the x axis with readable labels.
    plt.xticks([0, 1, 2], ['early', 'mid', 'high'], fontsize=14)
    plt.tight_layout()
    plt.savefig('./plots/loglikelihood_models_multimodal.pdf')
Esempio n. 2
0
def plot_human_bars(PP):
    """Plot log likelihood by context sensitivity and save it as a PDF.

    ``PP`` is passed to seaborn as the data source; it must provide the
    ``pragmatics``, ``logLikelihood`` and ``production`` columns.  Bars are
    drawn for the ``S0`` and ``combined`` pragmatics levels, coloured by
    ``production`` (``nocost`` then ``cost``), and the figure is written to
    ``./plots/loglikelihood_models_human.pdf``.
    """
    sns.catplot(x='pragmatics',
                y='logLikelihood',
                hue='production',
                order=['S0', 'combined'],
                hue_order=['nocost', 'cost'],
                data=PP,
                kind='bar',
                palette='Paired',
                legend=False,
                ci=None)
    plt.xlabel('context')
    plt.ylabel('log likelihood')
    # Replace the raw model names on the x axis with readable labels.
    plt.xticks([0, 1], ['insensitive', 'sensitive'], fontsize=14)
    plt.tight_layout()
    plt.savefig('./plots/loglikelihood_models_human.pdf')
Esempio n. 3
0
	## classical hypothesis test
	# Chance level is 1 in 32 (presumably the number of response options --
	# TODO confirm).  It is written as a float because under Python 2 without
	# `from __future__ import division` the integer expression 1/32 is 0, which
	# would test against p=0 and draw the chance line at y=0.
	chance = 1.0/32
	p = scipy.stats.binom_test(p1*n1, n=n1, p=chance, alternative='two-sided')
	# Parenthesised single-argument print works on both Python 2 and Python 3.
	print('Closer proportion diff from chance? p = {}'.format(p))

	p = scipy.stats.binom_test(p2*n2, n=n2, p=chance, alternative='two-sided')
	print('Further proportion diff from chance? p = {}'.format(p))

	##### MAKE PLOTS AND SAVE OUT 

	## plot recognition accuracy by category and condition
	sns.set_context('poster')
	# NOTE(review): catplot is a figure-level function and opens its own
	# figure, so this 4x4 figure stays empty.  It is kept so `fig` stays defined.
	fig = plt.figure(figsize=(4,4))
	redgld=[(0.8, 0.2, 0.2),(0.9, 0.7, 0.3)]
	grid = sns.catplot(y='correct',
					x='target_category',
					hue='condition',
					hue_order=['closer','further'],
					order=['bird','car','chair','dog'],
					data=X,kind='bar',palette=redgld)
	plt.ylim([0,1])
	plt.ylabel('proportion correct')
	plt.xlabel('category')
	# Dashed reference line at chance performance.
	h = plt.axhline(chance,linestyle='dashed',color='black')
	plt.savefig('./plots/accuracy_by_category_and_condition.pdf')
	plt.close(fig)
	# Also close the figure catplot created; closing `fig` alone leaves it open.
	plt.close(grid.fig)

	## plot recognition accuracy by condition
	plt.figure(figsize=(2,4))
	# sns.set_context('poster')
	redgld=[(0.8, 0.2, 0.2),(0.9, 0.7, 0.3)]
	sns.catplot(y='correct',
					x='condition',
Esempio n. 4
0
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the residue table from the Excel workbook.
df = pd.read_excel('./residue/无.xlsx')

# Box plot of the "Residue" column, one box per value of "class".
sns.catplot(data=df,
            x="class",
            y="Residue",
            kind="box")
plt.show()
# Add a '<metric>_mean' column for every metric whose name does not contain
# 'mask', by applying np.mean to each cell of the metric column.
for metric in metrics:
    if 'mask' in metric:
        continue
    df[metric + '_mean'] = df[metric].apply(np.mean)

# Two figures per non-mask metric: a swarm plot per site (one column per value
# of 'fake', coloured by fold) and a point plot of fake vs. mean by real_name.
for metric in metrics:
    if 'mask' not in metric:
        mean_col = metric + '_mean'
        g = sns.catplot(data=df,
                        x='site',
                        y=mean_col,
                        hue='fold',
                        col='fake',
                        kind='swarm',
                        linewidth=.9,
                        dodge=False,
                        sharey=False)
        d = sns.catplot(data=df,
                        x='fake',
                        y=mean_col,
                        hue='real_name',
                        kind='point')

        # Titles the current axes, i.e. the point plot created just above.
        plt.title(metric)

# for metric in metrics:
#     if 'mask' in metric:
Esempio n. 6
0
ax.set_ylabel("Number of Observations")
# Rotate the tick labels so they stay readable when they would otherwise overlap.
ax.set_xticklabels(df.Year, rotation=90)
# Save before showing: with some interactive backends the figure may no longer
# be drawable once the window opened by plt.show() has been closed.
fig.savefig("Year.png")
plt.show()

"""**Seaborn**
> AxesSubplot

> FacetGrid
"""

sns.set_palette("RdBu")
# Axes-level count plot (draws onto the current matplotlib axes).
sns.countplot(x="Year", data=df)
plt.show()

# Same counts through the figure-level interface; aspect=3 widens the facet.
sns.catplot(x="Year", aspect=3, data=df, kind="count")
plt.show()

# catplot returns a grid object, whose figure can be given a title.
g = sns.catplot(x="Year", aspect=3, data=df, kind="count")
g.fig.suptitle("Year Counts", y=1)
plt.xticks(rotation=90)
plt.show()

# Axes-level scatter plot of price against year, coloured by the "New" column.
sns.scatterplot(x="Year", y="Selling_Price", data=df, hue="New")
plt.show()

# The same scatter plot through the figure-level relplot interface.
sns.relplot(x="Year", y="Selling_Price", data=df, hue="New", kind="scatter")
plt.show()

"""**LINE PLOT**"""
# Rows of the control data whose compound is listed in cod_dados_controle.
top_dados_controle = dados_controle.query('composto in @cod_dados_controle')

# The 100 most frequent compounds in the drug data, and the rows that use them.
cod_dados_droga = dados_droga['composto'].value_counts().index[:100]
top_dados_droga = dados_droga.query('composto in @cod_dados_droga')

# Bare expressions below only display a value when run as notebook cells.
cod_dados_controle

top_dados_controle

cod_dados_droga

top_dados_droga

dados_controle['composto'].unique()

# Compound counts per dose for the control rows (two facets per row).
plot1 = sns.catplot(data=top_dados_controle,
                    x='composto',
                    col="dose",
                    col_wrap=2,
                    kind="count",
                    height=4,
                    aspect=.7)

# Compound counts per dose for the drug rows.
plot2 = sns.catplot(data=top_dados_droga,
                    x='composto',
                    col="dose",
                    kind="count",
                    height=7,
                    aspect=.8)

"""#### - achar o composto em 'cacb2b860' dados droga"""

# Check whether compound 'cacb2b860' also occurs in the drug data.
findArray = dados_droga['composto'].unique()

exist = 'cacb2b860' in findArray
exist

"""#### - Posso concluir que o controle so foi realizado no composto 'cacb2b860'

##Separando compostos
"""
Esempio n. 8
0
                                     overwrite=False)
# CSP spatial filtering (8 components) followed by an LDA classifier.
pipeline = make_pipeline(CSP(n_components=8), LDA())
results = evaluation.process({"csp+lda": pipeline})

# To export the results in CSV within a directory.  exist_ok=True replaces the
# separate exists()/mkdir() pair, so an already existing directory is fine and
# nothing can go wrong between the check and the creation.
os.makedirs("./results", exist_ok=True)
results.to_csv("./results/results_part2-2.csv")

# To load previously obtained results saved in CSV
results = pd.read_csv("./results/results_part2-2.csv")

##############################################################################
# Plotting Results
# ----------------
#
# We plot the results using the seaborn library. Note how easy it
# is to plot the results from the three datasets with just one line.

# Zero-padded subject labels ("01", "02", ...) used as the categorical y axis.
results["subj"] = [str(resi).zfill(2) for resi in results["subject"]]
g = sns.catplot(
    kind="bar",
    x="score",
    y="subj",
    col="dataset",
    data=results,
    orient="h",
    palette="viridis",
)
plt.show()
Esempio n. 9
0
# plt.show()

# Draw a boxplot with nested grouping by two categorical variables:
ax = sns.boxplot(x="day",
                 y="total_bill",
                 hue="smoker",
                 data=tips,
                 palette="Set3")
plt.savefig("boxplot with nested grouping by two categorical variables.jpg")
# Close the saved figure.  sns.boxplot draws onto the current axes, so without
# this the next boxplot would be drawn on top of this one and saved with it.
plt.close()

# Draw a boxplot with nested grouping when some bins are empty:
ax = sns.boxplot(x="day", y="total_bill", hue="time", data=tips, linewidth=2.5)
plt.savefig("boxplot with nested grouping when some bins are empty.jpg")
plt.close()

# Control box order by passing an explicit order:
ax = sns.boxplot(x="time", y="tip", data=tips, order=["Dinner", "Lunch"])
plt.show()

# Use catplot() to combine a boxplot() and a FacetGrid. This allows grouping within additional categorical variables.
# Using catplot() is safer than using FacetGrid directly, as it ensures synchronization of variable order across facets:
g = sns.catplot(x="sex",
                y="total_bill",
                hue="smoker",
                col="time",
                data=tips,
                kind="box",
                height=4,
                aspect=.7)
plt.show()
Esempio n. 10
0
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ranksums

''' Analise 5 - Hora do dia '''
# A forward slash works as a path separator on every platform; the previous
# 'data\\...' spelling only resolved on Windows.
gorjetas = pd.read_csv('data/tips_tratados_4.csv', sep=',')
print(gorjetas.head())

print(gorjetas.hora_do_dia.unique())

# Default categorical plot of bill value by time of day.
sns.catplot(x='hora_do_dia', y='valor_da_conta', data=gorjetas)
plt.show()

# Swarm plot: spreads nearby points apart so that they are easier to see.
sns.catplot(x='hora_do_dia', y='valor_da_conta', kind='swarm', data=gorjetas)
plt.show()

# Violin plot: the widest part marks where the values are most concentrated.
sns.violinplot(x='hora_do_dia', y='valor_da_conta', data=gorjetas)
plt.show()

# Box plot: a visual counterpart of the statistics reported by describe().
sns.boxplot(x='hora_do_dia', y='valor_da_conta', data=gorjetas)
plt.show()

# Histogram: the x axis holds the value of the variable, the other axis its frequency.
# Histogram of the lunch ('Almoço') bills.
almoço = gorjetas.query("hora_do_dia == 'Almoço'").valor_da_conta
# NOTE(review): sns.distplot is deprecated in recent seaborn releases; left
# unchanged because its replacements use a different default y-axis scale.
sns.distplot(almoço)
plt.show()
Esempio n. 11
0
                           axis=1,
                           keys=['Total', 'Percent'])

print(missing_data_1)

#Code ends here

# --------------

#Code starts here

#Setting the figure size
# catplot is a figure-level function: it always creates its own figure, so a
# preceding plt.figure(...) would only leave an extra empty figure behind.
# The size is therefore controlled through `height` below.

#Plotting boxplot between Rating and Category
cat = sns.catplot(x="Category", y="Rating", data=data, kind="box", height=10)

#Rotating the x tick labels
cat.set_xticklabels(rotation=90)

#Setting the title of the plot
plt.title('Rating vs Category [BoxPlot]', size=20)

#Code ends here

# --------------
#Importing header files
from sklearn.preprocessing import MinMaxScaler, LabelEncoder

#Importing header files
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
Esempio n. 12
0
ax = sns.kdeplot(data=train['Age'], shade=True, gridsize = 30)
_ = ax.set(title='Age distribution', ylabel='Distribution', xlabel='Age - months')

# %% [markdown]
# ## We've got similar number of cats and dogs. Most of them are around 1 year old with slightly higher number of female pets.

# %% [code] {"_kg_hide-input":true}
num = 10
mixed_breed_class = 307

# Top `num` dog breeds.  Ids and counts are taken from the same filtered
# value_counts() result so every bar shows the count of the breed it is
# labelled with (the counts used to come from all pets, not only dogs).
values = train.loc[train['Type'] == 'Dog', 'Breed1'].value_counts().head(num)
indexes = values.index
names = [id_to_breed(i) for i in indexes]
# catplot needs tabular data: one row per breed with a count and a name column.
s = pd.DataFrame({'values': values.values, 'names': names})
# catplot creates its own figure and returns a grid, so no plt.figure() is needed.
ax = sns.catplot(x='values', y='names', kind='bar', data=s)
_ = ax.set(title=f'Dog breed classes top {num}', ylabel='Dog breed', xlabel='Count')

# Same plot for cats.
values = train.loc[train['Type'] == 'Cat', 'Breed1'].value_counts().head(num)
indexes = values.index
names = [id_to_breed(i) for i in indexes]
s = pd.DataFrame({'values': values.values, 'names': names})
ax = sns.catplot(x='values', y='names', kind='bar', data=s)
_ = ax.set(title=f'Cat breed classes top {num}', ylabel='Cat breed', xlabel='Count')

# 1 for pets whose primary breed is not one of the generic labels, else 0.
# NOTE(review): 'Domestic Short Hard' looks like a typo for 'Domestic Short
# Hair'; left as is because correcting it changes the counts printed below --
# confirm against the breed label table.
pure_breeded = train['Breed1'].apply(lambda x: 0 if id_to_breed(x) in ['Mixed Breed', 'Domestic Short Hard', 'Domestic Medium Hair', 'Domestic Long Hair'] else 1)
print(f'Pure breeded pets: {sum(pure_breeded)}\nNot pure breeded pets: {len(pure_breeded)-sum(pure_breeded)}')

# %% [markdown]
# ### As we can see dogs as well as cats breeds are mostly dominated by ~3 classes. We've got 7512 purebreeded pets and 7481 pets that aren't purebreeded.

# %% [code] {"_kg_hide-input":true}
Esempio n. 13
0
# Histograms of the windspeed and count columns, each on its own 7x7 figure.
for column in ('windspeed', 'count'):
    plt.figure(figsize=(7, 7))
    plt.hist(train[column], bins=10)
    plt.xlabel(column)
    plt.ylabel('Frequency')

################################################## Bivariate Plots #################################################################################################

# One categorical plot of "count" against every column named in cat_cnames.
for i in cat_cnames:
    sns.catplot(x=i, y="count", data=train)
    # NOTE(review): fname is built but never used; a savefig call may be missing.
    fname = str(i) + '.pdf'

##################################################  Density Plots ##################################################################################################

# Kernel density estimate of each column, all drawn with shading.
for column in ('season', 'month', 'holiday', 'weekday', 'workingday',
               'weather', 'temperature', 'atemp', 'humidity', 'windspeed',
               'count'):
    sns.kdeplot(train[column], shade=True)
Esempio n. 14
0
    ax[i, -1].set_xlabel('RT (s)')
    ax[i, 0].set_ylabel('Reward manipulation = {:.2f}'.format(rewardfactor))

exp1 = pd.read_csv('../data/exp1.csv')
exp2 = pd.read_csv('../data/exp2.csv')

# Keep the dynamic, set-size-12 trials and drop subject 666.  .copy() makes
# the filtered frames independent so the column assignment below does not
# write into a slice of the frame that was read from disk.
exp1 = exp1[(exp1['dyn'] == 'Dynamic') & (exp1['sub'] != 666) &
            (exp1['setsize'] == 12)].copy()
exp2 = exp2[(exp2['dyn'] == 'Dynamic') & (exp2['sub'] != 666) &
            (exp2['setsize'] == 12)].copy()
# Experiment 1 gets the reward level 'None'.
exp1['reward'] = 'None'
# Stack both experiments.  The earlier exp1.append(exp1) duplicated experiment
# 1 and never used exp2, leaving only the 'None' level with data.
# NOTE(review): assumes exp2 carries a 'reward' column with the 'Absent' and
# 'Present' levels used in `order` below -- confirm.
newdf = pd.concat([exp1, exp2], sort=False)

g = sns.catplot(x='reward',
                y='rt',
                hue='target',
                data=newdf,
                kind='point',
                order=['Absent', 'None', 'Present'])
g.set_xticklabels(size=18)
g.set_yticklabels(size=18)
g.set_ylabels('RT (s)', size=20)
g.set_xlabels('Reward Condition', size=20)

ax = plt.gca()
ax.plot([0, 1, 2],
        sim_mrts[::-1, 1, 1, 1],
        label='Sim Present',
        c='lightgreen',
        lw=2)
ax.plot([0, 1, 2],
        sim_mrts[::-1, 1, 0, 0],
Esempio n. 15
0
plt.style.use('ggplot')
plt.xlabel("Victory Status")
plt.ylabel("Winner")
plt.title("Scatter Plot")
plt.show()

# relplot: encoded victory status against opening length, one line per
# winner / rated combination, without confidence bands.
sns.set(style="darkgrid")
sns.relplot(x="opening_ply", y="statusEnc", hue="winner", style="rated", ci=None,
            dashes=False, markers=True, kind="line", data=games)
plt.ylabel("Victory Status")
plt.show()

# Subsets of the games by winner.
black = games.query("winner == 'black'")
white = games.query("winner == 'white'")
draw = games.query("winner == 'draw'")

# jointplot: joint density of the two players' ratings.  x and y are passed by
# keyword because current seaborn no longer accepts them positionally.
sns.set(style="white")
sns.jointplot(x=games.white_rating, y=games.black_rating, kind="kde", height=7, space=0)

plt.show()

# cat plot: bars of turns per victory status, split by winner.
pl = sns.catplot(x='victory_status', y='turns', hue='winner', data=games,
                 height=6, kind="bar", palette="muted")
pl.despine(left=True)
plt.show()

# Count plot of games per winner.
sns.countplot(x='winner', data=games)
plt.show()
Esempio n. 16
0
        hue='year',
        x='race',
    )
    if SHOW:
        show()
    else:
        savefig('./race_year_count.png', )
    del count_figure

    # Count of rows per race, coloured by year, with one facet per gender.
    # catplot creates its own figure, so no figure() call is made beforehand;
    # one would only add an empty figure that show() would display as well.
    cat = catplot(
        col='gender',
        data=input_df[[
            'gender',
            'race',
            'year',
        ]],
        hue='year',
        kind='count',
        x='race',
    )
    if SHOW:
        show()
    else:
        # savefig writes the current figure, which is the one catplot created.
        savefig('./race_year_catplot.png')

    count_df = DataFrame([(key[0], key[1], key[2], value)
                          for key, value in dict(
                              Counter([
                                  tuple(item) for item in input_df[[
#Convert csv file to dataframe
df = pd.read_csv(filepath, encoding = 'ISO-8859-1')
#Separate only heart rate observations.  na=False treats rows with a missing
#'type' as non-matching, and .copy() makes the subset an independent frame so
#the column assignments below do not write into a slice of df.
df_heartrate = df[df['type'].str.contains('HeartRate', na=False)].copy()

#Convert creation date column to datetime format (parsed as UTC, then the
#timezone information is dropped)
df_heartrate['creation_date'] = pd.to_datetime(df_heartrate['creation_date'],
                                               format='%Y-%m-%d %H:%M:%S', utc=True)
df_heartrate['creation_date'] = df_heartrate['creation_date'].dt.tz_convert(None)

#Weekday name of every observation.  dt.day_name() returns English names by
#default, matching the `order` list passed to catplot below.
df_heartrate['weekday'] = df_heartrate['creation_date'].dt.day_name()

#Set index of dataframe as creation date
df_heartrate.set_index('creation_date', inplace=True, drop=True)

#Create and show violin plot
sns.catplot(x='weekday', y='value',
            data=df_heartrate, kind='violin',
            order=['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'],
            inner='quartile')
plt.show()
Esempio n. 18
0
    estimator=sum,
    palette=sns.color_palette("muted",
                              n_colors=len(despesas_sem_vencimentos) + 4),
)
# Re-apply the existing tick labels rotated by 75 degrees and right-aligned.
plot.set_xticklabels(
    plot.get_xticklabels(),
    rotation=75,
    horizontalalignment="right",
)
plt.show()

# In[48]:

# Horizontal bars of value per "Descricao", one facet per month.
sns.catplot(
    data=despesas_com_classificacao,
    x="value",
    y="Descricao",
    col="month",
    kind="bar",
    height=4,
    aspect=0.7,
)

# In[49]:

# First 100 rows of the classification columns (displayed in a notebook).
despesas_com_classificacao[
    ["legal_status", "subgroup", "Descricao", "summary", "Codigo"]
].head(100)

# In[50]:

# Auxílio-Alimentação
# Vale refeição/vale alimentação
def _aggregate_prices(result, key):
    """Return sum, count and mean of 'price' per (key, 'src') as a flat frame."""
    grouped = result[[key, 'src', 'price']].groupby([key, 'src'])['price']
    return grouped.agg(['sum', 'count', 'mean']).reset_index()


def _plot_mean_prices(aggregated, key, title):
    """Draw horizontal bars of the mean price per `key`, split by source."""
    sns.set(style="whitegrid")
    sns.catplot(y=key,
                x='mean',
                orient='h',
                height=8,
                hue='src',
                kind='bar',
                data=aggregated,
                aspect=.8)
    plt.title(title)
    plt.tight_layout(pad=1.08, h_pad=None, w_pad=None, rect=None)


def main(args):
    """Compare toysff and amazon prices, export the results and plot them.

    Reads ``args.csv`` (write additional CSV output when true) and
    ``args.plot`` (show the bar plots when true).  The combined long-format
    table is always printed and pickled to ``./data/comparison_amazon_toysff``.
    """
    df, info = basic_compare('./data/toysff.p', './data/amazon.p', 'toysff',
                             'amazon')
    output_as_csv(df, './output/price_differences.csv')

    # prepare data for bar plot: one frame per shop with a common 'price'
    # column and a 'src' column naming the shop, then stack them
    df = df.rename(columns={'toysff_name': 'name'})
    # axis is passed by keyword; recent pandas rejects it as a positional.
    df_amazon = df.drop(['amazon_name', 'toysff_price'],
                        axis=1).rename(columns={'amazon_price': 'price'})
    df_toysff = df.drop(['amazon_name', 'amazon_price'],
                        axis=1).rename(columns={'toysff_price': 'price'})
    # A scalar is broadcast to every row, so this also works when
    # 'article_nr' contains missing values (count() skips those).
    df_amazon['src'] = 'amazon'
    df_toysff['src'] = 'toys for fun'
    result = pd.concat([df_amazon, df_toysff])
    print(result)
    # The context manager closes the file even if pickling fails.
    with open('./data/comparison_amazon_toysff', 'wb') as pickle_file:
        pickle.dump(result, pickle_file)
    if args.csv:
        output_as_csv(result, './output/comparison_amazon_toysff.csv')

    # data for subbrand plot
    subbrand_aggregated = _aggregate_prices(result, 'subbrand')

    # data for price segment plot
    segment_aggregated = _aggregate_prices(result, 'price_segment')

    # for the correct count- and sum-aggregation we need to drop duplicates
    # all values below are the same for 'src=amazon' and 'src=toysff'
    result_single = result[[
        'article_nr', 'subbrand', 'price_segment', 'diff_abs', 'diff_%'
    ]].drop_duplicates()

    # Output on console and to csv if set true
    for category in ('subbrand', 'price_segment'):
        for column in ('diff_abs', 'diff_%'):
            output_diff_by_category(result_single, category, column, args.csv)

    if args.plot:
        # visualisation subbrands
        _plot_mean_prices(subbrand_aggregated, 'subbrand',
                          'Subbrands: Average Prices in €')
        # visualisation price_segments
        _plot_mean_prices(segment_aggregated, 'price_segment',
                          'Price Segments: Average Prices €')

        plt.show()

    print('Main Statistics: ')
    print(info)
#plt.leyend('asndlsad')

# Exploratory plotting of the resumen(cierre, '19') summary and of `carga`.
# NOTE(review): several calls below look adapted from a seaborn gallery
# example and do not appear runnable as written -- see the individual notes.
plt.plot()

# NOTE(review): plt.barh normally takes bar positions and widths; only one
# argument is passed here -- confirm what was intended.
plt.barh(resumen(cierre, '19').reset_index()['X ENS'])
#############
sns.set(style="whitegrid")

# NOTE(review): sns.load_dataset expects a dataset name; here it receives the
# result of resumen(...). `titanic` is not used by the plot below.
titanic = sns.load_dataset(resumen(cierre, '19'))

# Bar plot per "PO", coloured by "CUSTOMER", built from the resumen summary.
# NOTE(review): y is given a list of two column names; catplot's y is
# presumably meant to be a single column -- confirm.
g = sns.catplot(x="PO",
                y=["X ENS", 'X PINTAR'],
                hue="CUSTOMER",
                data=resumen(cierre, '19').reset_index(),
                height=6,
                kind="bar",
                palette="muted")
g.despine(left=True)
# NOTE(review): the "survival probability" label appears to be left over from
# the Titanic example and does not describe the columns plotted above.
g.set_ylabels("survival probability")

# Loads seaborn's "titanic" example dataset; not used by the plot below.
titanic = sns.load_dataset("titanic")

# Count of rows in `carga` per CUSTOMER, coloured by the same column.
sns.catplot(y="CUSTOMER",
            hue="CUSTOMER",
            kind="count",
            palette="pastel",
            edgecolor=".6",
            data=carga)
Esempio n. 21
0
def gender_bar_graph():
    """Show a count plot of the ``sex`` column of the module-level ``data``."""
    sns.catplot(data=data,
                x="sex",
                kind="count",
                palette="magma",
                height=6)
    plt.title("Gender of students : F - female,M - male")
    plt.show()
Esempio n. 22
0
    # stats
    pValsDF, bhDF = statsFeat(FeatMatGrouped, 'normal', 0.1, control)
    bhDF = bhDF.reset_index()
    # Split the `metadata` entries into separate columns.  'drug' is the first
    # positional component of metadata whose dtype is object; date, worm
    # number and window are its first, second and last components.
    bhDF['drug'] = [list(i) for i in bhDF.metadata.str if i.dtype == object][0]
    bhDF['date'] = bhDF.metadata.str[0]
    bhDF['worm_number'] = bhDF.metadata.str[1]
    bhDF['window'] = bhDF.metadata.str[-1]
    bhDF = bhDF.drop(columns='metadata')

    # Arguments shared by every bar plot below: total number of significant
    # features by worm number, one facet per drug, with window as hue.
    bar_kwargs = dict(x='worm_number',
                      y='sumSig',
                      hue='window',
                      col='drug',
                      kind='bar',
                      palette='colorblind')

    # One figure per date, saved as a .tif in save_dir.
    for date in metadata_dict['date']:
        date_rows = bhDF[bhDF['date']==date]
        sns.catplot(data=date_rows, **bar_kwargs)
        out_path = os.path.join(save_dir, 'T_test_number_sig_feats5mins_{}.tif'.format(date))
        plt.savefig(out_path, bbox_inches='tight', pad_inches=0.03)

    # And for all data combined, with standard-deviation error bars.
    sns.catplot(data=bhDF, ci='sd', **bar_kwargs)
Esempio n. 23
0
# Mean data_point for every (biological_sample_group, sex) combination;
# the value is only displayed when run in a notebook.
df2.groupby(['biological_sample_group', 'sex'])[['data_point']].mean()

# Open questions from the author:
# - how to pass this along to the plot functions?
# - a whisker plot can seemingly be generated directly from the group object
# - scatter plot of values in the 4 groups - put those into 4 variables?
#   (needs color or shape to show M vs F and KO vs WT)

import matplotlib.pyplot as plt

# Scatter of every data point against its experiment date.
x = df2['date_of_experiment']
y = df2['data_point']
plt.scatter(x, y)
plt.show()
# Open question: how to colour and shape the 4 groups in a scatter plot?


# Column plot of KO vs WT grouped by sex: one facet per biological sample
# group, one bar per sex.
import seaborn as sns

sns.catplot(data=df2,
            x='sex',
            y='data_point',
            col='biological_sample_group',
            kind='bar')
plt.show()


# does not have mut vs wt
# any reason to show weight?
#sns.violinplot(x = 'sex', y = 'weight', col = 'biological_sample_group', kind = 'violin', data=df2)
#plt.show()
    df_sub['p_corrected'] = ps_corrected[1]
    temp.append(df_sub)
# Combine the per-group tables, add a significance-star column and write the
# table sorted by roi, condition and model.
anova_results = pd.concat(temp)
anova_results['stars'] = anova_results['p_corrected'].apply(utils.stars)
anova_results = anova_results.sort_values(['roi', 'condition', 'model'])
anova_results.to_csv(
    '../../../../results/{}/RP/{}/one way ANOVA.csv'.format(
        experiment, 'encoding 15 stats'),
    index=False)

# Bars of mean variance per ROI, coloured by encoding model, with one row of
# facets per condition and an independent y axis for each row.
g = sns.catplot(
    data=df,
    x='roi_name',
    y='mean_variance',
    hue='model_name',
    hue_order=[
        'VGG19', 'DenseNet1211', 'MobileNetV2', 'Fast Text', 'GloVe',
        'Word2Vec'
    ],
    row='condition',
    kind='bar',
    aspect=6,
    sharey=False,
)
g._legend.set_title('Encoding Models')
g.set_axis_labels("ROIs", "Mean Variance Explained")
g.set_titles("{row_name}")
g.set(ylim=(0, 0.06))
# Replace the generated row titles with descriptive ones.
g.axes[0][0].set(title='Shallow Process')
g.axes[1][0].set(title='Deep Process')
# Constants for code that follows this block; their use is not visible here.
k = {'I2V': -0.25, 'W2V': 0.175}
j = 0.15
l = 0.0005
Esempio n. 25
0
import seaborn as sns
import matplotlib.pyplot as plt

# Load seaborn's "tips" example dataset.
df = sns.load_dataset("tips")

# Apply seaborn's default theme, then draw bars of total_bill by sex with one
# facet per day, wrapped after two columns.
sns.set()
sns.catplot(data=df,
            x="sex",
            y="total_bill",
            col="day",
            col_wrap=2,
            kind="bar")
plt.show()
Esempio n. 26
0
plt.figure(figsize=(5, 5))
sns.boxplot(x='Type', y='Rating', data=df)
# tight_layout only adjusts what has already been drawn, so it runs after the plot.
plt.tight_layout()

# <b><i style="font-size:14pt;"><u>Installs</u> by <u>Price</u> :</i></b>

# In[277]:

# Paid vs free and the number of Installs
installs_greater_1000 = df[df["Installs"] > 1000]
installs_greater_1000 = installs_greater_1000.sort_values(['Price'])

# In[278]:

# catplot is a figure-level function and opens its own figure, so a preceding
# plt.figure(...) would only leave an empty figure behind; pass `height` and
# `aspect` to catplot to change the size of this plot.
sns.catplot(x="Installs", y="Price", data=installs_greater_1000)
plt.xticks(rotation=90)
plt.show()

# <b><i style="font-size:14pt;"><u>Category</u> by <u>Size</u> :</i></b>

# In[279]:

plt.figure(figsize=(5, 15))
sns.barplot(x='Size', y='Category', data=df)

# > <b><i style="font-size:14pt;">Analyses de la variable <u>Installs</u> : dans cette partie on va se consacrer de la variable Installs avec les autres variables</i></b>
#

# <b><i style="font-size:14pt;">Groupement des nombres d'Installs en 4 groupes : A, B, C, Highest.</i></b>
    'awareness'] + ', ' + df_plot['confidence']

#temp = []
#for (target,subject),df_sub in df_plot.groupby(['success','sub']):
#    df_sub['prob'] = df_sub['count'] / df_sub['count'].sum()
#    temp.append(df_sub)
#df_plot = pd.concat(temp)
# Save the table used for plotting, then order the rows by awareness.
df_plot.to_csv(os.path.join(saving_dir, 'pos_for_plot.csv'))
df_plot = df_plot.sort_values(['awareness'])

# Bars of prob per awareness level, coloured by confidence, with one facet
# per (success, correctness) pair; rows are ordered 'high pos', 'low pos'.
g = sns.catplot(
    data=df_plot,
    x='awareness',
    y='prob',
    hue='confidence',
    col='correctness',
    row='success',
    row_order=['high pos', 'low pos'],
    kind='bar',
    aspect=2,
)
g.set_axis_labels('Awareness', 'Probability')
g.set_titles("{row_name} | {col_name}")
g.set(ylim=(0., 0.85))
g.despine(left=True)
for ii, (target, df_sub) in enumerate(df_plot.groupby('success')):
    #    formula = 'prob ~ C(correctness)*C(awareness)*C(confidence)'
    #    model = ols(formula, df_sub).fit()
    #    aov_table = anova_lm(model, typ=2)
    #    s = f"{target}, F({model.df_model: .0f},{model.df_resid: .0f}) = {model.fvalue: .3f}, p = {model.f_pvalue: .4f}"
    #    print(s)
    #    g.axes[ii][0].annotate(s,xy=(-0.45,.8))
    ci=None,
)
plt.xlabel("Season")
plt.ylabel("Hourly number of bikes rented")
plt.title("Number of bikes rented per hour by weather condition and season")
plt.xticks(ticks=(0, 1, 2, 3, 3.5))
plt.grid(which="major", axis="y")

# In[15]:

# Mean hourly rentals per month, one line per value of `workingday`.
sns.catplot(
    x="mnth",
    y="cnt",
    kind="point",
    hue="workingday",
    data=hour.compute(),
    ci=None,
    palette="Set1",
    aspect=2.3,
    legend=False,
)
plt.legend(("Weekend", "Workday"),
           loc="upper right",
           bbox_to_anchor=(1.2, 0.5))
plt.xlabel("Month")
plt.ylabel("Hourly number of bikes rented")
plt.title("Number of bikes rented per hour by type of day")
# Overall mean, evaluated once and reused for the reference line and its
# label; it was previously computed separately for each of them.
mean_cnt = hour.cnt.mean().compute()
plt.axhline(mean_cnt, ls="--", color="#a5a5a5")
plt.text(0.5, mean_cnt - 10, "Average", color="#a5a5a5")

# In[16]:
Esempio n. 29
0
# Store the parsed duration parts and drop the original "Duration" column.
train_data["Duration_hours"] = duration_hours
train_data["Duration_mins"] = duration_mins

train_data.drop(columns=["Duration"], inplace=True)

train_data.head()

"""---"""

train_data["Airline"].value_counts()

# Rows ordered from the most to the least expensive; used by both plots below.
by_price = train_data.sort_values("Price", ascending=False)

# Airline vs Price
sns.catplot(x="Airline",
            y="Price",
            data=by_price,
            kind="boxen",
            height=6,
            aspect=3)
plt.show()

# From graph we can see that Jet Airways Business have the highest Price.
# Apart from the first Airline almost all are having similar median

# As Airline is Nominal Categorical data we will perform OneHotEncoding
Airline = pd.get_dummies(train_data[["Airline"]], drop_first=True)

Airline.head()

train_data["Source"].value_counts()

# Source vs Price
sns.catplot(x="Source",
            y="Price",
            data=by_price,
            kind="boxen",
            height=4,
            aspect=3)
Esempio n. 30
0
"""
Plotting a three-way ANOVA
==========================

_thumb: .42, .5
"""
import seaborn as sns
sns.set(style="whitegrid")

# Example "exercise" dataset shipped with seaborn.
df = sns.load_dataset("exercise")

# Point plot of pulse over time, coloured by kind, with one facet per diet.
g = sns.catplot(
    data=df,
    x="time",
    y="pulse",
    hue="kind",
    col="diet",
    kind="point",
    capsize=.2,
    palette="YlGnBu_d",
    height=6,
    aspect=.75,
)
g.despine(left=True)
Esempio n. 31
0
# Draw one subplot per region.
# col = the column whose values define the subplots.
sns.relplot(
    data=df_last,
    x="연도",
    y="평당분양가격",
    hue="지역명",
    col="지역명",
    col_wrap=4,
    kind="line",
    ci=None,
)

# Yearly price per pyeong as a bar chart, one subplot per region.
sns.catplot(
    data=df_last,
    x="연도",
    y="평당분양가격",
    col="지역명",
    col_wrap=4,
    kind="bar",
)

# Box plot of the price per year.
sns.boxplot(x="연도", y="평당분양가격", data=df_last)

# Use hue to split the boxes by exclusive-use area.
plt.figure(figsize=(12, 3))
sns.boxplot(x="연도", y="평당분양가격", hue="전용면적", data=df_last)

# Violin plot (adds a density estimate to what the box plot shows).
sns.violinplot(x="연도", y="평당분양가격", data=df_last)

# ### lmplot & swarmplot
# 연도별 평당분양가격을 lmplot으로
# There are many ways to handle categorical data. The two main kinds of categorical data are:
# 1. <span style="color: blue;">**Nominal data**</span> --> data are not in any order --> <span style="color: green;">**OneHotEncoder**</span> is used in this case
# 2. <span style="color: blue;">**Ordinal data**</span> --> data are in order --> <span style="color: green;">**LabelEncoder**</span> is used in this case

# In[24]:


# Number of rows per airline (displayed when run in a notebook).
df_train["Airline"].value_counts()


# In[25]:


# Airline vs Price: boxen plot with the rows ordered by descending price.
sns.catplot(
    data=df_train.sort_values("Price", ascending=False),
    x="Airline",
    y="Price",
    kind="boxen",
    height=6,
    aspect=2,
)
plt.show()


# In[26]:


# From the above graph it is clear that Jet Airways has the maximum price.
# Apart from Jet Airways, almost all the other airlines have the same median.


# In[27]:


# As Airline is a Nominal Category we will perform one hot encoding
Esempio n. 33
0
from sklearn.preprocessing import MinMaxScaler, LabelEncoder

#Loading the data
data = pd.read_csv(path)
data['Rating'].plot(kind='hist')
plt.show()
#Code starts here
# Keep ratings of at most 5.  .copy() makes the filtered frame independent so
# later column assignments do not write into a slice of the original frame.
data = data[data['Rating'] <= 5].copy()
data['Rating'].plot(kind='hist')
plt.show()

# Missing values per column, recorded before the incomplete rows are dropped.
nulls = data.isnull().sum()
data = data.dropna()

# catplot is a figure-level function: it creates its own figure (sized through
# `height`), so a preceding plt.figure(...) would only add an empty figure.
cat = sns.catplot(x='Category', y='Rating', kind='box', data=data, height=8)
cat.set_xticklabels(rotation=90)
plt.title("Rating vs Category boxplot", size=20)

# Keep only the part of 'Genres' before the first ';'.
data['Genres'] = data['Genres'].str.split(";", expand=True)[0]

# Mean, maximum and minimum rating per genre.
mean_rating = data.groupby('Genres')['Rating'].mean()
max_rating = data.groupby('Genres')['Rating'].max()
min_rating = data.groupby('Genres')['Rating'].min()
rating_data = {
    'mean_rating': mean_rating,
    'max_rating': max_rating,
    'min_rating': min_rating
}
rating_data_df = pd.DataFrame(
    rating_data, columns=['mean_rating', 'max_rating', 'min_rating'])
Esempio n. 34
0
# -*- coding: utf-8 -*-
"""
Created on Sun Jan 13 14:06:15 2019

@author: Eldrich
"""

import os
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

os.chdir('E:\\University\\data\\Class Work\\Thesis - KTH\\Extra Work\\Papers\\Journal Paper\\V2')
df=pd.read_csv('Delay_Table.csv')

sns.set(style='whitegrid')
# Bars of Delay per run number, split by Pulse.  x, y and hue are passed by
# keyword (current seaborn rejects them as positionals) and the facet size
# uses `height`, the name that replaced the old `size` parameter.
g=sns.catplot(x='Run number', y='Delay', hue='Pulse', data=df, kind='bar',
              color='gray', legend=False, aspect=2, height=3)
g.set_ylabels('Delay (s)')
g.set_xlabels(label='')
# Fix the y range and pin the x range to its current bounds.
g.set(ylim=(0,0.5),xlim=g.ax.axes.get_xbound())
sns.despine()
# Dashed horizontal reference line, labelled "Average" by the annotation below.
plt.plot([-0.54, 2.5400000000000005], [0.3435263157894737,0.3435263157894737], linewidth=1, linestyle='--', color='k')
plt.annotate('Average',xy=(1,0), xytext=(2,0.35))

plt.tight_layout()
Esempio n. 35
0
# Graphing the differences of infrastructure reports:
# merge the two datasets and assign column to distinguish them

infra_vergleich = pd.concat([
    vor_profilsumme_infra.assign(Monat='Vormonat'),
    neu_profilsumme_infra.assign(Monat='aktueller Monat')
])

infra_vergleich.reset_index(inplace=True)

# catplot is a figure-level function that creates its own figure and returns a
# grid object, so no plt.subplots() figure is created beforehand (it would
# stay empty while the bars are drawn elsewhere).
grid = sns.catplot(x="Profil",
                   y="Preis 1 (€)",
                   hue="Monat",
                   data=infra_vergleich,
                   height=8,
                   kind="bar",
                   palette="muted",
                   legend=False)

# Call set_title on the grid's single axes; the earlier `set_title = '...'`
# was an attribute assignment and never put a title on the plot.
grid.ax.set_title('Infrastructure Vergleich zu Vormonat')
grid.set_ylabels("Umsatz in EURO")
grid.set_xlabels("Profile")

grid.ax.legend(loc="upper left")
grid.fig.tight_layout()

# The grid's figure is the current figure, so this saves and then clears it.
plt.savefig("diff_infra.png")
plt.clf()

# Graphing the differences of extern reports: