コード例 #1
0
ファイル: graphhelper.py プロジェクト: Loudness/AILAB
def createGraphTEST(folder, inputdata=0):
    """Draw the seaborn ``brain_networks`` letter-value demo and save it.

    The figure is written to ``result.png`` inside ``folder``.
    ``inputdata`` is accepted but not used by this function.
    """
    sns.set(style="whitegrid")

    # Example dataset; its three header rows form the column index.
    raw = sns.load_dataset("brain_networks", index_col=0, header=[0, 1, 2])

    # Average the columns that share a "network" label.
    per_network = raw.T.groupby(level="network").mean().T

    # Boxes are ordered by ascending standard deviation.
    by_spread = per_network.std().sort_values().index

    axes = sns.lvplot(data=per_network, order=by_spread,
                      scale="linear", palette="mako")
    axes.figure.savefig(folder + os.sep + "result.png")
コード例 #2
0
 def plot(self, score=None, data=None, bw=0.15, jitter=0.05):
     """Overlay several seaborn distribution plots of ``score`` per model.

     Does nothing when ``score`` is None. When ``data`` is None the object
     returned by ``self.results()`` is plotted instead. ``bw`` is forwarded
     to the violin plot and ``jitter`` to the strip plot.
     """
     if score is None:
         return
     frame = self.results() if data is None else data

     # Every layer uses the same mapping: 'model' on y, the score on x.
     layout = {'y': 'model', 'x': score, 'data': frame}
     sb.lvplot(**layout)
     sb.violinplot(inner='quartile', bw=bw, **layout)
     sb.swarmplot(color='w', alpha=0.5, **layout)
     sb.stripplot(color='w', alpha=0.5, jitter=jitter, **layout)
     sb.pointplot(color='k', **layout)
     plt.title(score)
コード例 #3
0
def boxplot_violinplot_lvplot(df):
    """Show a box plot, a violin plot and a letter-value plot in turn.

    Each plot puts 'Award_Amount' on x and 'Model Selected' on y. After each
    plot the figure is displayed and then cleared before the next one.
    """
    columns = dict(data=df, x='Award_Amount', y='Model Selected')

    # Plain box plot
    sns.boxplot(**columns)
    plt.show()
    plt.clf()

    # Violin plot using the husl palette
    sns.violinplot(palette='husl', **columns)
    plt.show()
    plt.clf()

    # Letter-value plot using the Paired palette, with Region as the hue
    sns.lvplot(palette='Paired', hue='Region', **columns)
    plt.show()
    plt.clf()
コード例 #4
0
def plot_var_distribution_over_time(df, vintage_var, vars, missing_value=-999.0):
    '''
    Show one letter-value plot per variable, grouped by vintage.

    INPUT:
        df; dataframe containing observations over a time period
        vintage_var; name of the year-month 'vintage' column (expected to
                     look like YYYY-MM), must be present in df
        vars; names of the columns whose distribution is plotted over time
        missing_value; placeholder that was imputed for nulls; it is turned
                       back into NaN before plotting
    OUTPUT:
        None; one figure per variable is displayed with plt.show()
    AUTHOR: jonoleson
    '''
    for column in vars:
        # Work on a two-column copy of the frame.
        subset = df[[vintage_var, column]].copy()
        # Re-introduce nulls in place of the placeholder value.
        subset.loc[:, column] = subset[column].replace(missing_value, np.nan)
        subset = subset.sort_values(vintage_var)

        fig, axis = plt.subplots(figsize=(10, 8))
        sns.lvplot(x=vintage_var, y=column, data=subset, ax=axis)
        plt.xticks(rotation=70)
        # `font` is not defined in this function; it has to exist at module level.
        axis.set_title(column + ' Distribution by Vintage', fontdict=font)
        plt.show()
コード例 #5
0
               # bw = 0.8      #控制拟合程度,一般可以不设置
               )
plt.show()
# 通过hue再分类
sns.violinplot(x='day',y='total_bill',data=tips,
               hue='smoker',palette='muted',
               split=True,  # 设置是否拆分小提琴图
               inner='quartile')
plt.show()
# 结合散点图
sns.violinplot(x='day',y='total_bill',data=tips,palette='muted',inner=None)
sns.swarmplot(x='day',y='total_bill',data=tips,color='w',alpha=0.5)
plt.show()
'''
# LV图表 - lvplot()
'''
tips = pd.read_csv('C:\\Users\刘邦国\PycharmProjects\study\data_visual\\tips.csv')
# 绘制LV图
sns.lvplot(x='day',y='total_bill',data=tips,palette='mako',
           # hue='smoker',
           width=0.8,  # 箱之间的间隔比例
           linewidth=12,
           scale='area',  # 设置框的大小 --> 'linear'、'exonential'、'area'
           k_depth='proportion'  # 设置框的数量 --> 'proportion','tukey','trustworthy'
           )
# 可以添加散点图
sns.swarmplot(x='day',y='total_bill',data=tips,color='k',size=3,alpha=0.8)
plt.show()
'''

# 统计图----
コード例 #6
0
# Calendar month of each row's publish time.
df_yout['month'] = df_yout['publish_time'].dt.month

# Row counts of the five most frequent months.
print("Category Name count")
print(df_yout['month'].value_counts()[:5])

plt.figure(figsize=(14, 9))

# Top panel: number of rows per month.
plt.subplot(211)
# The column is passed by keyword; newer seaborn releases do not accept it
# positionally.
g = sns.countplot(x='month', data=df_yout, palette="Set1")
g.set_xticklabels(g.get_xticklabels(), rotation=45)
g.set_title("Counting Months ", fontsize=20)
g.set_xlabel("Months", fontsize=15)
g.set_ylabel("Count", fontsize=15)

# Bottom panel: letter-value plot of like_rate per month.
plt.subplot(212)
g1 = sns.lvplot(x='month', y='like_rate', data=df_yout, palette="Set1")
# Rotate this panel's own tick labels rather than re-using the top panel's.
g1.set_xticklabels(g1.get_xticklabels(), rotation=45)
g1.set_title("Like Rate by Month", fontsize=20)
g1.set_xlabel("Months", fontsize=15)
g1.set_ylabel("Like Rate(log)", fontsize=15)

plt.subplots_adjust(hspace=0.5, top=0.9)

plt.show()

# ## Let's extract the hour value of datetime'

# In[ ]:

# separates date and time into two columns from 'publish_time' column
df_yout.insert(4, 'publish_date', df_yout['publish_time'].dt.date)
コード例 #7
0
sns.heatmap(corr, annot=True)
plt.title('Correlation between variables')

# Pair-plot to show relationships between variables
plt.figure(figsize=(10, 10))
sns.pairplot(dataset, hue='target')
plt.show()

# Boxplot
# Feature columns are everything except the label column.  The comparison uses
# != because `x not in 'target'` is a substring test and would also drop any
# column whose name is a fragment of the word "target" (for example "get").
columns = [x for x in dataset.columns if x != 'target']
length = len(columns)
plt.figure(figsize=(15, 15))

# One letter-value plot per feature, split by target, on a 4x2 subplot grid.
for j, i in enumerate(columns):
    plt.subplot(4, 2, j + 1)
    sns.lvplot(x=dataset['target'], y=dataset[i])
    plt.title(i)
    plt.axhline(dataset[i].mean(), linestyle='dashed')  # mean of the feature

# Violinplot
# NOTE(review): no new figure is created here, so these subplots address the
# same 4x2 grid as the loop above - confirm that this is intended.
columns = [x for x in dataset.columns if x != 'target']
length = len(columns)

for j, i in enumerate(columns):
    plt.subplot(4, 2, j + 1)
    sns.violinplot(x=dataset['target'], y=dataset[i])
    plt.title(i)

# Train-Test Split
X = dataset.drop(columns='target')
Y = dataset['target'].values
コード例 #8
0
    health_frame = pd.read_csv("../../build/health/{}-health.csv".format(name))
    metric = health_metric(health_frame)
    avg_props.ix[i, 'health'] = metric.mean()

# Split the 'health' column by cluster label.
group1 = avg_props[avg_props['cluster'] == 0]['health']
group2 = avg_props[avg_props['cluster'] == 1]['health']

# Mean and standard deviation per cluster (the closing parenthesis was missing).
print("Group 1: {} (+/- {})".format(group1.mean(), group1.std()))
print("Group 2: {} (+/- {})".format(group2.mean(), group2.std()))

# Two-sample t-test that does not assume equal variances.
statistic, pval = ttest_ind(group1, group2, equal_var=False)
print("t-statistic: {}, pvalue: {}".format(statistic, pval))

plt.figure(figsize=(3.5,3))
# Keep the last two of five 'magma' colours, one per cluster.
colors = seaborn.color_palette('magma', n_colors=5)[3:]
plot = seaborn.lvplot(data=[group1, group2], palette=colors)
seaborn.despine()
plot.minorticks_on()
# Minor ticks are wanted on the y axis only.  tick_params expects a boolean:
# the string 'off' is truthy and leaves the x-axis minor ticks visible in
# current matplotlib.
plot.tick_params(axis='x', which='minor', bottom=False)
plot.set_xlabel('')
# Display settings for the metric named on the command line:
# index 0 is used as the y label, index 1 as optional y limits.
h_display = health_display[sys.argv[1]]
plot.set_ylabel(h_display[0])
if h_display[1] is not None:
    plot.set_ylim(h_display[1])
labels = ["Cluster {}".format(i) for i in range(1, 3)]
plot.set_xticklabels(labels)
plt.tight_layout()
plt.savefig("{}.pdf".format(sys.argv[1]))
plt.close()

for i, centroid in enumerate(clusterer.cluster_centers_):
コード例 #9
0
  .toPandas()

# Every plot below uses the same mapping: rider_student on x, distance on y.
rider_vs_distance = dict(x="rider_student", y="distance", data=sample_pdf)

# A bar plot:
sns.barplot(**rider_vs_distance)

# A point plot:
sns.pointplot(**rider_vs_distance)

# A strip plot:
sns.stripplot(jitter=True, **rider_vs_distance)

# A swarm plot:
sns.swarmplot(**rider_vs_distance)

# A letter value plot:
sns.lvplot(**rider_vs_distance)

# A box plot:
sns.boxplot(**rider_vs_distance)

# A violin plot:
sns.violinplot(**rider_vs_distance)


# ## Exploring more than two variables

# There are numerous ways to explore more than two variables.  The appropriate
# table or plot depends on the variable types and particular question you are
# trying to answer.  We highlight a few common approaches below.

# ### N-way summary tables
コード例 #10
0
                  kde=False)
ax = ax.set_title('ILP')
ax = sns.distplot(sdss_df[sdss_df['Category_list'] == 'Annuity'].totalpremium,
                  bins=30,
                  ax=axes[3],
                  kde=False)
ax = ax.set_title('Annuity')
ax = sns.distplot(sdss_df[sdss_df['Category_list'] == 'Others'].totalpremium,
                  bins=30,
                  ax=axes[4],
                  kde=False)
ax = ax.set_title('Others')

# One 16x4 figure per column: a letter-value plot of that column grouped by
# Category_list, titled with the column name.
for column_name in ('Category_count', 'PH_AgeGroup', 'addr_type_desc'):
    fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(16, 4))
    ax = sns.lvplot(x=sdss_df['Category_list'],
                    y=sdss_df[column_name],
                    palette='coolwarm')
    ax.set_title(column_name)
コード例 #11
0
ファイル: large_distributions.py プロジェクト: adelq/seaborn
"""
Plotting large distributions
============================

"""
import seaborn as sns
sns.set(style="whitegrid")

# Load the example data and average the columns that share a "network" label.
networks = (
    sns.load_dataset("brain_networks", index_col=0, header=[0, 1, 2])
    .T.groupby(level="network")
    .mean()
    .T
)

# Boxes are ordered by ascending standard deviation.
order = networks.std().sort_values().index

sns.lvplot(data=networks, order=order, scale="linear", palette="mako")
コード例 #12
0
        total_user.append(int(row[3]))

print 'Maximum Age:', max(active_age), 'Minimum Age:', min(active_age)
print 'Maximum User:'******'Minimum User:'******'Maximum Post:', max(total_post), 'Minimum Post:', min(total_post)

cmap = matplotlib.cm.get_cmap('Greys')

rgb_1 = cmap(0.3)
rgb_2 = cmap(0.45)
rgb_3 = cmap(0.6)

#Plotting by seaborn
f, (ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=3)

sns.lvplot(y=active_age, ax=ax1, color=rgb_1)
sns.lvplot(y=total_user, ax=ax2, color=rgb_2)
sns.lvplot(y=total_post, ax=ax3, color=rgb_3)

ax1.set(xlabel='# of Months')
ax2.set(xlabel='# of Users')
ax3.set(xlabel='# of Posts')

ax1.set_yscale('log')
ax1.minorticks_off()
ax1.yaxis.set_ticks(np.logspace(1, 2, 2))

ax2.set_yscale('log')
ax2.minorticks_off()
ax2.yaxis.set_ticks(np.logspace(3, 6, 4))
# Map every age onto a bin id and file the matching contribution under it.
# The bounds of the age range are computed once instead of on every iteration;
# the length guard keeps an empty list a no-op, as in the original loop.
# NOTE(review): `/` yields an integer bin id only under Python 2 integer
# division - confirm before running this on Python 3.
if len(age_list) > 0:
    youngest = min(age_list)
    age_span = max(age_list) + 1 - youngest
    for i, age in enumerate(age_list):
        bin_id = (age - youngest) * 5 / age_span + 1
        age_wise_contribution_bins[bin_id].append(contribution_list[i])

#Plotting by seaborn
cmap = matplotlib.cm.get_cmap('Greys')

rgb_1 = cmap(0.3)
rgb_2 = cmap(0.37)
rgb_3 = cmap(0.44)
rgb_4 = cmap(0.51)
rgb_5 = cmap(0.58)

# Five side-by-side panels sharing one y axis: bin k is drawn on panel k in
# its own grey shade.
f, (ax1, ax2, ax3, ax4, ax5) = plt.subplots(nrows=1, ncols=5, sharey=True)

panels = (ax1, ax2, ax3, ax4, ax5)
shades = (rgb_1, rgb_2, rgb_3, rgb_4, rgb_5)
for bin_no, (panel, shade) in enumerate(zip(panels, shades), start=1):
    sns.lvplot(y=age_wise_contribution_bins[bin_no], ax=panel, color=shade)

ax3.set_title('ANDROID')

ax1.set(xlabel='1 - 13')
ax2.set(xlabel='14 - 26')
ax3.set(xlabel='27 - 39\n # of Months')
ax4.set(xlabel='40 - 52')
ax5.set(xlabel='53 - 65')

ax1.set(ylabel='# of Answers')
コード例 #14
0
    data=tips,
    hue='smoker',
    palette='muted',
    split=True,  #设置是否拆分小提琴图
    inner='quartile')

# Shared mapping for all plots below: day on x, total_bill on y.
day_vs_bill = dict(x='day', y='total_bill', data=tips)

# 2. violinplot()
# Combined with a scatter overlay
sns.violinplot(palette='hls', inner=None, **day_vs_bill)
sns.swarmplot(color='w', alpha=.5, **day_vs_bill)

# 3. lvplot()
# Letter-value (LV) plot
sns.lvplot(
    palette='mako',
    # hue='smoker',
    width=0.8,  # spacing ratio between the boxes
    linewidth=12,
    scale='area',  # box size -> 'linear', 'exponential', 'area'
    k_depth='proportion',  # number of boxes -> 'proportion', 'tukey', 'trustworthy'
    **day_vs_bill
)

# A scatter layer can be added on top of the LV plot
sns.swarmplot(color='k', alpha=0.8, **day_vs_bill)
コード例 #15
0







# Vertical Bars
sns.set(style="whitegrid")

# Example dataset; columns sharing a "network" label are averaged.
brain_data = sns.load_dataset("brain_networks", index_col=0, header=[0, 1, 2])
networks = brain_data.T.groupby(level="network").mean().T

# Sort the networks by ascending standard deviation to fix the box order.
spread = networks.std()
order = spread.sort_values().index

sns.lvplot(data=networks, order=order, scale="linear", palette="mako")







# Cool Hexbin
import numpy as np
from scipy.stats import kendalltau
import seaborn as sns
sns.set(style="ticks")

rs = np.random.RandomState(11)
x = rs.gamma(2, size=1000)
コード例 #16
0
def plot_neuron_activity(activations,
                         num_neuron_samples: int,
                         mode="lv",
                         color=None,
                         neuron_samples=None,
                         save=None):
    """
    Plot the activation values for a certain number of randomly sampled neurons over all samples.

    Parameters
    ----------
    activations:
        Sequence of per-sample arrays. ``activations[0].shape[1]`` is read as
        the number of neurons, and every element obtained by iterating a
        sample is indexed with the sampled neuron indices.
    num_neuron_samples:
        Number of neurons to plot.
    mode:
        "lv" for a seaborn letter value plot, "boxes" for a matplotlib box plot.
    color:
        Face color of the boxes in "boxes" mode ("tab:blue" when None).
    neuron_samples:
        Neuron indices to plot instead of a random draw. The aggregation
        buffer has ``num_neuron_samples`` rows, so this should hold that many
        indices.
    save:
        Path the figure is saved to; when None the figure is shown instead.

    Raises
    ------
    AssertionError
        If ``mode`` is neither "lv" nor "boxes".
    """
    assert mode in (
        "lv", "boxes"
    ), "Mode has to be either 'lv' or 'boxes', '{}' found.".format(mode)

    num_neurons = activations[0].shape[1]

    sampled_neurons = neuron_samples
    if neuron_samples is None:
        sampled_neurons = sample(range(num_neurons), k=num_neuron_samples)

    # Aggregate activations for sample neurons: one column per time step.
    # The columns are gathered in a list and joined once, which avoids
    # re-copying the whole buffer for every time step. The empty seed keeps
    # the (num_neuron_samples, 0) result when there are no time steps.
    step_columns = [np.empty((num_neuron_samples, 0))]
    for data_point in activations:
        for time_step in data_point:
            step_columns.append(time_step[sampled_neurons].reshape(-1, 1))
    activations_per_neuron = np.concatenate(step_columns, axis=1)

    # Create LV plot
    if mode == "lv":
        # Create pandas data frame object in long format: one row per
        # (neuron, time step) pair, neurons numbered from 1.
        rows, columns = activations_per_neuron.shape
        data = pd.DataFrame(
            data={
                "activation":
                activations_per_neuron.reshape(rows * columns, ),
                "neuron":
                np.concatenate([
                    np.array([neuron_index] * columns)
                    for neuron_index in range(1, rows + 1)
                ],
                               axis=0)
            })

        # Visualize using Letter value plot
        ax = sns.lvplot(x="neuron",
                        y="activation",
                        scale="linear",
                        data=data,
                        width=1,
                        palette=sns.color_palette("viridis",
                                                  n_colors=num_neuron_samples))

    # Create box and whiskers plot
    elif mode == "boxes":
        fig, axis = plt.subplots(1, 1)
        ax = axis

        # Sort neurons by the size of their box for better readability
        def _box_size(data):
            q1, med, q3 = np.percentile(data, [25, 50, 75])
            return np.abs(q1 - med) + np.abs(q3 - med)

        activations_per_neuron = np.array_split(activations_per_neuron,
                                                num_neuron_samples,
                                                axis=0)
        activations_per_neuron = sorted(activations_per_neuron,
                                        key=_box_size)

        # Show min and max by setting whis very high.
        # NOTE(review): `manage_xticks` was renamed `manage_ticks` in
        # matplotlib 3.1 - confirm which matplotlib version this targets.
        bplot = ax.boxplot([
            activations_per_neuron[i].squeeze()
            for i in range(num_neuron_samples)
        ],
                           vert=True,
                           sym="",
                           patch_artist=True,
                           whis=10000,
                           notch=True,
                           manage_xticks=False)

        # Coloring
        face_color = "tab:blue" if color is None else color
        for patch in bplot["boxes"]:
            patch.set_facecolor(face_color)

    # Roughly ten x ticks. The step is clamped to at least 1 because
    # int(num_neuron_samples / 10) is 0 for fewer than ten neurons, and a zero
    # step makes range() raise ValueError.
    tick_step = max(1, num_neuron_samples // 10)
    ax.set_xticks(list(range(1, num_neuron_samples + 1, tick_step)))

    if save is None:
        plt.show()
    else:
        plt.savefig(save, bbox_inches="tight")
        plt.close()
コード例 #17
0
ファイル: bnnGui.py プロジェクト: ffurkanhas/nanebDataMining
 def plotDrawOk(self):
     """Read the combo-box selections and draw the requested plot in a new window.

     The two column selections, the plot kind and the ``left`` filter value
     are stored on ``self``. A ``Toplevel`` child of ``self.plot_frame`` is
     created, and for every recognised plot kind a new matplotlib figure is
     embedded in it through a Tk canvas before the plot is drawn.
     ``df`` is not defined in this method; it has to exist at module level.
     """
     self.selection = self.combobox.get()
     self.selection2 = self.combobox2.get()
     self.selection3 = self.combobox3.get()
     self.selectionLeft = self.comboboxLeft.get()
     window = Toplevel(self.plot_frame)

     def embed_figure(figsize):
         # Fresh figure whose Tk canvas is packed into the pop-up window.
         self.fig, self.ax = plt.subplots(figsize=figsize)
         self.canvas = FigureCanvasTkAgg(self.fig, window)
         self.canvas.get_tk_widget().pack(side='bottom',
                                          fill='both',
                                          expand=1)

     def share_of_rows(values):
         # Group size as a percentage of all rows in df.
         return len(values) / len(df) * 100

     kind = self.selection3
     col_a = self.selection
     col_b = self.selection2

     if kind == 'plot':
         x = df[col_a]
         y = df[col_b]
         embed_figure((8, 8))
         # Keep only the rows whose `left` value matches the chosen one.
         chosen = df.left == int(self.selectionLeft)
         plt.plot(x[chosen], y[chosen], 'o', alpha=0.1)
         plt.ylabel(col_b)
         plt.title('Employees who left')
         plt.xlabel(col_a)
         self.canvas.draw()
     elif kind == 'countplot':
         embed_figure((15, 5))
         # The check box decides whether the bars are split by `left`.
         if self.checkCmd.get() == 1:
             count_axes = sns.countplot(y=col_a, hue='left', data=df)
             count_axes.set_title('Employee ' + col_a +
                                  ' Turnover Distribution')
         if self.checkCmd.get() == 0:
             count_axes = sns.countplot(y=col_a, data=df)
             count_axes.set_title('Employee ' + col_a +
                                  ' Turnover Distribution')
     elif kind == 'barplot':
         embed_figure((15, 5))
         if self.checkCmd.get() == 1:
             sns.barplot(df[col_a], df[col_b], hue=df.left)
         if self.checkCmd.get() == 0:
             sns.barplot(df[col_a], df[col_b])
     elif kind == 'barplotPercentage':
         embed_figure((15, 5))
         if self.checkCmd.get() == 1:
             ax = sns.barplot(x=col_a,
                              y=col_a,
                              hue="left",
                              data=df,
                              estimator=share_of_rows)
         if self.checkCmd.get() == 0:
             ax = sns.barplot(x=col_a,
                              y=col_a,
                              data=df,
                              estimator=share_of_rows)
         ax.set(ylabel="Percent")
     elif kind == 'kdeplot':
         embed_figure((15, 5))
         stayed = df.loc[(df['left'] == 0), col_a]
         ax = sns.kdeplot(stayed, color='b', shade=True, label='no turnover')
         departed = df.loc[(df['left'] == 1), col_a]
         ax = sns.kdeplot(departed, color='r', shade=True, label='turnover')
         plt.title('Employee ' + col_a +
                   ' Distribution - Turnover V.S. No Turnover')
     elif kind == 'distplot':
         embed_figure((7, 7))
         plt.xlabel(col_a, fontsize=12)
         plt.ylabel('distribution', fontsize=12)
         sns.distplot(df[col_a], kde=True)
     elif kind == 'stripplot':
         embed_figure((7, 7))
         sns.stripplot(df[col_a], df[col_b])
     elif kind == 'pointplot':
         embed_figure((7, 7))
         sns.pointplot(df[col_a], df[col_b])
     elif kind == 'lvplot':
         embed_figure((7, 7))
         sns.lvplot(df[col_a], df[col_b])
     elif kind == 'factorplot':
         embed_figure((7, 7))
         sns.factorplot(y=col_b,
                        x=col_a,
                        data=df,
                        kind="box",
                        ax=self.ax)
         plt.tight_layout()
         plt.gcf().clear()
        ransac.fit(np.array(user_count).reshape(-1, 1), np.array(answerer_count).reshape(-1, 1))
        inlier_mask = ransac.inlier_mask_
        r_answerer.append(ransac.score(np.array(user_count).reshape(-1, 1)[inlier_mask], np.array(answerer_count).reshape(-1, 1)[inlier_mask]))

        ransac = linear_model.RANSACRegressor()
        ransac.fit(np.array(user_count).reshape(-1, 1), np.array(commenter_count).reshape(-1, 1))
        inlier_mask = ransac.inlier_mask_
        r_commenter.append(ransac.score(np.array(user_count).reshape(-1, 1)[inlier_mask], np.array(commenter_count).reshape(-1, 1)[inlier_mask]))


        current_site = row[0]
        
        asker_count[:] = []
        answerer_count[:] = []
        commenter_count[:] = []
        user_count[:] = []
        
        asker_count.append(int(row[6]))
        answerer_count.append(int(row[7]))
        commenter_count.append(int(row[12]))
        user_count.append(int(row[14]))

# One column of scores per role, drawn as a letter-value plot.
df = pd.DataFrame({'Asker': r_asker, 'Answerer': r_answerer, 'Commenter': r_commenter})
ax = sns.lvplot(data = df, palette=sns.mpl_palette("gist_yarg"))
ax.set(ylabel='Coeff. of Determination, $R^2$')
ax.set_yticks(np.arange(0.0, 1.0, 0.05), minor=True)
sns.despine(offset = 10, trim=True, bottom = True)
# tight_layout is called through pyplot directly: seaborn no longer exposes
# pyplot as `sns.plt`, and `plt` is already used for savefig below.
plt.tight_layout()
plt.savefig('User_to_Roles_R_Squared_LV.pdf')

コード例 #19
0
                    ax=ax1)
ax1.set_xticklabels(ax1.get_xticklabels(),rotation=90)
ax1.set_title("bedrooms counting", fontsize=15)
ax1.set_xlabel("Bathrooms number")
ax1.set_ylabel("count")

ax2 = plt.subplot(222)
ax2 = sns.regplot(x="bedrooms", y='price', 
                  data=df_usa, ax=ax2, x_jitter=True)
ax2.set_xticklabels(ax2.get_xticklabels(),rotation=90)
ax2.set_title("Bedrooms distribution price", fontsize=15)
ax2.set_xlabel("Bedrooms number")
ax2.set_ylabel("log Price(US)")

ax0 = plt.subplot(212)
ax0 = sns.lvplot(x="bedrooms", y="price",
                    data=df_usa)
ax0.set_title("Better understaning price", fontsize=15)
ax0.set_xlabel("Bedrooms")
ax0.set_ylabel("log Price(US)")
ax0.set_xticklabels(ax0.get_xticklabels(),rotation=90)


plt.show()


# In[ ]:


print("Floors counting description")
print(df_usa['floors'].value_counts())
コード例 #20
0
#1. stripplot, swarmplot : plots of each observation
#2. boxplot, violinplot, lvplot: abstract representation
#3. barplot, pointplot, countplot: statistical estimates

#stripplot (the seaborn function is spelled `stripplot`; `sns.striplot` does not exist)
sns.stripplot(data=df, y="DRG Definition", x="Average Covered Charges", jitter=True)
#swarmplot
sns.swarmplot(data=df, y="DRG Definition", x="Average Covered Charges")

#boxplot
sns.boxplot(data=df, y="DRG Definition", x="Average Covered Charges")
#violinplot
sns.violinplot(data=df, y="DRG Definition", x="Average Covered Charges")
#lvplot:letter value plot for large datasets
sns.lvplot(data=df, y="DRG Definition", x="Average Covered Charges")

#barplot
sns.barplot(data=df, y="DRG Definition", x="Average Covered Charges", hue="Region")
#pointplot
sns.pointplot(data=df, y="DRG Definition", x="Average Covered Charges", hue="Region")
#countplot
sns.countplot(data=df, y="DRG_Code", hue="Region")


#ex.1

#part 1
# Create the stripplot
sns.stripplot(data=df,
         x='Award_Amount',
コード例 #21
0
# Boxplot - used to show several measures related to dist of data incl median, upper and lower quartiles and outliers

sns.boxplot(data=df, y="DRG Definition",
            x="Average Covered Charges")

# Violinplot - combination of kernel density plot and boxplot, suitable for providing an alternative view of the dist of data

sns.violinplot(data=df, y="DRG Definition",
               x="Average Covered Charges")
# As uses a kernel density function, does not show all datapoints
# Useful for large datasets, can be computationally intensive to create

# lvplot - letter value plot

sns.lvplot(data=df, y="DRG Definition",
           x="Average Covered Charges")

# API same as boxplot and violin plot
# Hybrid between boxplot and violin plot
# Relatively quick to render and easy to interpret

# Barplot - shows estimate of value and confidence interval

sns.barplot(data=df, y="DRG Definition",
            x="Average Covered Charges",
            hue="Region")

# Pointplot - similar to barplot, shows summary measure and confidence interval
# Can be useful for observing how values change across categorical values
コード例 #22
0
import seaborn as sns

sns.lvplot(my_df["col_0"])
コード例 #23
0
# Create a boxplot
sns.boxplot(data=df, x='Award_Amount', y='Model Selected')

plt.show()
plt.clf()

# Create a violinplot with the husl palette
sns.violinplot(data=df, x='Award_Amount', y='Model Selected', palette='husl')

plt.show()
plt.clf()

# Create a lvplot with the Paired palette and the Region column as the hue
sns.lvplot(data=df,
           x='Award_Amount',
           y='Model Selected',
           palette='Paired',
           hue='Region')

plt.show()
plt.clf()

# Show a countplot with the number of models used with each region a different color
sns.countplot(data=df, y="Model Selected", hue="Region")

plt.show()
plt.clf()

# Create a pointplot and include the capsize in order to show bars on the confidence interval
sns.pointplot(data=df, y='Award_Amount', x='Model Selected', capsize=.1)
コード例 #24
0
# Number of rows per start station, most used station first.
# `.size()` counts the rows of each group, which is what np.size computed; the
# dict-renaming form `.agg({'Count': np.size})` on a grouped Series is
# rejected by pandas >= 1.0.
StartSorted = SD_2016.groupby(
    by=['start_station_code'])['start_date'].size().to_frame('Count')
StartSorted['Count'] = StartSorted.Count.astype(int)
StartSorted = StartSorted.sort_values(by='Count', ascending=False)
StartSorted.head()

### 2.1.3. Let's plot the usage duration distribution and the total usage counts

f, axes = plt.subplots(2, 1, figsize=(20, 10))
plt.sca(axes[0])
TopStartUsedStation = np.array(StartUsedSorted.head(20).index)
TopStartUsedStationData = SD_2016[SD_2016['start_station_code'].isin(
    TopStartUsedStation)]
sns.lvplot(data=TopStartUsedStationData,
           x='start_station_code',
           y='duration_sec',
           order=TopStartUsedStation,
           palette="ocean")
plt.title('The longest useage duration of start station in 2016', fontsize=18)

plt.sca(axes[1])
TopStartUsed = np.array(StartSorted.head(20).index)
TopStartUsedData = SD_2016[SD_2016['start_station_code'].isin(TopStartUsed)]
sns.countplot(data=TopStartUsedData,
              x='start_station_code',
              order=TopStartUsed,
              palette="ocean")
plt.title('The most useage times of start station in 2016', fontsize=18)
plt.show()

### 2.1.4. Let's see what is the most useless station based on total used duration
コード例 #25
0
import seaborn as sns
import matplotlib.pyplot as plt

# load data
tips = sns.load_dataset("tips")

# common style
sns.set_style("whitegrid")

##
## Example 1
## =========

ax = sns.lvplot(x=tips["total_bill"])

# save and show the picture
plt.savefig('lvplot_1.png')
plt.show()

##
## Example 2
## =========

ax = sns.lvplot(x="day", y="total_bill", data=tips)

# save and show the picture
plt.savefig('lvplot_2.png')
plt.show()

##
## Example 3
コード例 #26
0
ax = sns.pointplot(x="Category", y="Clustering", hue="Category", data=df)
plt.show(block=True)

sns.pairplot(data=df, hue="Category")
plt.show(block=True)

sns.violinplot(x="Category",
               y="Clustering",
               hue="Category",
               data=df,
               inner="quart")
plt.show()

sns.lvplot(x="Category",
           y="Clustering",
           hue="Category",
           data=df,
           linewidth=2.5)
plt.show(block=True)

sns.swarmplot(x="Category", y="Clustering", hue="Category", data=df)
plt.show(block=True)

sns.regplot(x="Density", y="Clustering", data=df)
plt.show(block=True)

# sns.lmplot(x="Density", y="Clustering", hue="Category", truncate=True, size=5, data=df)
# plt.show(block=True)

# sns.lmplot(x="Density", y="Clustering", hue="Category", size=5, data=df)
# plt.show(block=True)
コード例 #27
0
ファイル: script677.py プロジェクト: darkblue-b/kaggleScape
fig, ax = plt.subplots(3, 1, figsize=(14, 10))
sns.boxplot(x="grade",
            y="loan_amnt",
            data=df_loan,
            palette="hls",
            ax=ax[0],
            hue="application_type",
            order=["A", 'B', 'C', 'D', 'E', 'F', 'G'])
sns.violinplot(x='grade',
               y="int_rate",
               data=df_loan,
               hue="application_type",
               palette="hls",
               ax=ax[1],
               order=["A", 'B', 'C', 'D', 'E', 'F', 'G'])
sns.lvplot(x="sub_grade", y="loan_amnt", data=df_loan, palette="hls", ax=ax[2])

plt.show()

# Very, very interesting: we can clearly see different patterns between Individual and Joint applications

# <h1>Let's look at the Employment title Distribution </h1>

# In[ ]:

#First plot
trace0 = go.Bar(
    x=df_loan.emp_title.value_counts()[:40].index.values,
    y=df_loan.emp_title.value_counts()[:40].values,
    marker=dict(color=df_loan.emp_title.value_counts()[:40].values),
)
コード例 #28
0
createProfilesBoxplot(profileData=x,
                      plotId="8.11.32.top",
                      namedProfiles=biasProfiles,
                      highlightProfiles=tuple(top),
                      title="Top 10 (100-300nt)")
createProfilesBoxplot(profileData=x,
                      plotId="8.11.32.bottom",
                      namedProfiles=biasProfiles,
                      highlightProfiles=tuple(bottom),
                      title="Bottom 10 (100-300nt)")

fig, ax1 = plt.subplots(figsize=(8, 2.2))
# Plot density using KDE
#sns.kdeplot( plotdata[:,:,0].flatten(), plotdata[:,:,1].flatten(), n_levels=10, bw=0.65, cmap="Blues", shade=True, shade_lowest=True, legend=True, ax=ax1 )

sns.lvplot(data=x, ax=ax1, color="#50d080")
#sns.boxplot(data=x, color="#50d080", ax=ax1, boxprops={'zorder': 10})

# Plot neutral line
plt.plot([-1, v], [0, 0], '-k')

# Plot window width
plt.plot([0.5, 0.5 + 40 / profileStep], [-1.9, -1.9], '-r', linewidth=4)
# The label text is passed positionally: the keyword was renamed from `s` to
# `text` in matplotlib 3.3, while the positional form works on either side of
# that change.
plt.annotate("window", xy=(1.0, -1.83))

# Annotate number of items
plt.annotate('n = %d' % u, xy=(26.0, 1.8))
plt.xlim([-0.5, v - 0.5])
plt.ylim([-2, 2.5])

plt.title("lvplot")