Example #1
0
    def visualize_importance(self):
        """Plot the per-fold feature importances of ``self.models`` as a boxen plot.

        ``get_feature_importance()`` is called once on every model in
        ``self.models``; the values are labelled with ``self.feature_cols`` and
        the 50 features with the largest summed importance are drawn, ordered
        from most to least important.

        Side effect: ``self.importance`` ends up holding the importance vector
        of the last model.

        Returns:
            tuple: ``(fig, ax)``. ``ax`` holds two axes; only ``ax[0]`` is drawn
            on, ``ax[1]`` is left empty for the caller.

        Raises:
            ValueError: If ``self.models`` is empty.
        """
        columns = self.feature_cols.tolist()
        fold_frames = []
        for fold, model in enumerate(self.models, start=1):
            # Query the model once and reuse the result for both the attribute
            # and the plotted frame.
            self.importance = model.get_feature_importance()
            fold_frames.append(
                pd.DataFrame({
                    'feature_importance': self.importance,
                    'column': columns,
                    'fold': fold,
                }))

        if not fold_frames:
            raise ValueError('self.models is empty; nothing to plot')

        # One concat at the end instead of re-copying the frame on every fold.
        feature_importance_df = pd.concat(fold_frames,
                                          axis=0,
                                          ignore_index=True)

        order = feature_importance_df.groupby('column').sum()[[
            'feature_importance'
        ]].sort_values('feature_importance', ascending=False).index[:50]

        fig, ax = plt.subplots(2, 1, figsize=(max(6, len(order) * .4), 14))
        sns.boxenplot(data=feature_importance_df,
                      x='column',
                      y='feature_importance',
                      order=order,
                      ax=ax[0],
                      palette='viridis')
        ax[0].tick_params(axis='x', rotation=90)
        ax[0].grid()
        fig.tight_layout()
        return fig, ax
def plot_entanglement_boxes(data, plot_vars, save=True):
    """Draw a boxen plot of ``transitions`` grouped by ``entanglement``.

    Args:
        data: DataFrame with ``entanglement``, ``transitions`` and ``n_values``
            columns. Nothing is drawn when it is empty.
        plot_vars: Object providing ``ylim`` (y-axis limits) and ``tick_size``
            (spacing of the major y ticks).
        save: When true, the figure is written to
            ``results/plots/<cfg.DIR>/<cfg.NAME>_<n_values>ary.png`` before it
            is shown.

    Returns:
        None.
    """
    if data.empty:
        return
    plt.rcParams.update({'font.size': cfg.FONTSIZE})
    _, ax = plt.subplots(figsize=cfg.FIGSIZE)
    sns.boxenplot(x='entanglement', y='transitions', data=data, color='C0',
                  ax=ax, showfliers=False)
    # Only the first row's value is used for the output file name.
    n_values = data['n_values'].iloc[0]

    ax.set_ylim(plot_vars.ylim)
    ax.set_xlabel('Effect size')
    ax.set_yscale('linear')
    ax.yaxis.set_major_formatter(ticker.FormatStrFormatter('%2.0f'))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(plot_vars.tick_size))
    # The y label is set once, after the ticks are final, because
    # autoscale_yticks() contributes a suffix to it.
    ax.set_ylabel('Generated states' + autoscale_yticks(ax, dtype=int))
    plt.tight_layout()
    plt.subplots_adjust(top=.95, bottom=.2, right=.95, left=0.25,
                        hspace=0, wspace=0)
    plt.margins(0, 0)
    if save:
        plt.savefig('results/plots/{}/{}_{}ary.png'.format(
            cfg.DIR, cfg.NAME, n_values), dpi=100)
    plt.show()
def visualize_importance(models, feat_train_df):
    """Plot LightGBM feature importances as a boxen plot.

    When several models are given (e.g. one per CV fold), the spread of each
    feature's importance across the models is what the boxen plot shows.

    Args:
        models: Either a single model exposing ``feature_importance()`` or an
            iterable of such models.
        feat_train_df: DataFrame used for training; its columns name the
            features.

    Returns:
        tuple: ``(fig, ax)`` of the drawn figure. The figure is also shown.

    Raises:
        ValueError: If ``models`` is an empty iterable.
    """
    # A single model exposes ``feature_importance`` itself; anything else is
    # treated as an iterable of models.
    if hasattr(models, 'feature_importance'):
        models = [models]

    frames = [
        pd.DataFrame({
            'feature_importance': model.feature_importance(),
            'column': feat_train_df.columns,
        }) for model in models
    ]
    if not frames:
        raise ValueError('models is empty; nothing to plot')
    feature_importance_df = pd.concat(frames, axis=0, ignore_index=True)

    # The 50 features with the largest summed importance, most important first.
    order = feature_importance_df.groupby('column') \
                .sum()[['feature_importance']] \
                .sort_values('feature_importance', ascending=False).index[:50]

    fig, ax = plt.subplots(figsize=(len(order) * .4, 7))
    sns.boxenplot(data=feature_importance_df,
                  x='column',
                  y='feature_importance',
                  order=order,
                  ax=ax,
                  palette='viridis')
    ax.tick_params(axis='x', rotation=90)
    fig.tight_layout()
    plt.show()
    return fig, ax
    def sns_boxenplot(self, dataframe, x_name=None, y_name=None, hue_str=None):
        """
        Draw and show a seaborn boxenplot of ``dataframe``.

        A boxenplot resembles a box plot but draws more quantiles, which makes
        it informative for large datasets.

        Parameters
        ----------
        dataframe : dataframe
            data container
        x_name : str, optional
            column name for the x-axis. The default is None.
        y_name : str, optional
            column name for the y-axis. The default is None.
        hue_str : str, optional
            name of categorical data to color by. The default is None.

        Returns
        -------
        None.

        Example
        -------
        diamonds = sns.load_dataset('diamonds').sort_values('color')
        myplot.sns_boxenplot(diamonds, 'color', 'price')
        """
        plot_options = {
            'data': dataframe,
            'x': x_name,
            'y': y_name,
            'hue': hue_str,
            'palette': 'deep',
        }
        sns.boxenplot(**plot_options)
        plt.show()
Example #5
0
def graph_univar_pred(df_data, list_var, col_predict, var_type):
    """Plot each variable of ``list_var`` against the prediction column.

    One subplot row is created per variable. Rows with a missing value in
    either the variable or ``col_predict`` are excluded from that subplot.

    Args:
        df_data: DataFrame holding the variables and ``col_predict``.
        list_var: Names of the columns to plot, one subplot each.
        col_predict: Name of the prediction (label) column.
        var_type: ``"num"`` draws a boxen plot of the variable per prediction
            value; ``"cat"`` draws a count plot coloured by prediction, with
            each bar annotated with its share of the plotted rows. Any other
            value leaves the subplots empty.

    Returns:
        None.
    """
    # squeeze=False keeps the axes array 2-D even for a single variable, so
    # per-row indexing works for any non-empty list_var.
    fig, axs = plt.subplots(len(list_var),
                            1,
                            figsize=set_size(plt_wd, len(list_var), 1),
                            squeeze=False)
    fig.subplots_adjust(hspace=0.3)  # Adjust space between rows
    for ax, item in zip(axs[:, 0], list_var):
        # NaN never compares equal to anything, so missing values must be
        # detected with notna() rather than a comparison against np.nan.
        keep = df_data[item].notna() & df_data[col_predict].notna()
        df_valid = df_data.loc[keep]
        if var_type == "num":
            sns.boxenplot(x=col_predict, y=item, data=df_valid, ax=ax)
        elif var_type == "cat":
            sns.countplot(x=item, hue=col_predict, data=df_valid, ax=ax)
            # Annotate every bar with its percentage of the plotted rows.
            total = df_valid.shape[0]
            for patch in ax.patches:
                percentage = '{:.0f}%'.format(100 * patch.get_height() / total)
                x = patch.get_x() + patch.get_width() + 0.02
                y = patch.get_y() + patch.get_height() / 2
                ax.annotate(percentage, (x, y))
Example #6
0
def detect_outliers(df, target):
    """Show one diagnostic figure per column of ``df`` to help spot outliers.

    Object-dtype columns get a box plot overlaid with a boxen plot of
    ``target`` per category. All other columns get three stacked panels:
    a density estimate, a scatter against ``target`` and a boxen/box plot of
    the column itself.

    Args:
        df: DataFrame to inspect; every column (including ``target``) is
            plotted.
        target: Name of the target column.

    Returns:
        None.
    """
    for col in df:

        plt.figure(figsize=(14, 11))
        plt.title(col)
        plt.suptitle('Detect Outliers')

        # Data vectors are passed by keyword: seaborn >= 0.12 accepts only
        # ``data`` positionally, older versions accept the keywords as well.
        if df[col].dtypes == object:
            sb.boxplot(x=df[col], y=df[target], color='gray')
            sb.boxenplot(x=df[col], y=df[target])
        else:
            plt.subplot(311)
            try:
                sb.distplot(df[col], hist=False, rug=True)
            except Exception:
                # Best-effort fallback kept from the original; presumably for
                # data on which distplot's density estimate fails.
                sb.kdeplot(df[col], bw=0.3)

            plt.subplot(312)
            sb.scatterplot(x=df[col], y=df[target])

            plt.subplot(313)
            sb.boxenplot(x=df[col])
            sb.boxplot(x=df[col], color='gray')

        plt.show()
def plot_grouped_boxplot(first_level_tests, attn_rois):
    """Show a boxen plot of phase-phase coupling per connection and session.

    Column names of ``first_level_tests`` are expected to look like
    ``"<session> <connection>"``: the first whitespace-separated token becomes
    the ``Session`` and the second the ``Connection``. Connections reported as
    mirrors by ``mirror_strfind(attn_rois)`` are left out of the plot. The
    long-format table is printed before plotting.

    Args:
        first_level_tests: Wide DataFrame, one column per session/connection.
        attn_rois: Passed through to ``mirror_strfind``.

    Returns:
        None.
    """
    # Only the mirrored connection names are needed here.
    _, mirrors = mirror_strfind(attn_rois)

    melty = first_level_tests.melt(var_name='Old Columns',
                                   value_name='Phase-phase coupling')
    name_parts = [old_col.split() for old_col in melty['Old Columns'].values]
    melty['Connection'] = [parts[1] for parts in name_parts]
    melty['Session'] = [parts[0] for parts in name_parts]

    # Drop all mirrored connections in one pass.
    melty = melty[~melty['Connection'].isin(list(mirrors))]

    print(melty)

    sns.set(style='darkgrid')
    fig, ax = plt.subplots(figsize=(16, 9))

    sns.boxenplot(x='Connection',
                  y='Phase-phase coupling',
                  hue='Session',
                  data=melty,
                  ax=ax)
    plt.show()
    def _summarize_stats_in_epochs(self, ax_auc: plt.Axes,
                                   ax_spikes: plt.Axes):
        """Draw per-epoch dF/F statistics as boxen plots on the given axes.

        For every epoch in ``self.epochs_to_display`` the analysed fluorescence
        of ``self.fov`` is filtered to that epoch, spikes are located, and the
        AUC around spikes and the mean spike number are collected. Epochs with
        no data stay all-NaN.

        Args:
            ax_auc: Axes receiving the AUC boxen plot.
            ax_spikes: Axes receiving the spike-rate boxen plot.
        """
        epochs = self.epochs_to_display
        df_auc = pd.DataFrame(
            # 1000 is max number of components per FOV
            np.full((1000, len(epochs)), np.nan),
            columns=epochs,
        )
        df_spikes = df_auc.copy()
        for col_idx, epoch in enumerate(epochs):
            cur_data = filter_da(self.fov.fluo_analyzed, epoch=epoch)
            if cur_data.shape[0] == 0:
                continue
            fps = self.fov.metadata.fps
            spikes = dff_tools.locate_spikes_scipy(cur_data, fps, thresh=0.7)
            auc = dff_tools.calc_total_auc_around_spikes(
                spikes, cur_data, fps)
            # Write through a single .iloc call: chained ``df[col][:n] = ...``
            # may assign into a temporary and does nothing under pandas
            # Copy-on-Write.
            df_auc.iloc[:len(auc), col_idx] = np.asarray(auc)
            spike_rate = dff_tools.calc_mean_spike_num(spikes,
                                                       cur_data,
                                                       fps=fps)
            df_spikes.iloc[:len(spike_rate), col_idx] = np.asarray(spike_rate)

        sns.boxenplot(data=df_auc, ax=ax_auc)
        sns.boxenplot(data=df_spikes, ax=ax_spikes)
        for ax in [ax_auc, ax_spikes]:
            ax.spines["top"].set_visible(False)
            ax.spines["right"].set_visible(False)
            ax.set_xlabel("Epoch")
        ax_auc.set_ylabel("AUC")
        ax_spikes.set_ylabel("Spikes per second")
Example #9
0
    def get_feature_importance(self,
                               train_feat_df: pd.DataFrame,
                               is_save=False,
                               filepath=None):
        """Plot the per-fold feature importances of ``self.models``.

        ``self.models`` is a mapping whose values expose
        ``feature_importances_``. The 50 features with the largest summed
        importance are drawn as a horizontal boxen plot, most important first,
        and the figure is shown.

        Args:
            train_feat_df: DataFrame whose columns name the features.
            is_save: When true, the finished figure is written to
                ``<filepath>lgbm_feature_importance.png`` and the importances
                of all folds to ``<filepath>lgbm_feature_importance.csv``.
            filepath: Path prefix for the saved files; required when
                ``is_save`` is true.

        Returns:
            None.

        Raises:
            ValueError: If ``is_save`` is true without a ``filepath``, or if
                ``self.models`` is empty.
        """
        if is_save and filepath is None:
            raise ValueError('filepath is required when is_save is True')

        fold_frames = [
            pd.DataFrame({
                'feature_importance': model.feature_importances_,
                'column': train_feat_df.columns,
                'fold': fold,
            }) for fold, model in enumerate(self.models.values(), start=1)
        ]
        if not fold_frames:
            raise ValueError('self.models is empty; nothing to plot')
        feature_importance_df = pd.concat(fold_frames,
                                          axis=0,
                                          ignore_index=True)

        order = feature_importance_df.groupby('column')\
            .sum()[['feature_importance']]\
            .sort_values('feature_importance', ascending=False).index[:50]

        fig, ax = plt.subplots(figsize=(8, max(6, len(order) * .25)))
        sns.boxenplot(data=feature_importance_df,
                      x='feature_importance',
                      y='column',
                      order=order,
                      ax=ax,
                      palette='viridis',
                      orient='h')
        ax.tick_params(axis='x', rotation=90)
        ax.set_title('Lightgbm Feature Importance')
        ax.grid()
        if is_save:
            # Save only after drawing, otherwise the PNG is an empty canvas;
            # the CSV holds every fold, not just the last one.
            fig.savefig(filepath + "lgbm_feature_importance.png")
            feature_importance_df.to_csv(
                filepath + "lgbm_feature_importance.csv", index=False)
        plt.show()
Example #10
0
def plot_distribution(process_df, fig=None):
    """Plot vesicle release distributions per release duration.

    The upper panel overlays a swarm plot and a boxen plot of ``offset`` per
    ``release_duration``; its top tick labels give the number of result rows
    for each duration. The lower panel shows ``num_released`` per
    ``release_duration`` as a box plot with a point plot on top.

    Args:
        process_df: DataFrame with a ``release_duration`` column; passed to
            ``vesicle_release_distribution`` to obtain the plotted results.
        fig: Figure to draw into. A new figure is created when omitted.

    Returns:
        None.
    """
    results_df = vesicle_release_distribution(process_df)
    if fig is None:
        fig = plt.figure()
    grid_shape = (8, 1)
    # Number of result rows per release duration, used as top tick labels.
    totals = [
        results_df[results_df.release_duration == r].shape[0]
        for r in np.unique(process_df.release_duration)
    ]
    ax_1 = plt.subplot2grid(shape=grid_shape, loc=(0, 0), rowspan=4, fig=fig)
    sns.swarmplot(x='release_duration', y='offset', data=results_df, ax=ax_1)
    sns.boxenplot(x='release_duration', y='offset', data=results_df, ax=ax_1)
    ax_1.xaxis.tick_top()
    ax_1.xaxis.set_label_position('top')
    ax_1.xaxis.set_ticklabels([*totals])
    ax_1.xaxis.set_tick_params(length=0)
    ax_1.set_xlabel('Total released')
    ax_1.set_title('Release offset distribution')

    ax_2 = plt.subplot2grid(shape=grid_shape, loc=(4, 0), rowspan=4, fig=fig)
    sns.boxplot(x='release_duration',
                y='num_released',
                data=results_df,
                ax=ax_2)
    sns.pointplot(x='release_duration',
                  y='num_released',
                  data=results_df,
                  ax=ax_2,
                  color='black')
    ax_2.set_xlabel('Release Duration')
    ax_2.set_ylabel('# of vesicles released')
    ax_2.set_title('# Vesicles per spike distribution')
def information_loss_plot(df, params):
    """
    Plot information loss as boxen plot.

    The data is filtered with ``filter_dataframe`` and reshaped with
    ``create_li_df``; the plot shows ``li`` per ``li_type``, split by the
    column named in ``params["x"]``.

    :param df: Dataframe
    :param params: Parameters; reads ``"x"`` (hue column), ``"name"`` (title)
        and ``"save_plot"``. When ``"save_plot"`` is truthy the figure is
        written to ``<cwd>/figures/data_analysis/<save_string>_lost_information.png``
        and closed, otherwise it is shown.
    :return: None
    """
    plt.rcParams.update({"font.size": 20})
    new_df, save_string = filter_dataframe(df, params, ignore_sampling=True)
    df_li = create_li_df(new_df).sort_values("li_type")
    plt.figure(figsize=(15, 8))
    sb.boxenplot(
        x="li_type",
        y="li",
        hue=params["x"],
        hue_order=sorted(new_df[params["x"]].drop_duplicates().tolist()),
        data=df_li,
        dodge=True,
    )
    plt.title(params["name"], fontsize=16)
    plt.xlabel("Type of Lost Information")
    plt.ylabel("Lost Information")
    plt.ylim(-0.5, 1.)
    if params["save_plot"]:
        out_dir = Path(os.getcwd()) / "figures" / "data_analysis"
        out_dir.mkdir(parents=True, exist_ok=True)
        plt.savefig(str(out_dir / ("%s_lost_information.png" % save_string)))
        plt.close()
    else:
        # Shown exactly once; a saved figure is closed without being shown.
        plt.show()
Example #12
0
 def draw_box_plots(self, data, xaxis, plot_name, xlabel, ylabel,
                    plot_saved_path, plot_format):
     """
     Draw a boxen plot of ``data`` and either show it or save it.

     The figure is shown when ``self.is_show`` is truthy; otherwise it is
     saved below ``config.path_for_thesis`` using ``self.dpi`` and closed.

     :param data: pandas.Dataframe
            The data used to draw the graph.
     :param xaxis: list
            The values of X-axis, in plotting order.
     :param plot_name: string
            The name (title) of this graph.
     :param xlabel: string
            The label of X-axis.
     :param ylabel: string
            The label of Y-axis.
     :param plot_saved_path: string
            The file path, relative to ``config.path_for_thesis``, to save
            the graph.
     :param plot_format: string
            png or other formats.
     :return: None
     """
     sns.boxenplot(data=data, order=xaxis)

     # Title and both axis labels share the same font settings.
     plt.title(plot_name, config.new_ft)
     for set_label, text in ((plt.xlabel, xlabel), (plt.ylabel, ylabel)):
         set_label(text, config.new_ft)

     if not self.is_show:
         target = config.path_for_thesis + plot_saved_path
         plt.savefig(target, format=plot_format, dpi=self.dpi)
         plt.close()
         return
     plt.show()
Example #13
0
def target_distribution_over_binary_groups(df, binary_cols, target_col, plot_type='boxenplot', **plot_kwargs):
    '''Show the distribution of ``target_col`` within each binary column.

    For use during feature engineering. One figure is shown per column in
    `binary_cols`; each must split `df` into exactly two groups. Horizontal
    lines mark the target mean of both groups, and the legend reports the
    number of rows where the column equals 1. Requires seaborn >= 0.9.0.

    Args:
        df: DataFrame holding the binary columns and the target.
        binary_cols: Names of the binary category columns.
        target_col: Name of the variable being modelled.
        plot_type: 'boxenplot' or 'violinplot'; any other value draws a
            regular box plot.
        **plot_kwargs: Forwarded to the seaborn plotting function.

    Raises:
        ValueError: If a column does not form exactly two groups.
    '''
    plotters = {'boxenplot': sns.boxenplot, 'violinplot': sns.violinplot}
    draw = plotters.get(plot_type, sns.boxplot)

    for col in binary_cols:
        draw(y=df[target_col], x=df[col], **plot_kwargs)
        ax = plt.gca()
        # Groups come back sorted by key, so the first mean belongs to 0.
        mu0, mu1 = df[target_col].groupby(df[col]).mean()
        ncol = df.loc[df[col] == 1].shape[0]
        ax.axhline(mu0, label=f'mean = {round(mu0, 2)}|{col} = 0', color='blue', linestyle=':')
        ax.axhline(mu1, label=f'mean = {round(mu1, 2)}|{col} = 1 with {ncol} observations',
                   color='orange', linestyle='-.')
        ax.grid(alpha=.4)
        ax.set_title(col)
        sns.despine()
        ax.legend(loc='best')
        plt.show()
Example #14
0
def make_boxenplot_chem(low_col, high_col, xlabel_low, xlabel_high, ylabel, low_color, high_color, out_name):
    """Save two side-by-side vertical boxen plots sharing both axes.

    Args:
        low_col: Values drawn in the left panel.
        high_col: Values drawn in the right panel.
        xlabel_low: X label of the left panel.
        xlabel_high: X label of the right panel.
        ylabel: Y label, shown on the left panel only.
        low_color: Colour of the left plot.
        high_color: Colour of the right plot.
        out_name: Output path without extension; ``.png`` is appended.

    Returns:
        None.
    """
    # plt.subplots already creates the figure; no separate plt.figure() is
    # needed (it would only leave an empty figure open).
    _, axes = plt.subplots(1, 2, sharey=True, sharex=True)
    sns.boxenplot(y=low_col, orient='vertical', ax=axes[0],
                  color=low_color).set(xlabel=xlabel_low, ylabel=ylabel)
    sns.boxenplot(y=high_col, orient='vertical', ax=axes[1],
                  color=high_color).set(xlabel=xlabel_high, ylabel='')
    plt.savefig(out_name + '.png', bbox_inches='tight')
Example #15
0
def visualize_importance(models, train_feat_df, importance_type="gain"):
    """Plot per-fold feature importances of fitted models as a boxen plot.

    The importances of every model are collected, and the 50 features with
    the largest summed importance are drawn horizontally, most important
    first.

    Args:
        models: Iterable of fitted models exposing ``feature_importances_``.
        train_feat_df: DataFrame whose columns name the features.
        importance_type: Kept for interface compatibility; not used here.

    Returns:
        tuple: ``(fig, ax)`` of the drawn figure.

    Raises:
        ValueError: If ``models`` is empty.
    """
    # Collect one frame per fold; every fold must contribute to the plot,
    # not only the last one.
    fold_frames = [
        pd.DataFrame({
            "feature_importance": model.feature_importances_,
            "column": train_feat_df.columns,
            "fold": fold,
        }) for fold, model in enumerate(models, start=1)
    ]
    if not fold_frames:
        raise ValueError("models is empty; nothing to plot")
    feature_importance_df = pd.concat(fold_frames,
                                      axis=0,
                                      ignore_index=True)

    order = (feature_importance_df.groupby("column").sum()[[
        "feature_importance"
    ]].sort_values("feature_importance", ascending=False).index[:50])

    fig, ax = plt.subplots(figsize=(8, max(6, len(order) * 0.25)))
    sns.boxenplot(
        data=feature_importance_df,
        y="column",
        x="feature_importance",
        order=order,
        ax=ax,
        palette="viridis",
    )
    ax.tick_params(axis="x", rotation=90)
    ax.grid(True)
    fig.tight_layout()
    return fig, ax
def explore_data_catbin(to_explore, df, target, pred_type='cat'):
    """
    Visualize how each predictor relates to a binary categorical target.

    For every column in `to_explore` a two-panel figure is created: the left
    panel is a histogram of the predictor; the right panel depends on
    `pred_type`:

    * `cat`  - point plot of the target mean per category, with the overall
      target mean drawn as a dashed red line.
    * `cont` - horizontal boxen plot of the predictor against the target.

    Any other `pred_type` prints an error message and nothing is drawn.
    The target is assumed to be binary, so its mean is the share of 1 labels.

    Always returns None.
    """
    is_categorical = pred_type == 'cat'
    if not is_categorical and pred_type != 'cont':
        print("Error: `pred_type` should be 'cat' for categorical"
              "        predictors and 'cont' for continuous predictors. No other"
              "        values accepted.")
        return None

    # Overall share of positive labels, rounded for display.
    pop_mean = np.round(df[target].mean(), 4)

    for col in to_explore:
        fig, (left_ax, right_ax) = plt.subplots(figsize=(10, 5),
                                                nrows=1,
                                                ncols=2)
        plt.tight_layout(pad=3)

        sns.histplot(data=df, x=col, ax=left_ax, discrete=is_categorical)
        left_ax.set_title(f"Distribution of {col}")

        if is_categorical:
            sns.pointplot(data=df,
                          x=col,
                          y=target,
                          ci=68,
                          ax=right_ax,
                          join=False,
                          scale=1.5,
                          capsize=0.05)
            right_ax.set_title("Target Mean per Category")
            right_ax.axhline(pop_mean,
                             color='red',
                             ls='dashed',
                             label='population mean')
            right_ax.legend()
        else:
            sns.boxenplot(data=df,
                          x=col,
                          y=target,
                          ax=right_ax,
                          orient='h',
                          width=1)
            right_ax.set_title("Feature Distribution Per Target Class")

    return None
Example #17
0
def draw_boxplots(data_frame_scaled):
    """Draw a horizontal boxen plot of every column of ``data_frame_scaled``.

    The plot goes into the first cell of a 3x3 subplot grid; the default
    figure size is set to 40x35 inches beforehand.
    """
    plt.rcParams.update({'figure.figsize': (40, 35)})
    plt.subplot(3, 3, 1)
    sns.set_theme(style="whitegrid")

    boxen_options = {'orient': "h", 'palette': "Set3"}
    sns.boxenplot(data=data_frame_scaled, **boxen_options)

    plt.title('box plots types', fontsize=10)
Example #18
0
def plot_class_proba(model, x, y, show_graph=True, label_encoder=None):
    """Draw a boxen plot of the predicted probability per class.

    The plotted probability is column 1 of ``model.predict_proba(x)``. When a
    ``label_encoder`` is given, ``y`` is mapped back through its
    ``inverse_transform`` before plotting. The figure is shown when
    ``show_graph`` is truthy.
    """
    labels = y if label_encoder is None else label_encoder.inverse_transform(y)
    second_column_proba = model.predict_proba(x)[:, 1]

    df = pd.DataFrame({'Class': labels, 'Probability': second_column_proba})
    sns.boxenplot(x='Class', y='Probability', data=df)

    if show_graph:
        graph.show()
def make_boxenplot_AH(holo_col, apo_col, xlabel, ylabel, title, out_name):
    """Save side-by-side vertical boxen plots labelled 'Holo' and 'Apo'.

    Args:
        holo_col: Values drawn in the left ('Holo') panel.
        apo_col: Values drawn in the right ('Apo') panel.
        xlabel: Not used; the panels are labelled 'Holo' and 'Apo'.
        ylabel: Y label, shown on the left panel only.
        title: Not used.
        out_name: Output path without extension; ``.png`` is appended.

    Returns:
        None.
    """
    # plt.subplots already creates the figure; no separate plt.figure() is
    # needed (it would only leave an empty figure open).
    _, axes = plt.subplots(1, 2, sharey=True, sharex=True)
    sns.boxenplot(holo_col, orient='v', ax=axes[0]).set(xlabel='Holo',
                                                        ylabel=ylabel)
    sns.boxenplot(apo_col, orient='v', ax=axes[1]).set(xlabel='Apo',
                                                       ylabel='')
    plt.savefig(out_name + '.png')
Example #20
0
def subplot_draw1():
    """Show vertical boxen plots of the first ten columns named in ``colnm``.

    Reads the module-level ``df``, ``colnm`` and ``color``; the plots are laid
    out on a 2x6 subplot grid.
    """
    plt.figure(figsize=(10, 6))
    for position in range(1, 11):
        # One grid cell per column, filled row by row.
        plt.subplot(2, 6, position)
        name = colnm[position - 1]
        sns.boxenplot(df[name], orient="v", width=0.5, color=color[0])
        plt.ylabel(name, fontsize=12)

    # Let matplotlib adjust the subplot parameters so the panels fill the
    # figure without overlapping.
    plt.tight_layout()
    plt.show()
Example #21
0
def make_boxenplot_chem(low_col, high_col, xlabel_low, xlabel_high, ylabel,
                        out_name):
    """Save two side-by-side vertical boxen plots sharing both axes.

    Args:
        low_col: Values drawn in the left panel.
        high_col: Values drawn in the right panel.
        xlabel_low: X label of the left panel.
        xlabel_high: X label of the right panel.
        ylabel: Y label, shown on the left panel only.
        out_name: Output path without extension; ``.png`` is appended.

    Returns:
        None.
    """
    # plt.subplots already creates the figure; no separate plt.figure() is
    # needed (it would only leave an empty figure open).
    _, axes = plt.subplots(1, 2, sharey=True, sharex=True)
    sns.boxenplot(low_col, orient='v', ax=axes[0]).set(xlabel=xlabel_low,
                                                       ylabel=ylabel)
    sns.boxenplot(high_col, orient='v',
                  ax=axes[1]).set(xlabel=xlabel_high, ylabel='')
    plt.savefig(out_name + '.png')
Example #22
0
def four_rate_plot(property):
    """Plot ``property`` against each of the four rates on a 2x2 grid.

    Reads the module-level ``all_tests``. Every panel overlays a grey boxen
    plot with a black strip plot; the panels share the y axis.
    """
    fig, axs = plt.subplots(ncols=2, nrows=2, figsize=[20, 20], sharey=True)
    rates = ('delta', 'gamma', 'beta', 'alpha')

    # Panels are filled row by row.
    for rate, ax in zip(rates, axs.ravel()):
        sns.boxenplot(x=rate,
                      y=property,
                      data=all_tests,
                      ax=ax,
                      color='grey')
        sns.stripplot(x=rate,
                      y=property,
                      data=all_tests,
                      ax=ax,
                      marker='.',
                      size=3,
                      jitter=True,
                      color='black')

        sns.despine()
Example #23
0
def wykres_9(x, y, nazwa_wykres, nazwa_x, nazwa_y):
    """Draw a boxen plot of two columns of the module-level ``selected_data``.

    Args:
        x: Name of the column plotted on the x axis.
        y: Name of the column plotted on the y axis.
        nazwa_wykres: Figure title.
        nazwa_x: X-axis label.
        nazwa_y: Y-axis label.

    Returns:
        The created matplotlib figure.
    """
    f, ax = plt.subplots(figsize=(12, 8))

    sns.boxenplot(data=selected_data,
                  x=selected_data[x],
                  y=selected_data[y],
                  scale="linear",
                  ax=ax)

    # Labels are applied after plotting: seaborn may set the axis labels
    # from the data, which would replace labels set beforehand.
    ax.set_title(nazwa_wykres, fontsize=16)
    ax.set_ylabel(nazwa_y, fontsize=14)
    ax.set_xlabel(nazwa_x, fontsize=14)
    return f
Example #24
0
def plot_annotation_entropy(adata_map, annotation='cell_type'):
    """Plot the row-wise entropy of ``adata_map.X`` grouped by an annotation.

    The entropy of every row of ``adata_map.X`` is computed with the number of
    columns as logarithm base and stored in ``adata_map.obs['entropy']``
    (side effect). It is then drawn as a boxen plot per ``annotation``
    category, with the y axis fixed to [0, 1].

    Args:
        adata_map: Object providing ``X`` (2-D array) and ``obs`` (DataFrame
            containing the ``annotation`` column).
        annotation: Column of ``adata_map.obs`` used for grouping.

    Returns:
        None.
    """
    adata_map.obs['entropy'] = entropy(adata_map.X,
                                       base=adata_map.X.shape[1],
                                       axis=1)
    fig, ax = plt.subplots(1, 1, figsize=(10, 3))
    ax.set_ylim(0, 1)
    sns.boxenplot(x=annotation, y="entropy", data=adata_map.obs, ax=ax)
    plt.xticks(rotation=30)
Example #25
0
def check_outlier(df):
    """Save one horizontal boxen plot per column of ``df``.

    The columns 'Year' and 'ShortName' are skipped. Each plot is written to
    ``<filepath>inspection/boxplot/boxplot <column>.png`` (``filepath`` is a
    module-level name) and the shared figure is cleared afterwards.
    """
    columns = df.columns
    sns.set_style("whitegrid")
    plt.figure(figsize=(24, 8))
    skipped = ['Year', 'ShortName']

    for feature in columns:
        if feature in skipped:
            continue
        sns.boxenplot(x=feature, orient='h', data=df)
        title = 'boxplot ' + feature
        plt.title(title)
        plt.savefig(filepath + 'inspection/boxplot/' + title + '.png')
        # Reuse the same figure for the next column.
        plt.clf()
Example #26
0
def plot_finish(finish_dict, experiment):
    """Save a boxen plot of ``time`` per ``method`` for an experiment.

    The output folder ``<plot_folder>finish/`` is created when missing. The
    frames in ``finish_dict`` are concatenated, sorted by ``method`` and
    plotted to ``<folder><experiment>_boxplot.png``.

    Args:
        finish_dict: Mapping whose values are DataFrames with ``method`` and
            ``time`` columns. When empty, a message is printed and nothing is
            plotted.
        experiment: Name used as the output file prefix.

    Returns:
        None.
    """
    folder = plot_folder + 'finish/'
    # Creates missing parent folders too and tolerates an existing folder.
    os.makedirs(folder, exist_ok=True)

    if len(finish_dict) == 0:
        print('No finish Data available')
        return

    finish_df = pd.concat(finish_dict.values(), axis=0)
    finish_df = finish_df.sort_values('method')
    sns.boxenplot(data=finish_df, x='method', y='time')
    plt.savefig(folder + experiment + '_boxplot' + '.png')
    plt.close()
Example #27
0
def plot_graphs(feature_1, feature_2, df):
    """Compare two columns of ``df`` with four seaborn plots side by side.

    From left to right: boxen plot, box plot, violin plot (with the points
    drawn inside) and bar plot, all with ``feature_1`` on the x axis and
    ``feature_2`` on the y axis.
    """
    fig, axs = plt.subplots(ncols=4)
    fig.set_figwidth(30)
    fig.set_figheight(8)
    plt.suptitle(feature_1 + ' vs. ' + feature_2)

    # Each entry: plotting function and the extra options it needs.
    panels = (
        (sns.boxenplot, {}),
        (sns.boxplot, {}),
        (sns.violinplot, {'inner': "points"}),
        (sns.barplot, {}),
    )
    for panel_ax, (draw, extra) in zip(axs, panels):
        draw(x=feature_1, y=feature_2, data=df, ax=panel_ax, **extra)
def draw_boxplot(data: pd.DataFrame) -> None:
    """Show boxen plots of ``total_purchase_amt`` per feature on a 7x4 grid.

    Every column of ``data`` is plotted on the x axis except those listed in
    the module-level ``date_indexs`` and ``labels`` and the ``date`` column.
    Panels are filled row by row.
    """
    f, axes = plt.subplots(7, 4, figsize=(18, 24))
    excluded = date_indexs + labels + ['date']
    plotted = [name for name in data.columns if name not in excluded]

    for position, name in enumerate(plotted):
        row, col = divmod(position, 4)
        sns.boxenplot(x=name,
                      y='total_purchase_amt',
                      data=data,
                      ax=axes[row][col])
    plt.show()
 def _BoxPlot(self):
     '''Draw boxen plots of the target for the low-cardinality categorical columns.

     Only columns of ``self.cat_cols`` with fewer than 10 unique values in
     ``self.train_df`` are plotted. Within each plot the categories are ordered
     by their mean of ``self.target_col``. Plots are placed consecutively on a
     two-column grid with at least three rows; more rows are added when more
     than six columns qualify.
     '''
     df = self.train_df.copy()
     plot_cols = [
         col for col in self.cat_cols
         if len(self.train_df[col].unique()) < 10
     ]
     n_grid_cols = 2
     # Ceiling division; never fewer than the original three rows.
     n_grid_rows = max(3, -(-len(plot_cols) // n_grid_cols))

     fig = plt.figure(figsize=(14, 18))
     # Positions are counted over the plotted columns only, so skipped
     # columns leave no holes and cannot push the index past the grid.
     for position, col in enumerate(plot_cols, start=1):
         df[col +
             '_mean'] = df.groupby(col)[self.target_col].transform('mean')
         fig.add_subplot(n_grid_rows, n_grid_cols, position)
         sns.boxenplot(x=col, y=self.target_col,
                       data=df.sort_values(col + '_mean'))
         plt.title('Comparison of salaries as per {}'.format(
             col), fontsize=14)
         plt.xticks(rotation=45)
     if plot_cols:
         plt.tight_layout()
Example #30
0
    def line_integrals(self):
        """Save a boxen plot and a scatter plot of ``sum`` for the line data.

        Runs once on ``self.lines`` (file suffix ``all``) and once on
        ``self.lines_filtered`` (suffix ``flt``). For each data set:

        * ``line_boxplot_<kind>.pdf``: boxen plot of ``sum`` per ``Compound``
          on a logarithmic y axis.
        * ``lines_scatter_<kind>.pdf``: scatter of ``sum`` against
          ``v_width``, coloured by ``Compound``.

        Both files go to ``<self.cc.base_path>/out/graphs``. The compound
        order is printed for each data set.
        """
        for a, kind in zip([self.lines, self.lines_filtered], ["all", "flt"]):
            # Narrow col_order here to plot only a subgroup of compounds.
            col_order = a["Compound"].unique()
            print(col_order)
            a = a[a["Compound"].isin(col_order)]
            # Keep, per unit, the row(s) whose s_max equals the unit maximum
            # (original note: one row per z-stack).
            idx = a.groupby(["unit"])["s_max"].transform("max") == a["s_max"]
            a = a.loc[idx]

            fig = plt.figure(figsize=(8, 8), dpi=150)
            ax = fig.gca()
            sns.boxenplot(x="Compound", y="sum", order=col_order, data=a,
                          ax=ax)
            # Changing the scale installs that scale's default formatter, so
            # the custom formatter has to be set afterwards.
            ax.set_yscale('log')
            ax.yaxis.set_major_formatter(self.formatter)
            ax.set_xticklabels(ax.xaxis.get_ticklabels(),
                               rotation=45,
                               multialignment='right')
            path = o.ensure_dir(
                os.path.join(self.cc.base_path, 'out', 'graphs',
                             'line_boxplot_%s.pdf' % kind))
            fig.savefig(path)
            plt.close(fig)

            fig = plt.figure(figsize=(8, 8), dpi=150)
            ax = fig.gca()
            sns.scatterplot(x="v_width",
                            y="sum",
                            data=a,
                            hue="Compound",
                            alpha=0.1,
                            rasterized=True,
                            ax=ax)
            ax.set_xlim((0, 16))
            ax.set_ylim((0, 350e3))
            ax.xaxis.set_major_formatter(self.formatter)
            ax.yaxis.set_major_formatter(self.formatter)
            path = o.ensure_dir(
                os.path.join(self.cc.base_path, 'out', 'graphs',
                             'lines_scatter_%s.pdf' % kind))
            fig.savefig(path)
            plt.close(fig)
"""
Plotting large distributions
============================

"""
import seaborn as sns
# Apply the white-grid style to the plot drawn below.
sns.set(style="whitegrid")

# Example data set and the order in which the clarity grades are drawn.
diamonds = sns.load_dataset("diamonds")
clarity_ranking = "I1 SI2 SI1 VS2 VS1 VVS2 VVS1 IF".split()

# Carat distribution per clarity grade.
sns.boxenplot(
    data=diamonds,
    x="clarity",
    y="carat",
    order=clarity_ranking,
    color="b",
    scale="linear",
)