Python stripplot Beispiele, seaborn.stripplot Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: iRep.py Projekt: darmitage/iRep

def plot_tests(genomes, pairs, out, plot, cats, y_lab, normalize = False):
    """
    Collect per-sample test results, save them as a table and plot them.

    For every (genome, sample) pair the 'test' entry of the sample is read as
    three sequences (lengths, n50 values, slopes). All values are gathered in
    one DataFrame which is written tab-separated to `out`, then drawn as a
    box plot with an overlaid strip plot and saved to `plot`.

    genomes   -- nested mapping: genomes[g]['samples'][s]['test']
    pairs     -- iterable of (genome key, sample key)
    out       -- destination of the tab-separated table
    plot      -- destination of the figure
    cats      -- column name used for the lengths (x axis)
    y_lab     -- column name used for the slopes (y axis)
    normalize -- when equal to 'log2', slopes go through log_trans() first
    """
    all_lengths, all_slopes, all_labels, all_n50s = [], [], [], []
    for genome_id, sample_id in pairs:
        sample_data = genomes[genome_id]['samples'][sample_id]
        # display label: drop the last extension, underscores become spaces
        label = sample_id.rsplit('.', 1)[0].replace('_', ' ')
        test_lengths, test_n50s, test_slopes = sample_data['test']
        all_lengths.extend(test_lengths)
        all_slopes.extend(test_slopes)
        # one label per slope value
        all_labels.extend([label for _ in test_slopes])
        all_n50s.extend(test_n50s)

    if normalize == 'log2':
        all_slopes = log_trans(all_slopes)

    table = pd.DataFrame({cats:all_lengths, y_lab:all_slopes,
                          'sample':all_labels, 'n50':all_n50s})
    table.to_csv(out, sep = '\t')

    # Rows whose y value compares equal to False are not plotted.
    # NOTE(review): 0 also compares equal to False -- confirm that is intended.
    table = table[table[y_lab] != False]

    sns.set_style('whitegrid')
    sns.set_context('poster')

    # arguments common to both layers of the figure
    layer = dict(x = cats, y = y_lab, data = table,
                 hue = 'sample', palette = 'deep')
    sns_plot = sns.boxplot(**layer)
    sns.stripplot(jitter = True, size = 5, edgecolor = 'gray', **layer)

    plt.legend(loc = 'upper right', bbox_to_anchor=(1.05, 1))
    sns_plot.figure.savefig('%s' % (plot), bbox_inches = 'tight')

Beispiel #2

0

Datei anzeigen

Datei: analyse_distribution_stats.py Projekt: ARCHER-CSE/parallel-io

def main(argv):
    """
    Read a CSV of results and save a strip plot of write bandwidth per writer count.

    The CSV path is taken from sys.argv[1]; the `argv` parameter itself is
    not read anywhere in this function. The frame is printed, plotted with
    'Writers' on x, 'Write Bandwidth (MiB/s)' on y and 'Scheme' as hue, the
    figure is saved as 'dist_bandwidth_stats.png', and the process then exits
    with status 0.

    NOTE(review): this block uses Python 2 print statements.
    """

    # Lists of marker styles and line styles
    # (only referenced by the commented-out pointplot call below)
    markers = 10 * ['o','^','x']
    lines = 10 * ['-','--','-.']

    # NOTE(review): reads the global sys.argv rather than the argv parameter
    infile = sys.argv[1]

    resframe = pd.read_csv(infile)

    print "Summary of all results found:"
    print resframe

    fig, ax = plt.subplots()

    # sns.pointplot(x='Writers', y='Write Bandwidth (MiB/s)',
    #   data=resframe, hue='Scheme', scale=0.75, markers=markers,
    #   linestyles=lines, estimator=np.median, dodge=True, ci=100.0)
    # NOTE(review): newer seaborn releases spell `split` as `dodge` --
    # confirm against the installed version.
    sns.stripplot(x='Writers', y='Write Bandwidth (MiB/s)',
      data=resframe, hue='Scheme', jitter=True, split=True)
    # start the y axis at zero
    ax.set_ylim(ymin=0)

    plt.ylabel('Write Bandwidth / MiB/s')
    plt.xlabel('Writers')
    plt.legend()
    plt.savefig('dist_bandwidth_stats.png')
    plt.clf()

    sys.exit(0)

Beispiel #3

0

Datei anzeigen

Datei: gbrv_outdb.py Projekt: ebousq/pseudo_dojo

    def plot_errors_for_elements(self, ax=None, **kwargs):
        """
        Box plot, with the single observations overlaid, of the relative
        error (in percent) grouped by chemical element.

        Every row contributes its relative error 100 * (this - ae) / ae once
        for each distinct element returned by species_from_formula() for the
        row's formula. Elements are ordered with sort_symbols_by_Z().

        Args:
            ax: axes passed on to get_ax_fig_plt().

        Returns:
            The figure object returned by get_ax_fig_plt().
        """
        records = []
        for _, entry in self.iterrows():
            rel_err = 100 * (entry["this"] - entry["ae"]) / entry["ae"]
            # one record per distinct element of the formula
            records.extend(
                {
                    "element": symbol,
                    "rerr": rel_err,
                    "formula": entry.formula,
                    "struct_type": entry.struct_type,
                }
                for symbol in set(species_from_formula(entry.formula))
            )

        frame = DataFrame(records)
        order = sort_symbols_by_Z(set(frame["element"]))

        import seaborn as sns
        ax, fig, plt = get_ax_fig_plt(ax=ax)

        # Boxes first (whiskers span the full data range) ...
        ax = sns.boxplot(
            x="element", y="rerr", data=frame, ax=ax,
            order=order, whis=np.inf, color="c",
        )
        # ... then every observation as a jittered point on top.
        sns.stripplot(
            x="element", y="rerr", data=frame, ax=ax, order=order,
            jitter=True, size=5, color=".3", linewidth=0,
        )

        sns.despine(left=True)
        ax.set_ylabel("Relative error %")
        ax.grid(True)
        return fig

Beispiel #4

0

Datei anzeigen

Datei: plot.py Projekt: clarkfitzg/ballistics

def make_plots(groups):
    """
    Write four summary figures for the shot-group table `groups`.

    Files written to the working directory:
      points.png  -- strip plot of 'moa' per 'ammo'
      boxplot.png -- box plot of 'moa' per 'ammo'
      avg_moa.png -- bar plot of 'mean' per 'ammo'
      qqplot.png  -- distribution and probability plot of the non-null
                     'standard' column

    postprocess() (defined elsewhere) is called after each of the first three
    plots, before saving.

    The x/y columns are passed to seaborn by keyword: recent seaborn releases
    only accept `data` positionally, so the positional form raises there.
    """
    sns.stripplot(x="ammo", y="moa", data=groups, jitter=True)
    postprocess()
    plt.savefig("points.png")

    plt.clf()
    sns.boxplot(x="ammo", y="moa", data=groups)
    postprocess()
    plt.savefig("boxplot.png")

    plt.clf()
    sns.barplot(x="ammo", y="mean", data=groups, ci=None)
    plt.title("mean moa for best 9 of 10 five shot groups")
    plt.ylabel("moa")
    postprocess()
    plt.savefig("avg_moa.png")

    plt.clf()
    # only the rows that actually have a 'standard' value
    std = groups["standard"].dropna()

    fig, axes = plt.subplots(ncols=2)
    sns.distplot(std, ax=axes[0])
    stats.probplot(std, plot=axes[1])
    fig.set_size_inches(6, 4)
    fig.tight_layout()
    plt.savefig("qqplot.png")

Beispiel #5

0

Datei anzeigen

Datei: visuals.py Projekt: malcolmjmr/trading

def view_distribution(df,x="type",y="rate", plt=plt):
    """
    Show a violin plot of column `y` grouped by column `x`, with every
    observation overlaid as a white jittered point.

    The title is built from `y` and the first value of df.symbol.
    `plt` is the plotting module used for the figure, title and show().
    """
    first_symbol = df.symbol.values[0]
    plt.figure(1,figsize=(15,15))

    columns = dict(x=x, y=y, data=df)
    sns.violinplot(inner=None, **columns)
    sns.stripplot(jitter=True, color="white", edgecolor="gray", **columns)

    heading = y+' distribution ('+first_symbol+')'
    plt.title(heading)
    plt.show()

Beispiel #6

0

Datei anzeigen

Datei: Main.py Projekt: J-Sieber/Forest-Cover-Type

def Create_WildPlot(X, y1, y2, y3, y4):
    """
    Save a strip plot of feature X against the wilderness-area code (1-4).

    y1..y4 are indicator sequences; for each position the code is the number
    of the first indicator equal to 1. Positions where no indicator is 1 keep
    the value of y1. The figure is saved under
    "Plots/WildernessAreavs<feature name without spaces>.pdf".

    Changes from the previous version:
      * the codes are written into a copy of y1, so the caller's y1 is no
        longer modified in place;
      * title and axis labels are set on the axes object instead of through
        `sns.plt`, which current seaborn releases do not provide.
    """
    xlab = X.name.replace("_"," ")
    ylab = 'Wilderness Area'
    figlab = ylab + " vs " + xlab
    filelab =  "Plots/" + figlab.replace(" ","") + ".pdf"
    f, ax = plt.subplots(figsize=(5, 5))

    # work on a copy so the caller's indicator column stays untouched
    y = y1.copy()
    indicators = ((y1, 1), (y2, 2), (y3, 3), (y4, 4))
    for i in range(len(y)):
        for indicator, area_code in indicators:
            if indicator[i] == 1:
                y[i] = area_code
                break

    sns.stripplot(x = X, y = y, jitter = True, size = 5, linewidth = 0.1, ax = ax)
    ax.set_title(figlab)
    ax.set_xlabel(xlab)
    ax.set_ylabel(ylab)
    savefig(filelab)

Beispiel #7

0

Datei anzeigen

Datei: plotting.py Projekt: dhuppenkothen/entrofy

def _plot_categorical_and_continuous(df, xlabel, ylabel, x_keys, y_keys, ax,
                                     cmap, n_cat=5, plottype="box"):
    """
    Plot a categorical variable and a continuous variable against each
    other. Types of plots include box plot, violin plot, strip plot and swarm
    plot.

    Parameters
    ----------
    df : pd.DataFrame
        A pandas DataFrame with the data

    xlabel : str
        The column name for the variable on the x-axis

    ylabel : str
        The column name for the variable on the y-axis

    ax : matplotlib.Axes object
        The matplotlib.Axes object to plot the bubble plot into

    cmap : matplotlib.cm.colormap
        A matplotlib colormap to use for shading the bubbles

    n_cat : int
        The number of categories; used for creating the colour map

    plottype : {"box" | "violin" | "strip" | "swarm"}
        The type of plot to produce; default is a box plot

    Returns
    -------
    ax : matplotlib.Axes object
        The same matplotlib.Axes object for further manipulation

    """
    if x_keys is xlabel:
        keys = y_keys
    elif y_keys is ylabel:
        keys = x_keys
    else:
        raise Exception("Something went terribly, horribly wrong!")

    current_palette = sns.color_palette(cmap, n_cat)
    if plottype == "box":
        sns.boxplot(x=xlabel, y=ylabel, data=df, order=keys,
                    palette=current_palette, ax=ax)
    elif plottype == "strip":
        sns.stripplot(x=xlabel, y=ylabel, data=df, order=keys,
                      palette=current_palette, ax=ax)
    elif plottype == "swarm":
        sns.swarmplot(x=xlabel, y=ylabel, data=df, order=keys,
                      palette=current_palette, ax=ax)
    elif plottype == "violin":
        sns.violinplot(x=xlabel, y=ylabel, data=df, order=keys,
                       palette=current_palette, ax=ax)
    else:
        raise Exception("plottype not recognized!")

    return ax

Beispiel #8

0

Datei anzeigen

Datei: plotting.py Projekt: Hushpar/titanic_ml

def p7(data):
    """
    Show Age per Pclass, coloured by Survived, for males (top panel) and
    females (bottom panel) on two axes that share the x axis.

    `data` must provide the columns 'Sex', 'Pclass', 'Age' and 'Survived'.
    x, y and hue are passed by keyword because recent seaborn releases only
    accept `data` positionally.
    """
    # Distribution of survivors
    f, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8), sharex=True)

    # NOTE(review): the male panel fixes hue_order but not order, the female
    # panel fixes order but not hue_order -- confirm the colour/category
    # mapping is meant to differ between the panels.
    sns.stripplot(
        x="Pclass",
        y="Age",
        hue="Survived",
        data=data[data["Sex"] == "male"],
        palette="Set2",
        size=20,
        hue_order=(1, 0),
        marker="D",
        alpha=0.25,
        jitter=True,
        ax=ax1,
    )
    ax1.set_title("MALE")

    sns.stripplot(
        x="Pclass",
        y="Age",
        hue="Survived",
        data=data[data["Sex"] == "female"],
        palette="Set2",
        size=20,
        marker="D",
        alpha=0.25,
        order=(1, 2, 3),
        jitter=True,
        ax=ax2,
    )
    ax2.set_title("FEMALE")

    plt.show()

Beispiel #9

0

Datei anzeigen

Datei: plots.py Projekt: KamalakerDadi/Data-Processing

def stripplot_to_pdf(data, save_path, x=None, y=None, hue=None,
                     style='whitegrid', fontsize=2, rows=1, cols=1,
                     figsize=(4, 4), **kwargs):
    """ Data plotted as stripplot using seaborn and saved in a pdf
    given in save_path

    Parameters
    ----------
    data : pd.DataFrame, path to csv file, or list/tuple of either
        Single item or list of items to plot; every item gets its own axes.
        Paths are loaded with pd.read_csv. When a list/tuple is given, the
        number of columns of the grid is set to its length.

    save_path : str
        Path to save the pdf plot.

    x, y, hue : str, optional
        Column names forwarded to seaborn.stripplot.

    style : str
        Seaborn style name.

    fontsize : float
        Seaborn font scale.

    rows, cols : int
        Shape of the axes grid (cols is overridden for list input).

    figsize : tuple
        Figure size passed to plt.subplots.

    **kwargs
        Forwarded to seaborn.stripplot.

    Notes
    -----
    Fixes over the previous version: the axes grid is requested with
    squeeze=False so the default 1x1 grid no longer fails on `.reshape`;
    style and font scale are applied in one `sns.set` call (a separate
    `sns.set` after `set_style` resets the style to its default); csv paths
    inside a list are loaded too; the string check works on Python 2 and 3.
    """
    try:
        string_types = basestring  # Python 2
    except NameError:
        string_types = str  # Python 3

    if isinstance(data, (list, tuple)):
        cols = len(data)
        items = list(data)
    else:
        items = [data]

    frames = [pd.read_csv(item) if isinstance(item, string_types) else item
              for item in items]

    sns.set(style=style, font_scale=fontsize)

    with PdfPages(save_path) as pdf:
        # squeeze=False always yields a 2-D array of axes, even for 1x1
        fig, axes = plt.subplots(nrows=rows, ncols=cols, figsize=figsize,
                                 squeeze=False, sharey=True)
        for ax, frame in zip(axes.reshape(-1), frames):
            sns.stripplot(x=x, y=y, hue=hue, data=frame, ax=ax, **kwargs)
        pdf.savefig(fig)
        plt.close(fig)

Beispiel #10

0

Datei anzeigen

Datei: DataAnalysisTool0.3.py Projekt: williamzhuang/gcms-data-analysis

def timePlotScatter(data):
    """
    Interactively draw one strip plot per gene, with time on the x axis.

    The distinct values of the 'Gene' column are collected, the table is
    pivoted ('Values' by 'Sample' rows and 'Gene'/'Time' columns) and the
    user is asked whether to add box plots, whether to normalise, and for
    the y-axis label. Each gene is drawn in its own numbered figure; all
    figures are shown at the end.
    """
    # distinct gene names, in order of first appearance
    gene_names = dict.fromkeys(row['Gene'] for _, row in data.iterrows())

    pivoted = data.pivot_table('Values', ['Sample'], ['Gene', 'Time'])

    box = input("Do you want a boxplot for each timepoint? (y/n): ")
    normalize = input("Would you like to normalize the y-axis? (y/n): ")
    ylabel = input("What should the y-axis label be?: ")

    point_palette = dict(n_colors=8, desat=.9)
    for figure_number, gene in enumerate(gene_names, 1):
        heading = gene
        plt.figure(figure_number)
        gene_table = pivoted[gene]
        if normalize == 'y':
            # scale by the largest value of this gene's table
            gene_table = gene_table / np.amax(gene_table.values)
            heading = gene + " Normalized"
        if box == "y":
            makeBoxplot(gene_table)

        sns.stripplot(data=gene_table, size = 7, jitter = True,
                      palette = sns.color_palette("Set1", **point_palette))
        plt.title(heading)
        plt.ylabel(ylabel)
        plt.xlabel('Time(min)')

    # applies to the axes that are current after the loop
    plt.gca().set_ylim(bottom = 0)
    plt.show()

Beispiel #11

0

Datei anzeigen

Datei: DataAnalysisTool0.4.py Projekt: williamzhuang/gcms-data-analysis

def conditionPlot(data):
    """
    Interactively draw a strip plot of the values per gene/condition.

    The table is pivoted ('Values' by 'Sample' rows and 'Gene'/'Condition'
    columns). The user chooses whether to add box plots (and which kind),
    whether to normalise by the largest value, and the y-axis label. The
    plotted table is handed to exportExcel() before the figure is shown.
    """
    table = data.pivot_table('Values', ['Sample'], ['Gene', 'Condition'])
    answer = input("Do you want a boxplot to go with your data? (y/n): ")
    normalize = input("Would you like to normalize the y-axis? (y/n): ")

    if normalize == 'y':
        # missing values count as 0 when looking for the maximum
        table = table / np.amax(table.fillna(0).values)

    if answer == "y":
        for line in ("What kind of boxplot do you want?",
                     "\"1\" for a quartile boxplot.",
                     "\"2\" for a standard deviation boxplot."):
            print(line)
        boxStyle = input("Your answer: ")

        # anything other than "1" selects the standard deviation variant
        chosen = makeBoxplotQuartile if boxStyle == "1" else makeBoxplotDeviation
        chosen(table)

        makeBoxplot(table)

    sns.stripplot(data=table, size = 7, jitter = True,
                  palette = sns.color_palette("Set1", n_colors=8, desat=.9))
    ylabel = input("What should the y-axis label be?: ")

    plt.ylabel(ylabel)
    plt.xlabel('Gene/Condition')
    exportExcel(table)
    plt.show()

Beispiel #12

0

Datei anzeigen

Datei: median_compare_to_consensus.py Projekt: XiaoxiaoLiu/morphology_analysis

def plot_compare_median_consensus(output_dir, df_order, metric, type = 'ts',DISPLAY = 0):
    """
    Plot `metric` per algorithm and save the figure as a PNG in `output_dir`.

    type == 'ts': horizontal box plot with the observations overlaid on a
                  logarithmic x axis, saved as
                  'ts_compare_median_with_consensus_<metric>.png'.
    type == 'lm': seaborn lmplot of `metric` against 'order', coloured by
                  algorithm, saved as
                  'lm_compare_median_with_consensus_<metric>.lm.png'.

    The figure is shown first when DISPLAY is truthy, and closed at the end.
    """
    plt.figure()

    if type =='ts':
        by_algorithm = dict(x=metric, y="algorithm", data=df_order)

        ax = sb.boxplot(whis=np.inf, color="c", **by_algorithm)
        # every observation as a small dark point on top of the boxes
        sb.stripplot(jitter=True, size=3, color=".3", linewidth=0,
                     **by_algorithm)
        ax.set_xscale("log")
        sb.despine(trim=True)

        plt.savefig(output_dir + '/ts_compare_median_with_consensus_'+metric+'.png',
                    format='png')

    if type =='lm':
        sb.lmplot(x="order", y=metric, hue="algorithm", data=df_order)
        plt.xlabel('images sorted by the average neuron distance of the median reconstruction')
        plt.savefig(output_dir + '/lm_compare_median_with_consensus_'+metric+'.lm.png',
                    format='png')

    if DISPLAY:
        plt.show()
    plt.close()

Beispiel #13

0

Datei anzeigen

Datei: regions_vs_nonregions_comparison_in_boxplot.py Projekt: KamalakerDadi/Data-Processing

def stripplot_mean_score(df, save_path, atlas=None, suffix=None, x=None,
                         y=None, hue=None, style='whitegrid', fontsize=14,
                         jitter=.2, figsize=(9, 3), leg_pos=2, axx=None):
    """
    Draw a box plot with an overlaid strip plot of `x` per category `y`.

    The values 'no' / 'yes' in column `y` are replaced by long display
    labels, the 'dataset' column is upper-cased, a vertical reference line
    is drawn at x=0, and the x tick labels are rewritten as percentages with
    an explicit '+' on positive values.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain column `y` (values 'no'/'yes') and a 'dataset' column.
    atlas : str, optional
        Key into the alias table ('kmeans', 'ica', 'dictlearn', 'basc');
        its alias is written above the axes. Unknown keys are written as-is
        and None writes nothing.
    x, y, hue : str
        Column names forwarded to seaborn.
    fontsize : int
        Legend font size.
    jitter : float
        Jitter forwarded to seaborn.stripplot.
    axx : matplotlib axes, optional
        Axes to draw into; defaults to the current axes.
    save_path, suffix, style, figsize, leg_pos
        Accepted for interface compatibility; not used here.

    Notes
    -----
    Fixes over the previous version: the title used the undefined names
    `key` and `ax` (now `atlas` and `axx`); `axx=None` no longer fails; two
    identical if/else branches are merged; the tick loop no longer shadows
    the `x` parameter; '\\%' is written as a raw string.
    """
    aliases = {'kmeans': 'K-Means',
               'ica': 'GroupICA',
               'dictlearn': 'Dictionary Learning',
               'basc': 'BASC'}
    new_names = {'no': 'Without\n regions extracted',
                 'yes': 'With\n regions extracted'}

    def change_label_name(row, label):
        row[label] = new_names[row[label]]
        return row

    if axx is None:
        axx = plt.gca()

    df = df.apply(lambda row: change_label_name(row, y), axis=1)

    # change the name of the dataset to upper
    df['dataset'] = df['dataset'].str.upper()

    rc('xtick', labelsize=12)
    rc('ytick', labelsize=16)
    rc('axes', labelweight='bold')
    rc('legend', fontsize=fontsize)

    # NOTE(review): `palette` is computed but the strip plot below uses
    # `datasets_palette`, which is not defined in this function -- confirm
    # which one is intended.
    n_data = len(df['dataset'].unique())
    palette = color_palette(n_data)

    # draw a default vline at x=0 that spans the yrange
    axx.axvline(x=0, linewidth=4, zorder=0, color='0.6')

    sns.boxplot(data=df, x=x, y=y, fliersize=0, linewidth=2,
                boxprops={'facecolor': '0.5', 'edgecolor': '.0'},
                width=0.5, ax=axx)

    # NOTE(review): newer seaborn releases spell `split` as `dodge` --
    # confirm against the installed version.
    sns.stripplot(data=df, x=x, y=y, hue=hue, edgecolor='gray',
                  size=5, split=True, palette=datasets_palette, jitter=jitter,
                  ax=axx)

    axx.set_xlabel('')
    axx.set_ylabel('')

    # alias of the atlas centred just above the axes
    title = aliases.get(atlas, atlas)
    if title is not None:
        axx.text(.5, 1.02, title, transform=axx.transAxes, size=15,
                 ha='center')

    # make the positive labels with "+"
    axx_xticklabels = []
    for tick in axx.get_xticks():
        prefix = '+' if tick > 0 else ''
        axx_xticklabels.append(prefix + str(tick) + r'$\%$')
    axx.set_xticklabels(axx_xticklabels)

Beispiel #14

0

Datei anzeigen

Datei: MasterScript_forclass.py Projekt: clintval/systems_micro

    def CheckShannonIndex(self, labels=None, condition_dict=None, fig_title=None):
        """
        Compute the Shannon entropy of every sample and optionally plot it.

        The entropy (natural logarithm) of each row of self.abun_df is stored
        in a DataFrame with column 'SDI', joined column-wise with
        self.meta_df, and saved as self.SDI.

        Args:
            labels: optional iterable of metadata column names; for each one
                a violin plot and a box plot of SDI grouped by that column
                are saved. Only used when fig_title is given.
            condition_dict: not used.
            fig_title: optional output file name such as 'out.png'. The part
                after the last '.' is the extension ('png' is used when there
                is none). When omitted, nothing is plotted.

        Fixes over the previous version: indentation no longer mixes tabs
        and spaces; passing `labels` without `fig_title` no longer fails on
        an undefined extension; a title without '.' no longer raises; a
        sample whose counts sum to zero gets entropy 0.0.
        """
        def ShannonIndex(numList):
            """Return the Shannon entropy of a sequence of counts."""
            SU = sum(numList)
            if SU == 0:
                return 0.0
            SDI = 0.0
            for num in numList:
                freq = float(num) / SU
                if freq > 0:
                    SDI = SDI - freq * np.log(freq)
            return SDI

        print('Making Shannon Diversity boxplot for all samples')

        # Calculate shannon entropy for each sample
        SDIs = pd.DataFrame(index=self.abun_df.index, columns=['SDI'])
        for sample in self.abun_df.index:
            SDIs.loc[sample, 'SDI'] = ShannonIndex(self.abun_df.loc[sample])
        # Add metadata labels to the df containing SDIs
        SDIs = pd.concat([SDIs, self.meta_df], axis=1)
        SDIs['SDI'] = SDIs['SDI'].astype('float64')
        self.SDI = SDIs

        # Figures are only produced when an output name was given
        if not fig_title:
            return

        base, dot, fig_ext = fig_title.rpartition('.')
        if not dot:
            base, fig_ext = fig_title, 'png'

        def save_with_points(draw_background, points_kwargs, out_name):
            """Draw a background plot, overlay the points, save and close."""
            draw_background()
            ax = sb.stripplot(jitter=True, size=5, linewidth=0.6,
                              **points_kwargs)
            fig = ax.get_figure()
            fig.savefig(out_name)
            plt.close(fig)

        # First plot SDI of all samples
        all_samples = dict(x=SDIs['SDI'])
        save_with_points(
            lambda: sb.violinplot(inner=None, saturation=0.35, **all_samples),
            all_samples, base + '_all.violinplot.' + fig_ext)
        save_with_points(
            lambda: sb.boxplot(**all_samples),
            all_samples, base + '_all.boxplot.' + fig_ext)

        if labels:
            print('Making boxplots separated by labels: ')
            for label in labels:
                print(label + '...')
                SDIs[label] = SDIs[label].astype('category')
                grouped = dict(x=label, y='SDI', data=SDIs)
                save_with_points(
                    lambda: sb.violinplot(saturation=0.35, inner=None,
                                          **grouped),
                    grouped, base + '_' + label + '.violinplot.' + fig_ext)
                save_with_points(
                    lambda: sb.boxplot(saturation=0.35, **grouped),
                    grouped, base + '_' + label + '.boxplot.' + fig_ext)

Beispiel #15

0

Datei anzeigen

Datei: visual_exploration.py Projekt: potatochip/kojak

def strip(X, y, description):
    '''Save one strip plot of y against every column of X.

    Each figure is written to 'visuals/<column>_<description>_strips' and
    the path is printed. Columns are iterated with DataFrame.items(), since
    DataFrame.iteritems() is no longer available in pandas 2.
    '''
    for feature_title, column in X.items():
        out_path = 'visuals/'+feature_title+'_'+description+'_strips'
        sns.stripplot(x=column, y=y, jitter=True)
        plt.savefig(out_path)
        print(out_path)
        plt.close()

Beispiel #16

0

Datei anzeigen

Datei: plot_types.py Projekt: Corlobin/brute-force-plotter

def bar_box_violin_dot_plots(data, category_col, numeric_col, axes,
                             file_name=None):
    """
    Draw four views of `numeric_col` per `category_col` on the given axes.

    axes[0] bar plot, axes[1] strip plot, axes[2] box plot (rows with a null
    numeric value removed), axes[3] violin plot. `file_name` is not used.

    Columns are passed to seaborn by keyword because recent releases only
    accept `data` positionally. The former `kind='violin'` argument was
    dropped: it is not a violinplot parameter.
    """
    columns = dict(x=category_col, y=numeric_col)

    sns.barplot(data=data, ax=axes[0], **columns)
    sns.boxplot(data=data[data[numeric_col].notnull()], ax=axes[2],
                **columns)
    sns.violinplot(data=data, inner="quartile", scale='count', split=True,
                   ax=axes[3], **columns)
    sns.stripplot(data=data, jitter=True, ax=axes[1], **columns)
    sns.despine(left=True)
    sns.despine(left=True)

Beispiel #17

0

Datei anzeigen

Datei: analyze_movies.py Projekt: trishaandrews/movie_analysis

def plot_domestic_origin(df, predicted=None):
    """
    Show a strip plot of 'DomLifeGross' (x) per 'OriginC' (y).

    x tick values are multiplied by 1e-6 and printed without decimals, to
    match the "(millions)" axis label. `predicted` is not used.

    Labels are set on the axes and the figure is shown through `plt`,
    because current seaborn releases do not provide `sns.plt`.
    """
    ax = plt.subplot(111)
    ax.xaxis.set_major_formatter(
        tkr.FuncFormatter(lambda x, pos: ('%.0f') % (x*1e-6)))
    sns.stripplot(x="DomLifeGross", y="OriginC", data=df, ax=ax)
    ax.set_xlabel("Domestic Lifetime Gross (millions)")
    ax.set_ylabel("Country of Origin")
    sns.despine()
    plt.show()

Beispiel #18

0

Datei anzeigen

Datei: DataAnalysisTool0.3.py Projekt: williamzhuang/SynBioCAD-STANDALONE

def conditionPlot(data):
    """
    Show a strip plot of the values per gene/condition, optionally on top of
    a box plot (the user is asked on the console).

    The table is pivoted first: 'Values' by 'Sample' rows and
    'Gene'/'Condition' columns.
    """
    table = data.pivot_table('Values', ['Sample'], ['Gene', 'Condition'])

    wants_box = input("Do you want a boxplot to go with your data? (y/n): ") == "y"
    if wants_box:
        sns.boxplot(data=table)

    sns.stripplot(data=table, size = 6, jitter = True, edgecolor = "black")

    plt.ylabel('Values')
    plt.xlabel('Gene/Condition')
    plt.show()

Beispiel #19

0

Datei anzeigen

Datei: gbrv_outdb.py Projekt: gmatteo/pseudo_dojo

    def plot_errors_for_elements(self, ax=None, **kwargs):
        """
        Box plot of the relative error (in percent) per chemical element,
        with the observations overlaid and coloured by struct_type.

        Every row contributes 100 * (this - ae) / ae once for each distinct
        element returned by species_from_formula() for its formula. The x
        tick labels are split over two axes: every second tick stays on the
        original axis, the ticks in between go to a twin axis.

        Args:
            ax: axes passed on to get_ax_fig_plt().

        Returns:
            The figure object returned by get_ax_fig_plt().
        """
        records = []
        for _, entry in self.iterrows():
            rel_err = 100 * (entry["this"] - entry["ae"]) / entry["ae"]
            records.extend(
                {
                    "element": symbol,
                    "rerr": rel_err,
                    "formula": entry.formula,
                    "struct_type": entry.struct_type,
                }
                for symbol in set(species_from_formula(entry.formula))
            )

        frame = DataFrame(records)
        order = sort_symbols_by_Z(set(frame["element"]))

        import seaborn as sns
        ax, fig, plt = get_ax_fig_plt(ax=ax)

        # Boxes first (whiskers span the full data range) ...
        ax = sns.boxplot(
            x="element", y="rerr", data=frame, ax=ax,
            order=order, whis=np.inf, color="c",
        )
        # ... then the observations, without jitter, on top.
        sns.stripplot(
            x="element", y="rerr", data=frame, ax=ax, order=order,
            hue='struct_type', jitter=0, size=4, color=".3", linewidth=0,
            palette=sns.color_palette("muted"),
        )

        sns.despine(left=True)
        ax.set_ylabel("Relative error %")

        tick_labels = ax.get_xticklabels()
        tick_positions = ax.get_xticks()
        first = min(tick_positions)
        stop = max(tick_positions) + 1
        # alternate the ticks between the original axis and its twin
        own_ticks = range(first, stop, 2)
        twin_ticks = range(first + 1, stop, 2)

        def texts_at(positions):
            return [tick_labels[i].get_text() for i in positions]

        own_texts = texts_at(own_ticks)
        twin_texts = texts_at(twin_ticks)

        ax.set_xticks(own_ticks)
        ax.set_xticklabels(own_texts, rotation=90)

        twin = ax.twiny()
        twin.set_zorder(-1)
        twin.set_xticks(twin_ticks)
        twin.set_xticklabels(twin_texts, rotation=90)
        twin.set_xlim(ax.get_xlim())

        ax.grid(True)
        return fig

Beispiel #20

0

Datei anzeigen

Datei: plotDNAA.py Projekt: sskutnik/SLDGAA

def plot_scatterBox(df,xData,yData,title,fileName,plotAspect=1,colorVal=None):
    """
    Draw a box plot of yData per xData with the observations overlaid, set
    the title and hand `fileName` to process_plot().

    The figure is 6*plotAspect wide and 6 high. When colorVal is truthy it
    is used as the colour of both layers; otherwise seaborn picks colours.
    """
    plt.figure(figsize=(6*plotAspect,6))

    layer = dict(x=xData, y=yData, data=df)
    if colorVal:
        layer['color'] = colorVal

    sns_plot = sns.boxplot(**layer)
    sns.stripplot(size=9, edgecolor='gray', linewidth=1, **layer)

    plt.title(title)
    fig = sns_plot.get_figure()
    process_plot(fileName)

Beispiel #21

0

Datei anzeigen

Datei: detect_rap.py Projekt: drsaunders/RapDetector

def plot_feature_importance(features, fitted_forest):
    """Draw a Cleveland dot plot of fitted_forest.feature_importances_,
    most important feature first, in a new figure.

    `features` holds the feature names in the same order as the importances.
    """
    plt.figure()
    importances = fitted_forest.feature_importances_
    # indices from the largest importance to the smallest
    ranking = np.argsort(importances)[::-1]
    names = np.array(features)

    with sns.axes_style("whitegrid"):
        sns.stripplot(y=names[ranking], x=importances[ranking],
                      orient="h", color='red', size=10)

    # keep the upper x limit, start the axis at zero
    _, upper = plt.xlim()
    plt.xlim(0,upper)
    plt.grid(axis='y',linestyle=':')
    plt.xlabel('Feature importance score')

Beispiel #22

0

Datei anzeigen

Datei: pseudos.py Projekt: ebousq/pseudo_dojo

    def plot_hints(self, with_soc=False, **kwargs):
        """
        Print a table of results for all pseudos and plot the 'normal'
        accuracy ecut hint per chemical symbol.

        For every item of `self` that has a dojo report, one row is built
        from its basic attributes, the last deltafactor results, the last
        GBRV results for 'fcc' and 'bcc', and the ecut / pawecutdg hint for
        accuracy "normal". Items without a report are skipped with a
        message.

        Args:
            with_soc: forwarded to the report's get_last_*_results calls.

        Returns:
            The figure object returned by get_ax_fig_plt().

        Fixes over the previous version: the GBRV results are merged into
        the row inside the struct_type loop (before, only the last struct
        type was merged), and the y axis is labelled "ecut" instead of
        "Relative error %".
        """
        import pandas as pd
        import seaborn as sns

        # Build pandas dataframe with results.
        rows = []
        for p in self:
            if not p.has_dojo_report:
                cprint("Cannot find dojo_report in %s" % p.basename, "magenta")
                continue
            report = p.dojo_report
            row = {att: getattr(p, att) for att in ("basename", "symbol", "Z", "Z_val", "l_max")}

            # Get deltafactor data with/without SOC
            row.update(report.get_last_df_results(with_soc=with_soc))
            for struct_type in ["fcc", "bcc"]:
                row.update(report.get_last_gbrv_results(struct_type, with_soc=with_soc))

            # Get the hints
            hint = p.hint_for_accuracy(accuracy="normal")
            row.update(dict(ecut=hint.ecut, pawecutdg=hint.pawecutdg))

            rows.append(row)

        frame = pd.DataFrame(rows)

        def print_frame(x):
            """Print the whole frame without row/column truncation."""
            with pd.option_context('display.max_rows', len(x),
                                   'display.max_columns', len(list(x.keys()))):
                print(x)

        print_frame(frame)

        ax, fig, plt = get_ax_fig_plt(ax=None)

        # Box plot
        ax = sns.boxplot(x="symbol", y="ecut", data=frame, ax=ax,
                         whis=np.inf, color="c")
        # Add in points to show each observation
        sns.stripplot(x="symbol", y="ecut", data=frame, ax=ax,
                      jitter=True, size=5, color=".3", linewidth=0)

        sns.despine(left=True)
        ax.set_ylabel("ecut")
        ax.grid(True)

        return fig

Beispiel #23

0

Datei anzeigen

Datei: plotting.py Projekt: agartland/cycluster

def outcomeBoxplot(cyDf, cyVar, outcomeVar, printP=True, axh=None):
    """
    Show a box plot with overlaid points of cyVar for outcome 0 vs. 1.

    Args:
        cyDf: table containing the columns cyVar and outcomeVar.
        cyVar: name of the plotted column.
        outcomeVar: name of the 0/1 outcome column.
        printP: when true, annotate the top of the axes with the p-value of
            stats.ranksums between the outcome == 1 and outcome == 0 groups
            (rows with missing values dropped).
        axh: axes to draw into; defaults to the current axes. The axes are
            cleared first.

    Tick labels and the annotation are applied to `axh` itself; previously
    they went to pyplot's current axes, which is wrong when a different
    axes object is passed in.
    """
    if axh is None:
        axh = plt.gca()
    axh.cla()
    sns.boxplot(y=cyVar, x=outcomeVar, data=cyDf, ax=axh, order=[0,1])
    sns.stripplot(y=cyVar, x=outcomeVar, data=cyDf, jitter=True, ax=axh, order=[0,1])
    axh.set_xticks([0,1])
    axh.set_xticklabels(['False', 'True'])
    if printP:
        tmp = cyDf[[cyVar, outcomeVar]].dropna()
        z, pvalue = stats.ranksums(tmp[cyVar].loc[tmp[outcomeVar] == 1], tmp[cyVar].loc[tmp[outcomeVar] == 0])
        annParams = dict(textcoords='offset points', xytext=(0,-5), ha='center', va='top', color='black', weight='bold', size='medium')
        # centred between the two groups, at the top of the y range
        axh.annotate('p = %1.3g' % pvalue, xy=(0.5,axh.get_ylim()[1]), **annParams)
    plt.show()

Beispiel #24

0

Datei anzeigen

Datei: plotting.py Projekt: agartland/cycluster

def plotCrossCompartmentBoxplot(cyDfA, cyDfB):
    """
    Box plot (with overlaid points) of the cross-correlations between the
    columns of cyDfA and cyDfB, split into pairs with the same name
    ('Same') and pairs with different names ('Different').

    Correlations come from crosscorr() on both tables with their columns
    sorted. The current figure is cleared first; y limits are (-1, 1).
    """
    rho, _pvalue, _qvalue = crosscorr(cyDfA[sorted(cyDfA.columns)],
                                      cyDfB[sorted(cyDfB.columns)])

    same_name, other_name = [], []
    for row_label in rho.index:
        for col_label in rho.columns:
            target = same_name if row_label == col_label else other_name
            target.append(rho.loc[row_label, col_label])

    groups = ['Same']*len(same_name) + ['Different']*len(other_name)
    a = pd.DataFrame({'Group':groups, '$\\rho$':same_name+other_name})

    plt.clf()
    columns = dict(x='Group', y='$\\rho$', data=a)
    sns.boxplot(**columns)
    sns.stripplot(jitter=True, **columns)
    plt.xlabel('')
    plt.ylim((-1,1))
    plt.tight_layout()

Beispiel #25

0

Datei anzeigen

Datei: DataAnalysisTool0.3.py Projekt: williamzhuang/gcms-data-analysis

def parseExcelManual(filename):
    """
    Interactively plot one column of an Excel sheet against another.

    The sheet is read with pandas, its column names are printed and the user
    is asked for the x and y columns (re-prompted until the name exists).
    Rows with missing values are dropped, the data is reshaped so that every
    x value becomes a column, and a strip plot is drawn, optionally
    normalised by the largest value and on top of a box plot. The reshaped
    table is handed to exportExcel() before the figure is shown.
    """
    df = pd.read_excel(filename)
    print("We found the following columns:")
    print(df.columns.values)
    print("")

    def ask_for_column(question):
        # keep asking until an existing column name is entered
        choice = input(question)
        while choice not in df.columns.values:
            print("Invalid column name")
            choice = input("Please enter a valid column name: ")
        return choice

    xcol = ask_for_column("Which column would you like to occupy the x-axis?: ")
    ycol = ask_for_column("Which column would you like to occupy the y-axis?: ")

    title = ycol + " vs " + xcol

    # keep only the two chosen columns and the rows where both have a value
    pairs = pd.DataFrame(data={xcol: df[xcol], ycol: df[ycol]})
    pairs = pairs.dropna(axis = 0)

    # one column per x value, one row per original row
    reorg = pairs.pivot_table(ycol, xcol, pairs.index).T

    box = input("Do you want a boxplot? (y/n): ")
    normalize = input("Do you want to normalize the y-axis? (y/n): ")
    if normalize == 'y':
        # missing cells count as 0 when looking for the maximum
        reorg = reorg / np.amax(reorg.fillna(0).values)
        title = title + " Normalized"

    if box == "y":
        makeBoxplot(reorg)
    sns.stripplot(data=reorg, size = 7, jitter = True,
                  palette = sns.color_palette("Set1", n_colors=8, desat=.9))

    plt.ylabel(ycol)
    plt.xlabel(xcol)
    plt.title(title)
    plt.gca().set_ylim(bottom = 0)

    exportExcel(reorg)

    plt.show()

Beispiel #26

0

Datei anzeigen

Datei: Main.py Projekt: J-Sieber/Forest-Cover-Type

def Create_Plot(X,y):
    """
    Save a strip plot of X against y.

    Axis labels are the `.name` attributes of X and y with underscores
    replaced by spaces. The figure is saved as
    "Plots/<ylabel>vs<xlabel>.pdf" (spaces removed).

    Title and labels are set on the axes object, because current seaborn
    releases do not provide `sns.plt`.
    """
    xlab = X.name.replace("_"," ")
    ylab = y.name.replace("_"," ")
    figlab = ylab + " vs " + xlab
    filelab =  "Plots/" + figlab.replace(" ","") + ".pdf"
    f, ax = plt.subplots(figsize=(5, 5))
    sns.stripplot(x = X, y = y, jitter = True, size = 5, linewidth = 0.1, ax = ax)
    ax.set_title(figlab)
    ax.set_xlabel(xlab)
    ax.set_ylabel(ylab)
    savefig(filelab)

Beispiel #27

0

Datei anzeigen

Datei: RMSD_Analysis_Mutant_PDBs.py Projekt: jaaamessszzz/DDGBenchmarking

    def BB_vs_Sidechain():
        """
        Plot point-mutant RMSD per backbone-RMSD bin, once per DDG type.

        Works on `bb_vs_sidechain_df` and `output_pdf` from the enclosing
        scope: rows are assigned to 'BB Group' bins and to a DDG magnitude
        group, then for each of 'Experimental DDG' and 'Predicted DDG' a box
        plot with an overlaid strip plot (coloured by the DDG group) is
        appended to `output_pdf`.
        """
        # Make bins for BB RMSDs
        number_of_bins = 5
        bin_size = len(bb_vs_sidechain_df['WT-Mutant Backbone RMSD']) / number_of_bins + 1

        # Assign arbitrary bin identifiers for BB Group
        # NOTE(review): uses the index label arithmetically -- assumes an
        # integer index; confirm against the caller.
        for index, row in bb_vs_sidechain_df.iterrows():
            bb_vs_sidechain_df.loc[index, 'BB Group'] = ((index + 1) // bin_size)
        # Find bin boundaries for BB group and add to dict
        # NOTE(review): first/last row of each group are taken as the bin
        # boundaries -- presumably the frame is sorted by backbone RMSD; confirm.
        bin_rename_dict = {}
        for name, group in bb_vs_sidechain_df.groupby('BB Group'):
            bin_rename_dict[name] = '%s -\n%s' % (group['WT-Mutant Backbone RMSD'].iloc[0], group['WT-Mutant Backbone RMSD'].iloc[len(group) - 1])
        # Rename bin identifiers to bin boundary values in BB group
        for index, row in bb_vs_sidechain_df.iterrows():
            bb_vs_sidechain_df.loc[index, 'BB Group'] = bin_rename_dict[bb_vs_sidechain_df.loc[index, 'BB Group']]

        # Assign bin identifiers for DDG Group
        for DDG_type in ['Experimental DDG', 'Predicted DDG']:
            # classify every row by the absolute size of its DDG value
            for index, row in bb_vs_sidechain_df.iterrows():
                if row[DDG_type] > 2.5 or row[DDG_type] < -2.5:
                    bb_vs_sidechain_df.loc[index, DDG_type + ' Group'] = 'Extra Large DDG (DGG > 2.5 REU or DDG < -2.5 REU)'
                elif row[DDG_type] > 1 or row[DDG_type] < -1:
                    bb_vs_sidechain_df.loc[index, DDG_type + ' Group'] = 'Large DDG (2.5 REU > DGG > 1 REU or -2.5 < DDG < -1 REU)'
                elif row[DDG_type] > 0.5 or row[DDG_type] < -0.5:
                    bb_vs_sidechain_df.loc[index, DDG_type + ' Group'] = 'Medium DDG (1 REU > DGG > 0.5 REU or -1 < DDG < -0.5 REU)'
                else:
                    bb_vs_sidechain_df.loc[index, DDG_type + ' Group'] = 'Small DDG (0.5 REU > DDG > -0.5 REU)'

            sns.set_style('white', {'axes.grid': True, 'axes.edgecolor': '0'})
            sns.set_context('paper', font_scale=1.5, rc={'lines.linewidth': 1})

            # one figure (one PDF page) per DDG type
            fig, ax = plt.subplots(figsize=(20, 10))
            fig.suptitle('WT PDB - Mutant PDB Neighborhood Backbone RMSD vs. \nMutant PDB - RosettaOut Point Mutant Residues All-Atom RMSD', fontsize = 24, y=1.0)
            # boxes: one per backbone bin
            with sns.cubehelix_palette(number_of_bins, start=0.5, rot=-.75):
                sns.boxplot(x=bb_vs_sidechain_df['BB Group'],
                            y=bb_vs_sidechain_df['Point Mutant RMSD'],
                            ax=ax
                            )
            # points: coloured by the DDG group of the current DDG type
            with sns.color_palette("husl", number_of_bins):
                sns.stripplot(x='BB Group',
                              y='Point Mutant RMSD',
                              hue= DDG_type + ' Group',
                              data=bb_vs_sidechain_df,
                              jitter=True,
                              ax=ax
                              )

            ax.set(xlabel='WT PDB - Mutant PDB Neighborhood Backbone RMSD', ylabel='Mutant PDB - RosettaOut Point Mutant Residues All-Atom RMSD')
            output_pdf.savefig(fig, pad_inches=1, bbox_inches='tight')

Beispiel #28

0

Datei anzeigen

Datei: frequency-plot.py Projekt: jerome-white/nyc-traffic

def pltvar(data, labels, stem):
    """Draw a box plot with overlaid points of 'deviation' and save it as PNG.

    Parameters
    ----------
    data : DataFrame-like
        Table passed to seaborn; must contain the column named by
        ``labels[0]`` and a 'deviation' column.
    labels : tuple
        ``(xlabel, ylabel)``. ``xlabel`` is both the x column name and part
        of the output file name; ``ylabel`` is only used in the axis title.
    stem : str
        Suffix for the output file name.

    The figure is written to ``source / 'variance-<xlabel>-<stem>.png'``
    (``source`` comes from the enclosing module) and then closed.
    """
    (xlabel, ylabel) = labels

    # Plot the table that was passed in; the original read an outer-scope
    # ``df`` here and silently ignored the ``data`` argument.
    kwargs = { 'x': xlabel, 'y': 'deviation', 'data': data }
    sns.boxplot(palette="PRGn", whis=np.inf, **kwargs)
    sns.stripplot(jitter=True, size=3, color='.3', linewidth=0, **kwargs)

    ax = plt.gca()
    ax.set_xlabel(xlabel.title() + ' window (minutes)')
    ax.set_ylabel(ylabel.title() + ' window std. dev. (jams/day)')

    fname = '-'.join([ 'variance', xlabel, stem ])
    dest = source.joinpath(fname).with_suffix('.png')
    plt.savefig(str(dest))
    plt.close()

Beispiel #29

0

Datei anzeigen

Datei: recipe_recommendation.py Projekt: lingcheng99/Flavor-Network

def plot_similardishes(idx,xlim):
    """Plot the 20 recipes most similar in flavor to recipe ``idx``.

    Parameters
    ----------
    idx : int
        Row of ``yum_cos`` (and label in ``yum_ingr2``) of the query recipe.
    xlim : sequence
        x-axis limits passed to ``ax.set_xlim``.

    Reads the module-level ``yum_ingr2`` (recipe table with 'cuisine' and
    'recipeName' columns) and ``yum_cos`` (pairwise cosine similarities).
    """
    # Take positions -21..-2 of the ascending argsort and reverse them, so the
    # best match comes first. The very last position is skipped, presumably
    # because it is the query recipe itself - TODO confirm.
    match = yum_ingr2.iloc[yum_cos[idx].argsort()[-21:-1]][::-1].copy()
    # ``.values`` replaces ``Index.get_values()``, removed in pandas 1.0.
    newidx = match.index.values
    match['cosine'] = yum_cos[idx][newidx]
    match['rank'] = range(1, 1 + len(newidx))

    plt.figure(figsize=(10,10))
    ax = sns.stripplot(y='rank', x='cosine', data=match, jitter=0.05,
                       hue='cuisine',size=15,orient="h")
    # ``.loc`` replaces the removed ``.ix`` indexer (label-based lookup).
    ax.set_title(yum_ingr2.loc[idx,'recipeName']+'('+yum_ingr2.loc[idx,'cuisine']+')',fontsize=18)
    ax.set_xlabel('Flavor cosine similarity',fontsize=18)
    ax.set_ylabel('Rank',fontsize=18)
    ax.yaxis.grid(color='white')
    ax.xaxis.grid(color='white')

    # Annotate every point with its recipe name, slightly right of the marker.
    for label, y, x in zip(match['recipeName'], match['rank'], match['cosine']):
         ax.text(x+0.001,y-1,label, ha = 'left')
    ax.legend(loc = 'lower right',prop={'size':14})
    ax.set_ylim([20,-1])
    ax.set_xlim(xlim)

Beispiel #30

0

Datei anzeigen

Datei: cluster_1.py Projekt: iandriver/RNA-sequence-tools

def log2_oulierfilter(df_by_cell, plot=False):
    """Drop columns whose mean log2 expression falls below a cutoff.

    Parameters
    ----------
    df_by_cell : pandas.DataFrame
        Expression table; columns are the units being filtered.
    plot : bool, default False
        If true, show a box/strip plot of the kept columns and a
        distribution plot of their means.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(log2(x + 1) of the kept columns, its transpose)``. When
        ``find_top_common_genes`` returns an empty frame, the unfiltered
        log2 table and its transpose are returned instead.
    """
    log2_df = np.log2(df_by_cell+1)
    top_log2 = find_top_common_genes(log2_df)
    if top_log2.empty:
        print("no common genes found")
        return log2_df, log2_df.transpose()
    # Order columns by descending mean. ``reindex(columns=...)`` replaces the
    # removed ``reindex_axis``. The former ``pd.to_numeric(log2_df)`` call was
    # dropped: it raises TypeError on a 2-D frame and its result was unused.
    col_means = top_log2.mean(axis=0).sort_values(ascending=False)
    log2_sorted = top_log2.reindex(columns=col_means.index)
    xticks = []
    keep_col= []
    # Cutoff: average minus standard deviation, reduced to a single number.
    log2_cutoff = np.average(log2_sorted)-np.std(log2_sorted)
    avg_cutoff = np.average(log2_cutoff)
    for col, m in zip(log2_sorted.columns.tolist(),log2_sorted.mean()):
        if m > avg_cutoff:
            keep_col.append(col)
            xticks.append(col+' '+str("%.2f" % m))
    filtered_df_by_cell = df_by_cell[keep_col]
    # For plotting only: log2 of strictly positive values (others become NaN).
    filtered_log2 = np.log2(filtered_df_by_cell[filtered_df_by_cell>0])
    if plot:
        ax = sns.boxplot(data=filtered_log2, whis= .75, notch=True)
        ax = sns.stripplot(x=filtered_log2.columns.values, y=filtered_log2.mean(axis=0), size=4, jitter=True, edgecolor="gray")
        xtickNames = plt.setp(ax, xticklabels=xticks)
        plt.setp(xtickNames, rotation=90, fontsize=9)
        plt.show()
        plt.clf()
        sns.distplot(filtered_log2.mean())
        plt.show()
    log2_expdf_cell = np.log2(filtered_df_by_cell+1)
    log2_expdf_gene = log2_expdf_cell.transpose()
    return log2_expdf_cell, log2_expdf_gene

Beispiel #31

0

Datei anzeigen

Datei: load_aggregated_results.py Projekt: homaralex/disentanglement_lib

def plot_results(
    df,
    reg_weight_col,
    out_dir,
    dataset,
):
    """Render violin, box and mean plots of every metric in DIS_METRICS.

    Three 3x4 figures are created. For each metric one panel per figure is
    filled with the metric plotted against ``reg_weight_col``. The figures
    are saved to ``out_dir`` as ``<dataset>_violin.png``,
    ``<dataset>_box.png`` and ``<dataset>_mean.png`` and then closed.
    """

    def _tilt_xticks(axis):
        # Rotate the x tick labels so long weight values stay readable.
        for label in axis.get_xticklabels():
            label.set_rotation(45)

    grid = dict(nrows=3, ncols=4, figsize=(30, 30))
    fig_violin, axes_violin = plt.subplots(**grid)
    fig_box, axes_box = plt.subplots(**grid)
    fig_mean, axes_mean = plt.subplots(**grid)

    panels = zip(
        DIS_METRICS,
        axes_violin.flatten(),
        axes_box.flatten(),
        axes_mean.flatten(),
    )
    for metric, ax_violin, ax_box, ax_mean in panels:
        metric_df, metric = get_metric_df(df, metric)

        # Console summary: mean score per model, best first.
        print()
        per_model = metric_df.groupby(MODEL_COL_STR)[metric].mean()
        print(per_model.reset_index().sort_values(metric, ascending=False))

        metric_df = metric_df.sort_values(reg_weight_col)
        shared = dict(x=reg_weight_col, y=metric, data=metric_df)

        sns.violinplot(cut=0, ax=ax_violin, **shared)
        _tilt_xticks(ax_violin)

        sns.boxplot(ax=ax_box, **shared)
        _tilt_xticks(ax_box)

        # One point per regularization weight: the mean of the metric.
        means = metric_df.groupby(reg_weight_col)[metric].mean()
        sns.stripplot(
            x=["{:.2E}".format(weight) for weight in means.index.values],
            y=means.values,
            ax=ax_mean,
            size=25,
        )
        ax_mean.set_ylabel(metric)
        _tilt_xticks(ax_mean)

    outputs = (
        (fig_violin, out_dir / f'{dataset}_violin.png'),
        (fig_box, out_dir / f'{dataset}_box.png'),
        (fig_mean, out_dir / f'{dataset}_mean.png'),
    )
    for figure, target in outputs:
        figure.savefig(target)

    for figure, _ in outputs:
        plt.close(figure)

Beispiel #32

0

Datei anzeigen

sns.palplot(sns.cubehelix_palette(n_colors=8, start=1.7, rot=0.2, dark=0, light=.95, reverse=True))


# *start* is the hue at the start of the helix and lies between 0 and 3. *rot* (short for rotation) is typically kept between -1 and 1. *reverse* reverses the color ordering and *hue* sets the saturation of the colors.

# ## Generic Seaborn Plots:

# In[36]:


# Loading up built-in dataset:
tips = sns.load_dataset("tips")

# Creating a strip plot of the total bill per day, all points in green:
sns.stripplot(x="day", y="total_bill", data=tips, color="g")


# This does the job, but we can get a better result by giving each day its own color instead of a single color. For this, we replace the `color` parameter with the `palette` parameter:

# In[40]:


# Set Theme:
sns.set_style('whitegrid')

# Creating a swarm plot (non-overlapping points) of the total bill per day:
sns.swarmplot(x="day", y="total_bill", data=tips, palette="viridis")


# In[ ]:

Beispiel #33

0

Datei anzeigen

Datei: mercari_search_20201124160007.py Projekt: KustomApe/nerdape

            try:
                if len(item_box.find_elements_by_css_selector(".item-sold-out-badge")) > 0:
                    sold = "SOLD"
                else:
                    sold = "NOT SOLD"
                sub_title = item_box.find_element_by_class_name("items-box-body")
                title = sub_title.find_element_by_tag_name("h3").text
                item_price = item_box.find_element_by_css_selector(".items-box-price")
                price_text = item_price.text
                price_text = re.sub(r",", "", price_text).lstrip("¥ ")
                price_text_int = int(price_text)
                print(price_text_int)
                url = item_box.find_element_by_tag_name("a").get_attribute("href")
                data  = pd.Series( [ sold,title,price_text_int,url ], index=df_main.columns )
                grdata = pd.Series( [ sold,price_text_int ], index=df_graf.columns )
                df_main = df_main.append( data, ignore_index=True )
                df_graf = df_graf.append( grdata, ignore_index=True )
            except Exception as e:
                print(e)
    else:
        print('No items anymore...')
        break

# Show the full scraped table on the console.
print(df_main)
# Price distribution per sold/not-sold status.
sns.stripplot(x='SOLD', y='PRICE', data=df_graf)
plt.show()
# Pairwise relationships of the columns in df_graf, colored by sold status.
sns.pairplot(df_graf,hue="SOLD")
plt.show()
print('Writing out to CSV file...')
# utf_8_sig writes UTF-8 with a byte-order mark.
df_main.to_csv("pricedata.csv", encoding="utf_8_sig")
print("Done")

Beispiel #34

0

Datei anzeigen

Datei: 20200128_erosion_dilation_annotation_analysis.py Projekt: marcelomata/lightsheet_helper_scripts

# Plot x against itself to draw the identity line as a visual reference.
plt.plot(np.array(all_struct_voxels), np.array(all_struct_voxels), color = "gray", linestyle = "dashdot", linewidth = 1) # identity line
plt.ylabel("Voxels in 80um eroded volume")
plt.xlabel("Voxels in original volume")
plt.xlim([0,250000]);plt.ylim([0, 150000])
plt.savefig(os.path.join(fig_dst, "voxels_scatter_org_vs_eroded_250000_voxels.pdf"), bbox_inches = "tight")

#%%

# Voxel counts of the missing structures in ascending order, and the
# structure names reordered to match.
missing_struct_voxels_sort = np.sort(np.array(missing_struct_voxels))
missing_struct_names_sort = np.array(missing_struct_names)[np.argsort(np.array(missing_struct_voxels))]

# Long-format table: one row per structure, labelled by the group it came from.
# NOTE(review): `+` is presumably list concatenation here (both operands look
# like Python lists) - confirm they are not NumPy arrays.
df = pd.DataFrame()
df["num_voxels"] = missing_struct_voxels+all_struct_voxels
df["type"] = ["eroded"]*len(missing_struct_voxels) + ["original"]*len(all_struct_voxels)

# Individual points with a transparent box plot drawn over them.
sns.stripplot(x = "num_voxels", y = "type", data = df,  color = "crimson", orient = "h")
sns.boxplot(x = "num_voxels", y = "type", data = df, orient = "h", showfliers=False, showcaps=False, 
            boxprops={'facecolor':'None'})
plt.xlim([0, 200000])
plt.xlabel("Total number of voxels in structure")
plt.ylabel("Structures 'zero'ed' out vs. all original structures")
plt.savefig(os.path.join(fig_dst, "boxplot_total_voxels_org_vs_eroded.pdf"), bbox_inches = "tight")

#%%
# Collect name, id, parent name and total voxel count of the missing structures.

dataf = pd.DataFrame()
dataf["name"] = missing_struct_names
dataf["id"] = missing_struct_ids
dataf["parent_name"] = missing_struct_parents
dataf["voxels_in_structure"] = missing_struct_voxels

Beispiel #35

0

Datei anzeigen

        return []
    return insert_packet(spreading_factor - 1) + [spreading_factor] + insert_packet(spreading_factor - 1)

# Sequence returned by insert_packet(12), stored as an ordered categorical
# over the levels 7..12.
sf_as_category = pd.Categorical(insert_packet(12), categories=[7, 8, 9, 10, 11, 12], ordered=True)
pyramid = pd.DataFrame({'SF': sf_as_category})
pyramid['seq_num'] = pyramid.index
cmap = sns.color_palette('Blues_d', 6)
fig, ax = plt.subplots(figsize=(4, 3))
plot = sns.scatterplot(x=pyramid.index, y='SF', data=pyramid, hue='SF', legend=False, palette=cmap, ax=ax)
plot.set_title('Spreading factor sequence')
plot.set_ylabel('spreading factor')
plot.set_xlabel('sequence number')
fig.savefig("sf-sequence.svg")

# Load the packet table, keep the rows of one gateway and index them by
# reception time.
lora_mons_static = pd.read_pickle('data/lora_mons_static_clean.pkl.gz')
channel = lora_mons_static.query('gtw_id == "eui-0000024b08030186"')[['received', 'dev_id', 'rssi', 'snr', 'data_rate']].set_index('received').sort_index()
channel.index = channel.index.tz_convert('Europe/Brussels')
# Extract the digits between 'SF' and 'BW' in the data_rate string as an integer.
channel['spreading_factor'] = channel['data_rate'].str.extract('SF([0-9]+)BW').astype(dtype=np.int64)

# NOTE(review): no new figure is created before the plots below, so when run
# as a plain script they are drawn onto the current axes - confirm this is
# only ever run cell by cell.
ax = sns.stripplot(x='spreading_factor', y='rssi', data=channel, alpha=0.3)
ax.set(ylabel='RSSI (dBm)', xlabel='Spreading Factor', title='Distribution of received packets RSSI');
ax.figure.savefig('rssi_sf.png')

ax = sns.stripplot(x='spreading_factor', y='snr', data=channel, alpha=0.3)
ax.set(ylabel='SNR (dB)', xlabel='Spreading Factor', title='Distribution of received packets SNR');
ax.figure.savefig('snr_sf.png')

ax = sns.scatterplot(x='snr', y='rssi', data=channel, alpha=0.3)
ax.set(xlabel='SNR (dB)', ylabel='RSSI (dBm)');
ax.figure.savefig('rssi_snr.png')

Beispiel #36

0

Datei anzeigen

    fig.suptitle(f"{name} ({n_verts})", fontsize=40, y=1.04)
    plt.tight_layout()
    stashfig(f"{g}-gridplot-sf-sorted")
    print()

#%%

# Tabulate the results for the shuffled and the true graphs.
shuffle_df = pd.DataFrame(shuffled_triu_outs)
true_df = pd.DataFrame(true_triu_outs)
fig, ax = plt.subplots(1, 1, figsize=(10, 6))
# Shuffled results: semi-transparent, jittered points per graph.
ax = sns.stripplot(
    data=shuffle_df,
    x="Graph",
    y="Proportion",
    linewidth=1,
    alpha=0.4,
    jitter=0.3,
    size=5,
    ax=ax,
)
# ax = sns.violinplot(data=shuffle_df, x="Graph", y="Proportion", ax=ax)

ax = sns.stripplot(
    data=true_df,
    x="Graph",
    y="Proportion",
    marker="_",
    linewidth=2,
    s=90,
    ax=ax,
    label="True",

Beispiel #37

0

Datei anzeigen

    pickle.dump(model,f)
# Score every data split with the second column of predict_proba.
training['prob']=model.predict_proba(training[features])[:,1]
testing['prob']=model.predict_proba(testing[features])[:,1]
oot_data['prob']=model.predict_proba(oot_data[features])[:,1]
# The bins returned for the training split are reused for the other splits.
build_cut,bins = model_method.ks_lift_chart(training['y'],training['prob'],'train')
test_cut=model_method.ks_lift_chart(testing['y'],testing['prob'],'testing',bins=bins)
oot_cut = model_method.ks_lift_chart(oot_data['y'],oot_data['prob'],'oot',bins=bins)

month_ks=model_method.month_ks(testing,'app_date')

feature_results = var_cut.get_feature_result(training[features+['y']],'y')

# The plots below use the first 10 entries of the importance table.
# NOTE(review): presumably sorted by importance, descending - confirm.
importance_df = model_method.get_xgboost_importances(model,return_df=True)
data_var = model_method.var_avg_plot([training,testing,oot_data],importance_df.index.tolist()[:10],q=10)
model_method.var_lift_plot(training['y'],training['xx2337'],'xx2337')

model_method.var_cut_plot([training,testing],importance_df.index.tolist()[:10],q=10)

var_psi_ie = model_method.var_psi_chart(training,testing,importance_df.index.tolist()[:10],'app_date')

#model_method.get_plot_tree(model)
#PSI(testing[testing['y']==1]['prob'],training[training['y']==1]['prob'])
#for i in importance_df.index.tolist()[:10]:
#    print(PSI(testing[i],training[i]))

import seaborn as sns
sns.set_style('whitegrid')
# Feature 'xx2392' per application date, with the two label classes side by side.
sns.stripplot(x='app_date',y='xx2392',hue='y',data=training,jitter = True,dodge=True)

Beispiel #38

0

Datei anzeigen

Datei: taxa_abundance_box_plot.py Projekt: sbslee/dokdo

def taxa_abundance_box_plot(
    taxa, metadata=None, hue=None, hue_order=None,
    add_datapoints=False, level=1, by=None, ax=None,
    figsize=None, count=0, exclude_samples=None,
    include_samples=None, exclude_taxa=None, sort_by_names=False,
    sample_names=None, csv_file=None, size=5, pseudocount=False,
    taxa_names=None, brief_xlabels=False, show_means=False,
    meanprops=None, show_others=True, sort_by_mean=True,
    jitter=1, alpha=None, artist_kwargs=None
):
    """Create a taxa abundance box plot.

    +----------------+-----------------------------------------------------+
    | q2-taxa plugin | Example                                             |
    +================+=====================================================+
    | QIIME 2 CLI    | qiime taxa barplot [OPTIONS]                        |
    +----------------+-----------------------------------------------------+
    | QIIME 2 API    | from qiime2.plugins.taxa.visualizers import barplot |
    +----------------+-----------------------------------------------------+

    Parameters
    ----------
    taxa : str or qiime2.Visualization
        Visualization file or object from the q2-taxa plugin.
    metadata : str or qiime2.Metadata, optional
        Metadata file or object.
    hue : str, optional
        Grouping variable that will produce boxes with different colors.
    hue_order : list, optional
        Specify the order of categorical levels of the 'hue' semantic.
    add_datapoints : bool, default: False
        Show datapoints on top of the boxes.
    level : int, default: 1
        Taxonomic level at which the features should be collapsed.
    by : list, optional
        Column name(s) to be used for sorting the samples. Using 'sample-id'
        will sort the samples by their name, in addition to other column
        name(s) that may have been provided. If multiple items are provided,
        sorting will occur by the order of the items.
    ax : matplotlib.axes.Axes, optional
        Axes object to draw the plot onto. If not given, a new figure and
        Axes are created using ``figsize``.
    figsize : tuple, optional
        Width, height in inches. Format: (float, float).
    count : int, default: 0
        The number of taxa to display. When 0, display all.
    exclude_samples : dict, optional
        Filtering logic used for sample exclusion.
        Format: {'col': ['item', ...], ...}.
    include_samples : dict, optional
        Filtering logic used for sample inclusion.
        Format: {'col': ['item', ...], ...}.
    exclude_taxa : list, optional
        The taxa names to be excluded when matched. Case insensitive;
        a column is dropped when the given name is a substring of it.
    sort_by_names : bool, default: False
        If true, sort the columns (i.e. species) to be displayed by name.
    sample_names : list, optional
        List of sample IDs to be included.
    csv_file : str, optional
        Path of the .csv file to output the dataframe to.
    size : float, default: 5.0
        Radius of the markers, in points.
    pseudocount : bool, default: False
        Add pseudocount to remove zeros.
    taxa_names : list, optional
        List of taxa names to be displayed.
    brief_xlabels : bool, default: False
        If true, only display the smallest taxa rank in the x-axis labels.
    show_means : bool, default: False
        Add means to the boxes.
    meanprops : dict, optional
        The meanprops argument as in matplotlib.pyplot.boxplot.
    show_others : bool, default: True
        Include the 'Others' category.
    sort_by_mean : bool, default: True
        Sort taxa by their mean relative abundance after sample filtration.
    jitter : float, default: 1
        Amount of jitter (only along the categorical axis) to apply.
    alpha : float, optional
        Proportional opacity of the points.
    artist_kwargs : dict, optional
        Keyword arguments passed down to the _artist() method.

    Returns
    -------
    matplotlib.axes.Axes
        Axes object with the plot drawn onto it.

    See Also
    --------
    taxa_abundance_bar_plot
    addpairs

    Examples
    --------
    Below is a simple example showing taxonomic abundance at the phylum
    level (i.e. ``level=2``).

    >>> qzv_file = '/Users/sbslee/Desktop/dokdo/data/moving-pictures-tutorial/taxa-bar-plots.qzv'
    >>> dokdo.taxa_abundance_box_plot(qzv_file, level=2, figsize=(8, 7))
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_box_plot-1.png

    We can control how many taxa to display with ``count``. Also, we can
    make the x-axis tick labels pretty with ``brief_xlabels``. We can
    manually set the x-axis tick labels with ``xticklabels``. Lastly, we
    can select specific taxa to display with ``taxa_names``.

    >>> fig, [[ax1, ax2], [ax3, ax4]] = plt.subplots(2, 2, figsize=(10, 10))
    >>> kwargs = {'level' : 2}
    >>> artist_kwargs1 = dict(title='count=4')
    >>> artist_kwargs2 = dict(title='brief_xlabels=True')
    >>> artist_kwargs3 = dict(xticklabels=['A', 'B', 'C', 'D'], title="xticklabels=['A', 'B', 'C', 'D']")
    >>> artist_kwargs4 = dict(title="taxa_names=[...]")
    >>> dokdo.taxa_abundance_box_plot(qzv_file, ax=ax1, count=4, artist_kwargs=artist_kwargs1, **kwargs)
    >>> dokdo.taxa_abundance_box_plot(qzv_file, ax=ax2, count=4, brief_xlabels=True, artist_kwargs=artist_kwargs2, **kwargs)
    >>> dokdo.taxa_abundance_box_plot(qzv_file, ax=ax3, count=4, artist_kwargs=artist_kwargs3, **kwargs)
    >>> dokdo.taxa_abundance_box_plot(qzv_file, ax=ax4, taxa_names=['k__Bacteria;p__Firmicutes', 'k__Bacteria;p__Proteobacteria'], artist_kwargs=artist_kwargs4, **kwargs)
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_box_plot-2.png

    We can group the boxes by a metadata column with ``hue``. For this
    plot, we will draw the y-axis in log scale with ``ylog``. To do
    this, we actually need to adjust the y-axis limits with ``ymin``
    and ``ymax``, and also add a pseudocount of 1 to remove 0s with
    ``pseudocount`` (because 0s cannot be shown in log scale). We will
    also add data points with ``add_datapoints=True``.

    >>> artist_kwargs = dict(ylog=True, ymin=0.05, ymax=200, show_legend=True)
    >>> dokdo.taxa_abundance_box_plot(qzv_file,
    ...                               level=2,
    ...                               figsize=(10, 7),
    ...                               hue='body-site',
    ...                               size=3,
    ...                               count=4,
    ...                               pseudocount=True,
    ...                               add_datapoints=True,
    ...                               artist_kwargs=artist_kwargs)
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_box_plot-3.png
    """
    # The table for the requested level is read from a temporary directory
    # and is fully loaded before that directory is removed.
    # NOTE(review): _parse_input presumably extracts the visualization into
    # `t` - confirm against its definition.
    with tempfile.TemporaryDirectory() as t:
        _parse_input(taxa, t)
        df = pd.read_csv(f'{t}/level-{level}.csv', index_col=0)

    # If provided, update the metadata.
    if metadata is None:
        pass
    else:
        mf = dokdo.get_mf(metadata)
        cols = _get_mf_cols(df)
        df.drop(columns=cols, inplace=True)
        # Inner join: only samples present in both tables are kept.
        df = pd.concat([df, mf], axis=1, join='inner')

    # Expose the index as a column so that `by` may refer to 'sample-id'.
    df["sample-id"] = df.index

    # If provided, sort the samples for display in the x-axis.
    if by:
        df = df.sort_values(by=by)

    # If provided, exclude the specified taxa.
    if isinstance(exclude_taxa, list):
        dropped = []
        for tax in exclude_taxa:
            for col in df.columns:
                # Case-insensitive substring match against the column name.
                if tax.lower() in col.lower():
                    dropped.append(col)
        # A column matched by several entries must be dropped only once.
        dropped = list(set(dropped))
        df = df.drop(columns=dropped)

    # Remove the metadata columns.
    cols = _get_mf_cols(df)
    mf = df[cols]
    df = df.drop(columns=cols)

    df, mf = _filter_samples(df, mf, exclude_samples, include_samples)

    # If provided, only include the specified samples.
    if isinstance(sample_names, list):
        df = df.loc[sample_names]
        mf = mf.loc[sample_names]

    if sort_by_mean:
        df = _sort_by_mean(df)

    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)

    # Add a pseudocount.
    if pseudocount:
        df = df + 1

    # Convert counts to proportions.
    df = df.div(df.sum(axis=1), axis=0)

    df = _get_others_col(df, count, taxa_names, show_others)

    if sort_by_names:
        df = df.reindex(sorted(df.columns), axis=1)

    # NOTE(review): _taxa_names is not used again in this function.
    _taxa_names = df.columns

    # Proportions to percentages, matching the default y-axis label.
    df = df * 100

    # Reshape to long format ('variable', 'value') for seaborn; the hue
    # column is carried along as an identifier when requested.
    if hue is not None:
        df2 = pd.concat([df, mf[hue]], axis=1, join='inner')
        df2 = pd.melt(df2, id_vars=[hue])
    else:
        df2 = pd.melt(df)



    # Default marker for the means: a white 'x'.
    if meanprops:
        _meanprops = meanprops
    else:
        _meanprops={'marker':'x',
                    'markerfacecolor':'white',
                    'markeredgecolor':'white',
                    'markersize':'10'}

    # Optional keyword arguments forwarded to sns.boxplot.
    d = {}

    if show_means:
        d['showmeans'] = True
        d['meanprops'] = _meanprops

    sns.boxplot(x='variable',
                y='value',
                hue=hue,
                hue_order=hue_order,
                data=df2,
                ax=ax,
                **d)

    if add_datapoints:
        # NOTE(review): presumably tells _artist to drop the legend entries
        # duplicated by the overlaid strip plot - confirm in _artist.
        remove_duplicates = True
        # Alternative method: sns.swarmplot()
        sns.stripplot(x='variable',
                      y='value',
                      hue=hue,
                      hue_order=hue_order,
                      data=df2,
                      ax=ax,
                      color='black',
                      size=size,
                      dodge=True,
                      jitter=jitter,
                      alpha=alpha)
    else:
        remove_duplicates = False

    # If provided, output the dataframe as a .csv file.
    if csv_file is not None:
        df3 = pd.concat([df, mf], axis=1, join='inner')
        df3.to_csv(csv_file)

    if brief_xlabels:
        xticklabels = [dokdo.pname(x.get_text()) for x in ax.get_xticklabels()]
    else:
        xticklabels = None

    if artist_kwargs is None:
        artist_kwargs = {}

    # Defaults first; anything the caller supplied overrides them.
    artist_kwargs = {'xrot': 45,
                     'xha': 'right',
                     'xlabel': '',
                     'ylabel': 'Relative abundance (%)',
                     'xticklabels': xticklabels,
                     'remove_duplicates': remove_duplicates,
                     **artist_kwargs}

    # With a hue the legend title is always the hue column name; this
    # overrides a caller-supplied 'legend_title'.
    if hue is not None:
        artist_kwargs['legend_title'] = hue

    ax = _artist(ax, **artist_kwargs)

    return ax

Beispiel #39

0

Datei anzeigen

Datei: hair_dryer.py Projekt: lukaswangbk/Amazon-Product-Sales-Analysis

# Box plot of review length per star rating.
warnings.filterwarnings('ignore')
plt.rcParams['figure.figsize'] = (20, 10)
plt.style.use('fivethirtyeight')
# The two Series are passed positionally as x and y.
sns.boxplot(df_hair_dryer['star_rating'], df_hair_dryer['length'], palette = 'Blues')
plt.title("Relations between Review Length and Star Rating", fontsize = 50)
plt.show()

# Stripplot
warnings.filterwarnings('ignore')
plt.rcParams['figure.figsize'] = (20, 10)
plt.style.use('fivethirtyeight')
# NOTE(review): these labels are set before the seaborn call below, which
# may reset the axis labels to the Series names - confirm the rendered output.
plt.xlabel('star_rating', fontsize = 50)
plt.ylabel('review_length', fontsize = 50)
plt.xticks(fontsize=40)
plt.yticks(fontsize=40)
sns.stripplot(df_hair_dryer['star_rating'], df_hair_dryer['length'], palette = 'Reds')
plt.title("Relations between Review Length and Star Rating", fontsize = 60)
plt.show()

'''
---------------------------------2----------------------------------
'''

'''
------------------------------Part a--------------------------------
'''

# Cleaning the reviews
import re
import nltk
nltk.download('stopwords')

Beispiel #40

0

Datei anzeigen

Datei: figure_9_plotting.py Projekt: dpshorten/CoTETE_experiments

# Compute one p value per (link, run) pair from the surrogate values.
for l in range(len(links)):
   for r in range(num_runs):
      # True when at least one surrogate exceeds the observed TE.
      if 1 in (surrogates[l, r, :] > TE[l, r]):
         # np.argmax on the boolean array gives the index of the first
         # surrogate above TE.
         # NOTE(review): presumably assumes the surrogates are sorted in
         # ascending order along the last axis - confirm.
         p_vals[l, r] = 1-(np.argmax(surrogates[l, r, :] > TE[l, r])/num_surrogates)
      else:
         p_vals[l, r] = 0

print(p_vals)
# Drop the column at index 1 (the second run) for every link.
p_vals = np.delete(p_vals, obj = 1, axis = 1)
print(p_vals)

fig, axs = plt.subplots(figsize = (6, 6))
#sns.boxplot(data = np.transpose(p_vals[:, :]), palette = "Set3", linewidth = 2, width = 0.5, fliersize = 4)
# Transposed so that each link becomes one column, i.e. one box.
sns.boxplot(data = np.transpose(p_vals[:, :]), palette = "colorblind",
             linewidth = 4, width = 0.5, fliersize = 0)
sns.stripplot(data = np.transpose(p_vals[:, :]), palette = "colorblind",
             linewidth = 3, size = 10)
# Dashed reference line at p = 0.05.
plt.hlines(0.05, -0.5, 5.5, color = "black", linewidth = 2, linestyle='--')
plt.xticks([0, 1, 2, 3, 4, 5], LINKS)

#plt.xlabel("connection")
plt.ylabel("p value")
plt.ylim([-0.1, 1.19])

#for i in [0, 1, 2, 3, 5]:
# Draw a green check mark above each of the six columns.
for i in range(6):
   plt.scatter(i, 1.1, s=1000, c='green', marker='$✓$')
#for i in [4]:
#   plt.scatter(i, 1.1, s=1000, c='red', marker='$×$')


plt.tight_layout()

Beispiel #41

0

Datei anzeigen

Datei: data_frames.py Projekt: swigder/bilingual-search-engine

 def df_function(collection_df, attribute, ax):
     """Strip-plot SCORE against ``attribute`` on ``ax``, split by ``split``.

     Categories on the x axis appear in sorted order of the distinct values
     of ``attribute``. ``split`` and ``SCORE`` come from the enclosing scope.
     """
     category_order = sorted(collection_df[attribute].unique())
     sns.stripplot(
         data=collection_df,
         x=attribute,
         y=SCORE,
         hue=split,
         order=category_order,
         jitter=1,
         dodge=True,
         alpha=0.5,
         ax=ax,
     )

Beispiel #42

0

Datei anzeigen

Datei: data_frames.py Projekt: swigder/bilingual-search-engine

 def df_function(collection_df, ax):
     """Strip-plot SCORE against ``attribute`` on ``ax``, split by ``split``.

     ``attribute``, ``split`` and ``SCORE`` come from the enclosing scope.
     The hue levels are put in sorted order only when ``split`` is truthy.
     """
     plot_kwargs = {}
     if split:
         plot_kwargs['hue_order'] = sorted(collection_df[split].unique())
     plot_kwargs.update(
         x=attribute,
         y=SCORE,
         hue=split,
         data=collection_df,
         order=sorted(collection_df[attribute].unique()),
         jitter=1,
         dodge=True,
         alpha=0.5,
         ax=ax,
     )
     sns.stripplot(**plot_kwargs)

Beispiel #43

0

Datei anzeigen

 ax = axs[row, 0]
 # sns.violinplot(
 #     data=neuron_df[neuron_df["neuron_type"].isin(row_neuron_types)],
 #     x="neuron_type",
 #     y=f"component_score_{i}",
 #     hue="neuron_type",
 #     palette=neuron_type_palette,
 #     ax=ax,
 #     inner=None,
 # )
 sns.stripplot(
     data=neuron_df[neuron_df["neuron_type"].isin(row_neuron_types)],
     x="neuron_type",
     y=f"component_score_{i}",
     hue="neuron_type",
     hue_order=row_neuron_types,  # ensures sorting stays the same
     order=row_neuron_types,  # ensures sorting stays the same
     palette=neuron_type_palette,
     ax=ax,
     s=2,
 )
 ax.get_legend().remove()
 ax.set(xlim=(-1, n_per_row),
        ylim=(y_min, y_max),
        xlabel="",
        ylabel="",
        yticks=[])
 ax.axhline(0, color="black", linestyle=":", linewidth=1)
 ax.tick_params(length=0)
 plt.setp(ax.get_xticklabels(), rotation=45)
 for tick in ax.get_xticklabels():

Beispiel #44

0

Datei anzeigen

Datei: ctclassification.py Projekt: SavitriPandey/clinicaltrialsML

def visulaization(cv_df):
    """Show accuracy per model as a box plot with the individual points on top.

    ``cv_df`` must provide the columns 'model_name' and 'accuracy'.
    """
    fig, ax = plt.subplots(figsize=(30,30))
    # Both layers read the same columns from the same table.
    columns = {'x': 'model_name', 'y': 'accuracy', 'data': cv_df}
    sns.boxplot(**columns)
    sns.stripplot(size=8, jitter=True, edgecolor="gray", linewidth=2,
                  **columns)
    plt.show()

Beispiel #45

0

Datei anzeigen

# Drop the first column of density and the first row of counts_per_struct.
density = density[:, 1:]
counts_per_struct = counts_per_struct[1:, :]
#%%

#boxplots for counts
import seaborn as sns

# Indices ordered by median count, largest first (ascending argsort,
# reversed); the first 10 names in that order are kept.
order = np.argsort(np.median(counts_per_struct.T, axis=0))[::-1]
sois_sort = np.array(nuclei)[order][:10]

#boxplots of percent counts
# NOTE(review): the data plotted is `pcounts` while the x label below reads
# "# Neurons" - confirm which quantity is intended.
plt.figure(figsize=(5, 4))
df = pd.DataFrame(pcounts)
df.columns = nuclei
# Individual points, with a transparent box plot drawn over them in the same order.
g = sns.stripplot(data=df, color="dimgrey", orient="h", order=sois_sort)
sns.boxplot(data=df,
            orient="h",
            showfliers=False,
            showcaps=False,
            boxprops={'facecolor': 'None'},
            order=sois_sort)
plt.xlabel("# Neurons")
plt.ylabel("Subnucleus")
plt.savefig(os.path.join(fig_dst, "thal_counts_boxplots.pdf"),
            bbox_inches="tight")

#%%

#boxplots of density
#first, rearrange structures in ASCENDING order (will be plotted as descending, -_-) by density and counts

Beispiel #46

0

Datei anzeigen

Datei: Movie_Domestic_Gross.py Projekt: ishita7077/Python-Projects

#explore the number of distinct values of the categorical variable Genre
#(the original line was missing its closing parenthesis: a syntax error)
len(mov.Genre.unique())

#filter the dataframe by genre
mov2 = mov[mov.Genre.isin(['action', 'adventure', 'animation', 'comedy', 'drama'])]

#filter the mov2 dataframe by studio
mov3 = mov2[mov2.Studio.isin(['Buena Vista Studios', 'Fox', 'Paramount Pictures', 'Sony', 'Universal', 'WB'])]

#check how the filters worked
print (mov3.Genre.unique())
print (mov3.Studio.unique())
print (len(mov3))

#define the style
sns.set(style="darkgrid", palette="muted", color_codes=True)

#plot the boxplots (outliers hidden; the strip plot below shows every point)
ax = sns.boxplot(data=mov3, x='Genre', y='Gross % US', orient='v', color='lightgray', showfliers=False)
plt.setp(ax.artists, alpha=0.5)

#add in points to show each observation, colored by studio
sns.stripplot(x='Genre', y='Gross % US', data=mov3, jitter=True, size=6, linewidth=0, hue = 'Studio', alpha=0.7)

ax.axes.set_title('Domestic Gross % by Genre',fontsize=30)
ax.set_xlabel('Genre',fontsize=20)
ax.set_ylabel('Gross % US',fontsize=20)

#define where to place the legend (outside the axes, top right)
ax.legend(bbox_to_anchor=(1.05, 1), loc=2)

Beispiel #47

0

Datei anzeigen

Datei: boxPlot-5min.py Projekt: lucasgabrielsilva/AnalyseCodeofConduct

})
ObjectiveC = pd.DataFrame({
    'Linguagem de Programação':
    np.repeat('Objective-C', 40),
    'Quantidade de Palavras':
    (322, 443, 446, 462, 710, 219, 446, 463, 461, 461, 764, 1059, 37, 446, 446,
     37, 446, 37, 39, 866, 462, 446, 37, 446, 666, 462, 461, 446, 461, 39, 462,
     443, 37, 443, 8, 446, 446, 461, 324, 461)
})

# Stack the per-language tables into one long table. pd.concat replaces the
# chained DataFrame.append calls (removed in pandas 2.0) and, like them,
# keeps each table's original row index.
df = pd.concat([MATLAB, Julia, Clojure, Perl, ObjectiveC])

# boxplot
ax = sns.boxplot(x='Linguagem de Programação',
                 y='Quantidade de Palavras',
                 data=df)
# add stripplot
ax = sns.stripplot(x='Linguagem de Programação',
                   y='Quantidade de Palavras',
                   data=df,
                   color="orange",
                   jitter=0.2,
                   size=2.5)

# add title
plt.title(
    "Boxplot da contagem de palavras das 5 linguagens de programação com menos códigos de conduta",
    loc="left")

# show the graph
plt.show()

Beispiel #48

0

Datei anzeigen

from pydataset import data
import seaborn as sns

# Load the 'mtcars' dataset via pydataset. The bare expressions below are
# meant for interactive use, where their value is displayed.
df = data('mtcars')
df

#%%quantiles
# Eleven evenly spaced probabilities from 0 to 1 (the deciles).
intervals = np.linspace(0, 1, 11)
intervals
df.mpg.sort_values()
# Value at position 16 of the ascending-sorted mpg values.
np.sort(df.mpg)[16]
# axis=0 computes the quantile down each column.
df.quantile(q=0.5, axis=0)  #columns
df.quantile(q=intervals, axis=0)  #columns
df.boxplot()
df.boxplot(column=['mpg'])
ax = sns.stripplot(x="gear", y="mpg", data=df)

#quantiles
# Third and first quartile of 'hp'; their difference is the interquartile range.
q3, q1 = np.percentile(df['hp'], [75, 25])
q3, q1
q3 - q1

# The same interquartile range computed with scipy.
from scipy import stats
IQR = stats.iqr(df['hp'])
IQR


#define function to calculate interquartile range
def find_iqr(x):
    """Return the interquartile range (75th minus 25th percentile) of *x*."""
    upper_quartile, lower_quartile = np.percentile(x, [75, 25])
    return upper_quartile - lower_quartile

Beispiel #49

0

Datei anzeigen

    412, 413, 414, 415, 417, 418, 419, 421, 422, 423, 425, 426, 426, 427, 427,
    429, 430, 431, 432, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442,
    444, 445, 446, 447, 448, 449, 450, 453, 454, 455, 457, 458, 459, 460, 460,
    461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 473, 474, 475, 476,
    477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491,
    492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506,
    507, 508, 509, 510, 511, 512, 513, 514, 540
]

# Build the results table from the records collected in pd_data.
df = pd.DataFrame(pd_data)

# Flag each row as targeted or not: drop the first and last 12 characters of
# the locus string, parse what remains as an integer, and test whether that
# number is in our_loci.
df["targeted"] = [int(y) in our_loci for y in [x[12:-12] for x in df["locus"]]]

# df["evalue"].plot.kde()
# plt.xlim(0.1,0)
# plt.savefig("./tblastx_evals.png")
# plt.clf()

# E-value distribution, split by the targeted flag, on a log y-axis
# (non-positive values are clipped rather than raising).
sns.stripplot(data=df, x="targeted", y="evalue", alpha=0.5)
plt.yscale('log', nonpositive='clip')
plt.tight_layout()
plt.savefig("./tblastx_evals.png")
# Clear the figure so the next plot starts from an empty canvas.
plt.clf()

# Bitscore distribution, split by the targeted flag.
sns.stripplot(data=df, x="targeted", y="bitscore", alpha=0.5)
plt.tight_layout()
plt.savefig("./tblastx_bitscores.png")
plt.clf()

Beispiel #50

0

Datei anzeigen

    X,
    y,
    cv=RepeatedKFold(n_splits=5, n_repeats=5),
    return_estimator=True,
    n_jobs=-1,
)
# One row per fitted estimator in cv_model["estimator"], one column per
# feature: the regressor's coefficients multiplied element-wise by the
# per-column standard deviation of X_train_preprocessed.
coefs = pd.DataFrame(
    [
        est.named_steps["transformedtargetregressor"].regressor_.coef_
        * X_train_preprocessed.std(axis=0)
        for est in cv_model["estimator"]
    ],
    columns=feature_names,
)
plt.figure(figsize=(9, 7))
# Individual coefficient values as points, with a box plot per feature drawn
# on the same horizontal axes.
sns.stripplot(data=coefs, orient="h", color="k", alpha=0.5)
sns.boxplot(data=coefs, orient="h", color="cyan", saturation=0.5)
# Vertical reference line at zero.
plt.axvline(x=0, color=".5")
plt.xlabel("Coefficient importance")
plt.title("Coefficient importance and its variability")
# Widen the left margin (presumably to fit the feature-name tick labels).
plt.subplots_adjust(left=0.3)

# %%
# The problem of correlated variables
# -----------------------------------
#
# The AGE and EXPERIENCE coefficients are affected by strong variability which
# might be due to the collinearity between the 2 features: as AGE and
# EXPERIENCE vary together in the data, their effect is difficult to tease
# apart.
#

Beispiel #51

0

Datei anzeigen

Datei: Multinomial_Regression.py Projekt: venumadhavgandhi/Datascience-concepts

Cars.head(10)

Cars.describe()
Cars.choice.value_counts()

# Boxplot of independent variable distribution for each category of choice
sns.boxplot(x="choice", y="cost.car", data=Cars)
sns.boxplot(x="choice", y="cost.carpool", data=Cars)
sns.boxplot(x="choice", y="cost.bus", data=Cars)
sns.boxplot(x="choice", y="cost.rail", data=Cars)
sns.boxplot(x="choice", y="time.car", data=Cars)
sns.boxplot(x="choice", y="time.bus", data=Cars)
sns.boxplot(x="choice", y="time.rail", data=Cars)

# Strip (jittered scatter) plot of every cost/time variable for each category
# of choice. The columns mirror the box plots above: the original plotted
# "cost.carpool" twice and never "cost.bus", and referenced a misspelt
# "time.cars" column instead of "time.car".
sns.stripplot(x="choice", y="cost.car", jitter=True, data=Cars)
sns.stripplot(x="choice", y="cost.carpool", jitter=True, data=Cars)
sns.stripplot(x="choice", y="cost.bus", jitter=True, data=Cars)
sns.stripplot(x="choice", y="cost.rail", jitter=True, data=Cars)
sns.stripplot(x="choice", y="time.car", jitter=True, data=Cars)
sns.stripplot(x="choice", y="time.bus", jitter=True, data=Cars)
sns.stripplot(x="choice", y="time.rail", jitter=True, data=Cars)

# Scatter plot between each possible pair of independent variable and also histogram for each independent variable
sns.pairplot(
    Cars, hue="choice"
)  # With showing the category of each car choice in the scatter plot
sns.pairplot(Cars)  # Normal

# Correlation values between each independent features
Cars.corr()

Beispiel #52

0

Datei anzeigen

Datei: ComputeMeasuresIdentityProfileOneMouseAutomatic.py Projekt: Ben-Girard/lmt-analysis

def _plotProfilePanel(ax, profileValueDictionary, title, ylabel):
    """Draw one genotype-vs-value strip plot on *ax* and style the panel."""
    y = profileValueDictionary["value"]
    x = profileValueDictionary["genotype"]
    group = profileValueDictionary["exp"]

    print("y: ", y)
    print("x: ", x)
    print("group: ", group)
    experimentType = Counter(group)
    print("Nb of experiments: ", len(experimentType))

    ax.set_xlim(-0.5, 1.5)
    # Pad the y-range by 20 % of the maximum value on both sides.
    ax.set_ylim(min(y) - 0.2 * max(y), max(y) + 0.2 * max(y))
    # x and y must be passed by keyword: seaborn >= 0.12 no longer accepts
    # them positionally.
    sns.stripplot(x=x, y=y, jitter=True, hue=group, s=5, ax=ax)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.legend().set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)


def plotProfileDataDuration(profileData, night, valueCat):
    """Plot one strip-plot panel per behavioural event and save them as a PDF.

    For every entry of ``behaviouralEventOneMouse`` except the last two, the
    values of ``<event><valueCat>`` are fetched with ``getProfileValues`` and
    plotted against genotype, coloured by experiment. A final "Activity"
    panel shows the ``totalDistance`` values. Panels fill a 5 x 6 grid row by
    row; the figure is written to
    ``FigProfile<valueCat>_Events_night_<night>.pdf`` and then closed.

    Parameters
    ----------
    profileData
        Profile data, passed through unchanged to ``getProfileValues``.
    night
        Night identifier, passed to ``getProfileValues`` and used in the
        figure title and the output file name.
    valueCat : str
        Suffix appended to each event name; also used in the title, the
        y-axis labels and the output file name.
    """
    nCols = 6
    fig, axes = plt.subplots(nrows=5, ncols=nCols, figsize=(14, 12))

    fig.suptitle(t="{} of events (night {})".format(valueCat, night),
                 y=1.2,
                 fontweight='bold')

    events = behaviouralEventOneMouse[:-2]

    #plot the data for each behavioural event
    for index, behavEvent in enumerate(events):
        row, col = divmod(index, nCols)
        event = behavEvent + valueCat
        print("event: ", event)

        profileValueDictionary = getProfileValues(profileData=profileData,
                                                  night=night,
                                                  event=event)
        _plotProfilePanel(axes[row, col], profileValueDictionary,
                          title=behavEvent,
                          ylabel="{} (frames)".format(valueCat))

    #plot the data for the total distance traveled in the next free panel
    row, col = divmod(len(events), nCols)
    profileValueDictionary = getProfileValues(profileData=profileData,
                                              night=night,
                                              event="totalDistance")
    _plotProfilePanel(axes[row, col], profileValueDictionary,
                      title="Activity",
                      ylabel="total distance (m)")

    fig.tight_layout()
    fig.savefig("FigProfile{}_Events_night_{}.pdf".format(valueCat, night),
                dpi=100)
    plt.close(fig)

Beispiel #53

0

Datei anzeigen

Datei: Bagging Iris Model.py Projekt: Omkar1811/ML-DL-Models

Y


# In[61]:


import seaborn as sns
import matplotlib.pyplot as plt
plt.figure(figsize=(20,15), facecolor='white')
plotnumber = 1
# The grid below is 3 x 3, so at most nine features can be drawn. The
# original guard compared against len(X) (the number of rows), which does not
# stop plt.subplot from being asked for a tenth panel.
max_plots = 3 * 3

for column in X:
    if plotnumber <= max_plots:
        ax = plt.subplot(3, 3, plotnumber)
        # One panel per feature: the feature's values grouped by Y.
        # x/y are keyword-only in seaborn >= 0.12.
        sns.stripplot(x=Y, y=X[column])
    plotnumber += 1
plt.show()


# In[45]:


from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y,test_size=.30,random_state=355)


# In[46]:


from sklearn.tree import DecisionTreeClassifier

Beispiel #54

0

Datei anzeigen

Datei: tryseaborn.py Projekt: mallain/aicasts.com

#!/usr/bin/env python
# coding: utf-8

# In[1]:

import seaborn as sns
sns.set_theme(style="whitegrid")
tips = sns.load_dataset("tips")
ax = sns.stripplot(x=tips["total_bill"])

# In[70]:

sns.__version__

# In[16]:

import pandas as pd

# In[46]:

tips[tips.day == "Sun"]

# In[57]:

rec = tips.iloc[[77, 90, 19, 1]]

# In[58]:

# In[59]:

type(tips)

Beispiel #55

0

Datei anzeigen

def rank_genes_groups_violin(adata,
                             groups=None,
                             n_genes=20,
                             use_raw=None,
                             split=True,
                             scale='width',
                             strip=True,
                             jitter=True,
                             size=1,
                             computed_distribution=False,
                             ax=None,
                             show=None,
                             save=None):
    """Plot ranking of genes for all tested comparisons.

    One violin plot is drawn (and saved or shown) per group: the top
    `n_genes` ranked genes on the x-axis, each violin split between the group
    and its reference.

    Parameters
    ----------
    adata : :class:`~scanpy.api.AnnData`
        Annotated data matrix.
    groups : list of `str`, optional (default: `None`)
        List of group names.
    n_genes : `int`, optional (default: 20)
        Number of genes to show.
    use_raw : `bool`, optional (default: `None`)
        Use `raw` attribute of `adata` if present. Defaults to the value that
        was used in :func:`~scanpy.api.tl.rank_genes_groups`.
    split : `bool`, optional (default: `True`)
        Whether to split the violins or not.
    scale : `str` (default: 'width')
        See `seaborn.violinplot`.
    strip : `bool` (default: `True`)
        Show a strip plot on top of the violin plot.
    jitter : `int`, `float`, `bool`, optional (default: `True`)
        If set to 0, no points are drawn. See `seaborn.stripplot`.
    size : `int`, optional (default: 1)
        Size of the jitter points.
    computed_distribution : `bool`, optional (default: `False`)
        Set to `True` if you want to use the scaled and shifted distribution
        previously computed with the `compute_distribution` in
        :func:`scanpy.api.tl.rank_genes_groups`
    show : `bool`, optional (default: `None`)
        Show the plot, do not return axis.
    save : `bool` or `str`, optional (default: `None`)
        If `True` or a `str`, save the figure. A string is appended to the
        default filename. Infer the filetype if ending on
        {'.pdf', '.png', '.svg'}.
    ax : `matplotlib.Axes`, optional (default: `None`)
        A `matplotlib.Axes` object.

    Raises
    ------
    ValueError
        If `computed_distribution` is `True` but the per-gene distribution
        columns are missing from `adata.obs`.
    """
    from ..tools import rank_genes_groups
    groups_key = str(adata.uns['rank_genes_groups']['params']['groupby'])
    if use_raw is None:
        use_raw = bool(adata.uns['rank_genes_groups']['params']['use_raw'])
    reference = str(adata.uns['rank_genes_groups']['params']['reference'])
    groups_names = (adata.uns['rank_genes_groups']['names'].dtype.names
                    if groups is None else groups)
    if isinstance(groups_names, str): groups_names = [groups_names]
    for group_name in groups_names:
        keys = []
        gene_names = adata.uns['rank_genes_groups']['names'][
            group_name][:n_genes]
        if computed_distribution:
            # Hoisted: the set of obs keys does not change inside the loop.
            obs_keys = set(adata.obs_keys())
            for gene_counter, gene_name in enumerate(gene_names):
                identifier = rank_genes_groups._build_identifier(
                    groups_key, group_name, gene_counter, gene_name)
                # The original tested the undefined name
                # `compute_distribution` here, which raised NameError; we are
                # already inside the `computed_distribution` branch, so only
                # the membership test is needed.
                if identifier not in obs_keys:
                    raise ValueError(
                        'You need to set `compute_distribution=True` in '
                        '`sc.tl.rank_genes_groups()`.')
                keys.append(identifier)
        else:
            keys = gene_names
        # make a "hue" option!
        df = pd.DataFrame()
        for key in keys:
            if adata.raw is not None and use_raw:
                X_col = adata.raw[:, key].X
            else:
                X_col = adata[:, key].X
            if issparse(X_col): X_col = X_col.toarray().flatten()
            df[key] = X_col
        df['hue'] = adata.obs[groups_key].astype(str).values
        # Use .loc rather than chained indexing (df['hue'][mask] = ...):
        # chained assignment may write to a temporary copy and is a no-op
        # under pandas copy-on-write.
        if reference == 'rest':
            df.loc[df['hue'] != group_name, 'hue'] = 'rest'
        else:
            df.loc[~df['hue'].isin([group_name, reference]), 'hue'] = np.nan
        df['hue'] = df['hue'].astype('category')
        # Long format: one row per (cell, gene) pair.
        df_tidy = pd.melt(df, id_vars='hue', value_vars=keys)
        x = 'variable'
        y = 'value'
        hue_order = [group_name, reference]
        import seaborn as sns
        ax = sns.violinplot(x=x,
                            y=y,
                            data=df_tidy,
                            inner=None,
                            hue_order=hue_order,
                            hue='hue',
                            split=split,
                            scale=scale,
                            orient='vertical',
                            ax=ax)
        if strip:
            ax = sns.stripplot(x=x,
                               y=y,
                               data=df_tidy,
                               hue='hue',
                               dodge=True,
                               hue_order=hue_order,
                               jitter=jitter,
                               color='black',
                               size=size,
                               ax=ax)
        ax.set_xlabel('genes')
        ax.set_title('{} vs. {}'.format(group_name, reference))
        ax.legend_.remove()
        if computed_distribution: ax.set_ylabel('z-score w.r.t. to bulk mean')
        else: ax.set_ylabel('expression')
        ax.set_xticklabels(gene_names, rotation='vertical')
        writekey = ('rank_genes_groups_' +
                    str(adata.uns['rank_genes_groups']['params']['groupby']) +
                    '_' + group_name)
        utils.savefig_or_show(writekey, show=show, save=save)

Beispiel #56

0

Datei anzeigen

Datei: model1.py Projekt: HELLOshi-art/hello123

    model_name = model.__class__.__name__
    accuracies = cross_val_score(model,
                                 features,
                                 labels,
                                 scoring='accuracy',
                                 cv=CV)
    for fold_idx, accuracy in enumerate(accuracies):
        entries.append((model_name, fold_idx, accuracy))
cv_df = pd.DataFrame(entries, columns=['model_name', 'fold_idx', 'accuracy'])

# Draw a box plot of the cross-validation accuracy per model, with the
# individual fold accuracies overlaid as jittered points.
sns.boxplot(x='model_name', y='accuracy', data=cv_df)
sns.stripplot(x='model_name',
              y='accuracy',
              data=cv_df,
              size=8,
              jitter=True,
              edgecolor="gray",
              linewidth=2)
plt.show()

# Fit the linear SVC model.
model = LinearSVC()
# 70/30 split stratified on the labels; the row index of data_after_stop is
# split alongside the features and labels.
X_train, X_test, y_train, y_test, indices_train, indices_test = train_test_split(
    features,
    labels,
    data_after_stop.index,
    test_size=0.3,
    stratify=labels,
    random_state=0)
model.fit(X_train, y_train)

Beispiel #57

0

Datei anzeigen

Datei: acute_single_worm.py Projekt: saulmoore1/PhD_Project

def window_boxplot_fepD_vs_BW(metadata, 
                              features, 
                              feat='motion_mode_paused_fraction',
                              windows=None,
                              save_dir=None):    
    """Box plot of one feature per window, comparing the 'BW' and 'fepD' strains.

    Individual observations are overlaid as diamond markers shaded by
    experiment date, and each window is annotated with the p-value read from
    ``stats_dir / 'pairwise_ttests' / 'fepD_window_results.csv'``.

    Parameters
    ----------
    metadata : DataFrame
        Must contain the columns 'bacteria_strain', 'window' and
        'date_yyyymmdd'; joined to ``features`` on the index.
    features : DataFrame
        Must contain the column ``feat``.
    feat : str
        Name of the feature to plot.
    windows : list, optional
        Windows to include, in plotting order. Defaults to every window
        present in ``metadata``, sorted.
    save_dir : path-like, optional
        If given, the figure is saved to
        ``save_dir/<n>_windows/<feat>.png``; otherwise it is shown.

    Notes
    -----
    Relies on the module-level names ``stats_dir``, ``WINDOW_DICT_STIM_TYPE``,
    ``pd`` and ``Path``.
    """
    import seaborn as sns
    from matplotlib import transforms
    from matplotlib import pyplot as plt

    plot_df = metadata[['bacteria_strain','window','date_yyyymmdd']].join(features[[feat]])

    if windows is not None:
        assert all(w in sorted(plot_df['window'].unique()) for w in windows)
        plot_df = plot_df[plot_df['window'].isin(windows)]
    else:
        windows = sorted(plot_df['window'].unique())

    bacteria_strain_list = ['BW', 'fepD']

    plt.close('all')
    fig, ax = plt.subplots(figsize=(max(8,len(windows)),8))
    sns.boxplot(x='window', 
                y=feat, 
                order=windows,
                hue='bacteria_strain', 
                hue_order=bacteria_strain_list, 
                dodge=True,
                ax=ax, 
                palette='tab10', 
                showfliers=False,
                data=plot_df)
    dates = list(plot_df['date_yyyymmdd'].unique())
    date_col_dict = dict(zip(dates, sns.color_palette('Greys', n_colors=len(dates))))
    for date in dates:
        # Every strain gets the same grey for a given date. The original
        # passed color=sns.set_palette(...): set_palette returns None, so the
        # colouring only worked through its side effect on the global
        # matplotlib colour cycle. An explicit palette gives the same colours
        # without touching global state.
        sns.stripplot(x='window',
                      y=feat,
                      order=windows,
                      hue='bacteria_strain',
                      hue_order=bacteria_strain_list,
                      dodge=True,
                      ax=ax,
                      s=3, marker='D',
                      palette=[date_col_dict[date]] * len(bacteria_strain_list),
                      data=plot_df[plot_df['date_yyyymmdd']==date])

    # scale plot y-axis (disabled by default)
    scale_outliers = False
    if scale_outliers:
        grouped_strain = plot_df.groupby('window')
        y_bar = grouped_strain[feat].median() # median is less skewed by outliers
        Q1, Q3 = grouped_strain[feat].quantile(0.25), grouped_strain[feat].quantile(0.75)
        IQR = Q3 - Q1
        plt.ylim(min(y_bar) - 2.5 * max(IQR), max(y_bar) + 2.5 * max(IQR))

    # load t-test results for fepD vs BW at each window
    t_test_path = stats_dir / 'pairwise_ttests' / 'fepD_window_results.csv'
    ttest_df = pd.read_csv(t_test_path, index_col=0)
    pvals = ttest_df[[c for c in ttest_df if 'pvals_' in c]]

    # annotate p-values
    for ii, window in enumerate(windows):
        p = pvals.loc[feat, 'pvals_{}'.format(window)]
        text = ax.get_xticklabels()[ii]
        assert text.get_text() == str(window)
        p_text = 'P < 0.001' if p < 0.001 else 'P = %.3f' % p
        # x in data coordinates, y in axes fraction: the bracket sits at the
        # same height above every window regardless of the y-limits.
        trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)
        plt.plot([ii-.2, ii-.2, ii+.2, ii+.2], 
                 [0.98, 0.99, 0.99, 0.98], lw=1.5, c='k', transform=trans)
        ax.text(ii, 1.01, p_text, fontsize=9, ha='center', va='bottom', transform=trans)

    # legend and labels: keep only the first n_labs handles (the box plot's),
    # dropping the duplicates added by each per-date strip plot
    n_labs = len(bacteria_strain_list)
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[:n_labs], labels[:n_labs], fontsize=12, frameon=False, loc=(1.01, 0.9),
              handletextpad=0.2)
    ax.set_xlabel('')
    ax.set_xticklabels([WINDOW_DICT_STIM_TYPE[w] for w in windows])
    ax.set_ylabel(feat.replace('_',' '), fontsize=12, labelpad=10)

    plt.subplots_adjust(right=0.85)

    if save_dir is not None:
        save_path = Path(save_dir) / '{}_windows'.format(len(windows)) / '{}.png'.format(feat)
        save_path.parent.mkdir(parents=True, exist_ok=True)
        plt.savefig(save_path, dpi=300)
    else:
        plt.show()

Beispiel #58

0

Datei anzeigen

def _ask_choice(prompt, choices, retry_message, normalise=str.upper):
    """Prompt on stdin until the normalised reply is one of *choices*; return it."""
    while True:
        reply = normalise(input(prompt))
        if reply in choices:
            return reply
        print(retry_message)


def _remove_outliers(sheet_df, n_sd=2):
    """Return a copy of *sheet_df* without rows more than *n_sd* SD from a column mean.

    Numeric columns are filtered one after another, so each mean/SD is
    computed on the rows that survived the previous columns. The
    'Unnamed: 0' index column is skipped, as are columns whose SD is
    undefined (fewer than two rows).
    """
    for col in sheet_df.select_dtypes(include='number').columns:
        if col == 'Unnamed: 0':
            continue
        mean = sheet_df[col].mean()
        sd = sheet_df[col].std()
        if pd.isna(sd):
            continue
        sheet_df = sheet_df[np.abs(sheet_df[col] - mean) <= n_sd * sd]
    return sheet_df.copy()


def generate_graphs(fname, fluoros, tps, conditions):
    """
    Takes transposed data and generates all sets of 
    dot plots and scatter plots across all conditions,
    fluorophores, and timepoints.

    The user is asked interactively (stdin) whether to filter outliers,
    whether to draw trend lines on the scatter plots, and whether to overlay
    box or violin plots on the dot plots. Every figure is displayed with
    plt.show(); nothing is returned.

    Parameters
    ----------
    fname : path of the transposed intermediate Excel workbook (one sheet per
        condition/timepoint).
    fluoros : fluorophore column names; every pair is scattered against each
        other and every one gets a dot plot.
    tps : timepoint labels; the i-th sheet of each condition is tagged with
        tps[i].
    conditions : unused; conditions are parsed from the sheet names instead.
        Kept for interface compatibility.
    """
    n = len(tps)

    #Step 0: Generate scatter list of permutations of fluorophores to graph
    scatter_list = list(itertools.combinations(fluoros, 2))
    print("Fluorophores to be plotted against each other: ")
    for pair in scatter_list:
        print(pair[0], " vs. ", pair[1], "\n")

    #Step 1: Load the transposed file
    #Returns a dictionary - the keys are the sheet names, and the values are the sheets as dataframes.
    df_dic = pd.read_excel(fname, sheet_name=None)
    xls = pd.ExcelFile(fname)

    #parse conditions from sheetnames
    sheet_conds, sheet_tps, sheet_cd_tps = parse_sheetnames(xls)
    sheet_conds = list(set(sheet_conds))

    #Step 2: Generate dictionary of all dfs grouped by condition
    #plot_dic = {'BM' : [bm1.df, bm2.df, ...], ...}
    #A sheet belongs to a condition when the condition name occurs
    #(case-insensitively) in the sheet name.
    plot_dic = {cond: [] for cond in sheet_conds}
    for cond in sheet_conds:
        cond_low = cond.lower()
        for sheet_name, sheet_df in df_dic.items():
            if cond_low in sheet_name.lower():
                plot_dic[cond].append(sheet_df)

    #Step 3: Remove statistical outliers and add Timepoint column to each dataframe
    yes_no_retry = "You did not type Y or N. Please reenter. \n"
    filter_outliers = _ask_choice(
        "Would you like to filter outliers from your plots?\nOutliers are values >2 SD from the mean. Answer Y or N: ",
        ("Y", "N"), yes_no_retry) == "Y"
    for sheets_list in plot_dic.values():
        if filter_outliers:
            #Store the filtered frame back into the list: the original only
            #rebound a local variable, so the filter never took effect.
            for idx, sheet_df in enumerate(sheets_list):
                sheets_list[idx] = _remove_outliers(sheet_df)
        for i in range(n):
            df = sheets_list[i]
            df['Timepoint'] = tps[i]
            #Remove unnamed column; tolerate its absence (a sheet shared by
            #two conditions is visited twice)
            df.drop('Unnamed: 0', inplace=True, axis=1, errors='ignore')

    #Step 4: Plot Scatter plots with or without trendline
    want_trend = _ask_choice("Would you like a trend line? Answer Y or N: ",
                             ("Y", "N"), yes_no_retry) == "Y"
    for cond in sheet_conds:
        for pair in scatter_list:
            if want_trend:
                #lmplot == scatter plot with trendline
                sns.lmplot(x=pair[1],
                           y=pair[0],
                           hue='Timepoint',
                           data=pd.concat(plot_dic[cond]),
                           ci=None,
                           scatter_kws={'edgecolor': "white"})
            else:
                #relplot == scatter plot without trendline
                sns.relplot(x=pair[1],
                            y=pair[0],
                            hue='Timepoint',
                            data=pd.concat(plot_dic[cond]),
                            kind='scatter')
            plt.xlabel(pair[1] + " Intensity (AU)")
            plt.ylabel(pair[0] + ' Intensity (AU)')
            plt.xlim(0, None)
            plt.ylim(0, None)
            plt.title(cond)
            plt.show()

    #Step 5: Restructure data frames for Dot Plots
    #Tag every frame with its condition and stack them in a single concat.
    tagged = [cond_df.assign(Condition=cond)
              for cond, cond_dfs in plot_dic.items()
              for cond_df in cond_dfs]
    dotplot_df = pd.concat(tagged) if tagged else pd.DataFrame()

    #Step 6: Plot Dot Plots
    print("\nDot plots to be plotted: ")
    for fluoro in fluoros:
        print(f"{fluoro}\n")
    #ADD BOXPLOT OR VIOLINPLOT OR NOT:
    overlay = None
    if _ask_choice(
            "Would you like a box plot or violin plot overlaid on the dot plots? Answer Y or N: ",
            ("Y", "N"), yes_no_retry) == "Y":
        kind = _ask_choice(
            "Please enter box for box plot, and enter violin for violin plot: ",
            ("box", "violin"),
            "You did not type box or violin. Please reenter. \n",
            normalise=str.lower)
        overlay = sns.boxplot if kind == "box" else sns.violinplot

    for fluoro in fluoros:
        if overlay is not None:
            overlay(x="Condition",
                    y=fluoro,
                    data=dotplot_df,
                    hue='Timepoint')
        sns.stripplot(x='Condition',
                      y=fluoro,
                      hue="Timepoint",
                      data=dotplot_df,
                      jitter=True,
                      dodge=True,
                      edgecolor='w',
                      linewidth=0.5)
        plt.ylim(0, None)
        plt.ylabel(fluoro + " Intensity (AU)")
        plt.title(fluoro)
        plt.show()

Beispiel #59

0

Datei anzeigen

Datei: data_frames.py Projekt: swigder/bilingual-search-engine

 def df_function(collection_df, other, ax):
     """Strip-plot SCORE on *ax*, with one of the two attributes on x and the other as hue."""
     # `reverse` swaps which attribute goes on the x-axis and which one colours the points.
     if reverse:
         x_attr, hue_attr = attribute, other
     else:
         x_attr, hue_attr = other, attribute
     x_order = sorted(collection_df[x_attr].unique())
     sns.stripplot(
         data=collection_df,
         x=x_attr,
         y=SCORE,
         hue=hue_attr,
         order=x_order,
         jitter=0.1,
         dodge=True,
         alpha=0.5,
         ax=ax,
     )

Beispiel #60

0

Datei anzeigen

g = sns.pairplot(train[features_of_interest], hue='Survived', palette = 'seismic',
                 diag_kind='kde', diag_kws=dict(shade=True), plot_kws=dict(s=10))
g.set(xticklabels=[])


# ## 4.2 Breakdown by Categories
# The correlation is a nice start. Now let's show how survival changes with some of these categories

# In[ ]:


# Plot: fare per person for each title, coloured by survival.
sns.set_style('white')
fig = plt.figure(figsize=(12,12))
# `split` is the old name of `dodge`; current seaborn releases no longer
# accept it. dodge=False keeps both hue levels in the same strip.
ax = sns.stripplot(x='Title', y='fare_pp', data=train, jitter=0.2,
                  alpha=0.9, hue='Survived', dodge=False, palette="RdBu")

# Label
title = plt.title("Titles and Money", fontsize=14, fontweight='bold')
title.set_position([.5, 1.03])
plt.ylabel('Fare per Person ($)', fontsize=11, fontweight='bold')
plt.xlabel('Title', fontsize=11, fontweight='bold')
# Limit the y-axis to -1..100.
ax.set_ylim(-1,100);

# Y-Axis Ticks
def dollars(x, pos):
    """Format tick value *x* as a dollar amount with two decimals.

    *pos* is the tick position that matplotlib passes to a FuncFormatter
    callback; it is not used.
    """
    return f"${x:1.2f}"
formatter = FuncFormatter(dollars)
ax.yaxis.set_major_formatter(formatter)