Example #1
def plot_quantiles(data, err=None, quantiles=None, axes=None, colors=None, labels=None, kde=None, bw=.008):
    """ plotting function for displaying model-predicted
    quantile-probabilities over empirical estimates
    """
    y_data, yhat_data = data
    c, c_hat = colors
    if axes is not None:
        axc, axe = axes
    else:
        f, (axc, axe) = plt.subplots(1, 2, figsize=(10, 4))
    qc, qc_hat = y_data[1], yhat_data[1]
    qe, qe_hat = y_data[2], yhat_data[2]
    if quantiles is None:
        quantiles = np.linspace(.1, .9, qc.size)
    if err is not None:
        qc_err, qe_err = err
    else:
        qc_err, qe_err = [np.zeros(len(qc))]*2
    if kde is not None:
        qc_kde, qe_kde = kde[1], kde[2]
        sns.kdeplot(qc_kde, cumulative=1, color=c, ax=axc, linewidth=2, linestyle='-', bw=bw)
        sns.kdeplot(qe_kde, cumulative=1, color=c, ax=axe, linewidth=2, linestyle='-', bw=bw)
    axc.errorbar(qc, quantiles, xerr=qc_err, color=c, linewidth=0, elinewidth=1.5, marker='o', ms=5, label=labels)
    axe.errorbar(qe, quantiles, xerr=qe_err, color=c, linewidth=0, elinewidth=1.5, marker='o', ms=5, label=labels)
    axc.plot(qc_hat, quantiles, mec=c_hat, linewidth=0, marker='o', ms=10, mfc='none', mew=1.7, label=labels)
    axe.plot(qe_hat, quantiles, mec=c_hat, linewidth=0, marker='o', ms=10, mfc='none', mew=1.7, label=labels)
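# Hypothetical usage sketch, not part of the original example. The nesting below is
# inferred from the indexing in plot_quantiles (element 1 = one response class,
# element 2 = the other); the numbers themselves are made up.
import numpy as np
import matplotlib.pyplot as plt

q_emp = [None, np.array([.40, .47, .53, .60, .72]), np.array([.45, .52, .60, .69, .83])]
q_hat = [None, np.array([.41, .48, .54, .61, .70]), np.array([.46, .53, .61, .70, .81])]
plot_quantiles((q_emp, q_hat), colors=('k', 'r'))
plt.show()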
Example #2
def dist_small_multiples(df, figsize=(20, 20)):
    """
    Small multiples plots of the distribution of a dataframe's variables.
    """
    import math

    sns.set_style("white")

    num_plots = len(df.columns)
    n = int(math.ceil(math.sqrt(num_plots)))

    fig = plt.figure(figsize=figsize)
    axes = [plt.subplot(n, n, i) for i in range(1, num_plots + 1)]

    i = 0
    for k, v in df.items():
        ax = axes[i]
        sns.kdeplot(v, shade=True, ax=ax, legend=False)
        sns.rugplot(v, ax=ax, c=sns.color_palette("husl", 3)[0])
        [label.set_visible(False) for label in ax.get_yticklabels()]
        ax.xaxis.set_ticks([v.min(), v.max()])
        ax.set_title(k)
        i += 1
    sns.despine(left=True, trim=True, fig=fig)
    plt.tight_layout()
    return fig, axes
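# Hypothetical usage sketch, not part of the original example: dist_small_multiples
# expects a DataFrame of numeric columns; the column names below are illustrative.
import numpy as np
import pandas as pd

demo = pd.DataFrame(np.random.randn(200, 3), columns=['a', 'b', 'c'])
fig, axes = dist_small_multiples(demo, figsize=(9, 9))
fig.savefig('small_multiples_demo.png')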
def plot_retest_data(retest_data, size=4.6, save_dir=None):
    colors = [sns.color_palette('Reds_d',3)[0], sns.color_palette('Blues_d',3)[0]]
    f = plt.figure(figsize=(size,size*.75))
    # plot boxes
    with sns.axes_style('white'):
        box_ax = f.add_axes([.15,.1,.8,.5]) 
        sns.boxplot(x='icc3.k', y='Measure Category', ax=box_ax, data=retest_data,
                    palette={'Survey': colors[0], 'Task': colors[1]}, saturation=1,
                    width=.5, linewidth=size/4)
    box_ax.text(0, 1, '%s Task measures' % Task_N, color=colors[1], fontsize=size*2)
    box_ax.text(0, 1.2, '%s Survey measures' % Survey_N, color=colors[0], fontsize=size*2)
    box_ax.set_ylabel('Measure category', fontsize=size*2, labelpad=size)
    box_ax.set_xlabel('Intraclass correlation coefficient', fontsize=size*2, labelpad=size)
    box_ax.tick_params(labelsize=size*1.5, pad=size, length=2)
    [i.set_linewidth(size/5) for i in box_ax.spines.values()]

    # plot distributions
    dist_ax = f.add_axes([.15,.6,.8,.4]) 
    dist_ax.set_xlim(*box_ax.get_xlim())
    dist_ax.set_xticklabels('')
    dist_ax.tick_params(length=0)
    for i, (name, g) in enumerate(retest_data.groupby('Measure Category')):
        sns.kdeplot(g['icc3.k'], color=colors[i], ax=dist_ax, linewidth=size/3, 
                    shade=True, legend=False)
    dist_ax.set_ylim((0, dist_ax.get_ylim()[1]))
    dist_ax.axis('off')
    if save_dir:
        plt.savefig(save_dir, dpi=dpi, bbox_inches='tight')
def kde_tissue(tissue, q, genes, x, y, dfplot, dfindex, ax, label, col= 'b'):
    """
    Plots all the tissue-specific genes, i.e. all genes that appear in one and only
    one 'tissue'.
    tissue -- tissue to plot
    q -- qvalue to slice on
        
    dfindex -- the dataframe generated by organizer
    
    dfplot -- the dataframe containing columns x, y and genes
    x -- the name of the column containing the values  to plot in the histogram
    y -- the name of the column with which to slice the dataframe (q or p value)
    genes -- the name of the column containing the WBID names
    
    label -- name of the plot just made
    ax -- axis to plot in
    col -- color
    """
    g = lambda x: ((dfindex.expressed == 1) & (dfindex.tissue == x))
    # & (~dfindex[dfindex.expressed == 1].duplicated('gene'))
    f= lambda x: (dfplot[genes].isin(x)) & (dfplot[y] < q)
    
    gene_selection= g(tissue)    
    genes_to_plot= dfindex[gene_selection].gene
    
    ind= f(genes_to_plot)
    to_plot= dfplot[ind][x]
    
    n= len(dfplot[ind][genes].unique())
    if len(to_plot) > 15:
        sns.kdeplot(to_plot, color= col,label= label+' n= {0}'.format(n), ax= ax, 
                    lw= 5, cut=0.5)        
        if len(to_plot) <= 20:
            sns.rugplot(to_plot, color= col, ax= ax, height= .07, lw= 2)
def kde_target(var_name, df):
    """KDE visualization for a single feature.

    Reports:
        correlation with the target (Pearson correlation coefficient)
        median of the feature for not repaid
        median of the feature for repaid
    """
    # Calculate the correlation coefficient between the new variable and the target
    corr = df['TARGET'].corr(df[var_name])

    # Calculate medians for repaid vs not repaid
    avg_repaid = df.loc[df['TARGET'] == 0, var_name].median()
    avg_not_repaid = df.loc[df['TARGET'] == 1, var_name].median()

    plt.figure(figsize=(12, 6))

    # Plot the distribution for target == 0 and target == 1
    sns.kdeplot(df.loc[df['TARGET'] == 0, var_name], label='TARGET == 0')
    sns.kdeplot(df.loc[df['TARGET'] == 1, var_name], label='TARGET == 1')

    # label the plot
    plt.xlabel(var_name);
    plt.ylabel('Density');
    plt.title('%s Distribution' % var_name)
    plt.legend();
    plt.show()

    # print out the correlation
    print('Correlation of %s with the target: %0.4f' % (var_name, corr))

    print('not repaid = %0.4f' % avg_not_repaid)
    print('repaid =     %0.4f' % avg_repaid)
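# Hypothetical usage sketch, not part of the original example: kde_target only needs a
# DataFrame containing a binary 'TARGET' column plus the feature to inspect. The
# synthetic frame and the name 'some_feature' are illustrative.
import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
demo = pd.DataFrame({'TARGET': rng.randint(0, 2, 500),
                     'some_feature': rng.randn(500)})
kde_target('some_feature', demo)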
Example #6
def plot_marker_distribution(datalist, namelist, labels, grid_size,
                             fig_path=None, letter_size=16):
    nmark = len(labels)
    assert len(datalist) == len(namelist)
    g_i, g_j = grid_size
    
    colors = sns.color_palette("Set1", n_colors=len(datalist), desat=.5)
    
    fig = plt.figure()
    grid = gridspec.GridSpec(g_i, g_j, wspace=0.1, hspace=0.05)
    for i in range(g_i):
        for j in range(g_j):
            seq_index = g_j * i + j
            if seq_index < nmark:
                ax = fig.add_subplot(grid[i,j])
                start = .5
                ax.text(start,.85, labels[seq_index],
                    horizontalalignment='center',
                    transform=ax.transAxes, size=letter_size)    
                for i_name, (name, x) in enumerate(zip(namelist, datalist)):
                    lower = np.percentile(x[:,seq_index], 0.5)
                    upper = np.percentile(x[:,seq_index], 99.5)
                    if seq_index == nmark - 1:
                        sns.kdeplot(x[:,seq_index], color=colors[i_name], label=name,
                                    clip=(lower, upper))
                    else:
                        sns.kdeplot(x[:,seq_index], color=colors[i_name],
                                    clip=(lower, upper))
                clean_axis(ax)
    plt.legend(loc="upper right", prop={'size':letter_size})
    if fig_path is not None:
        plt.savefig(fig_path, format='eps')
        plt.close()
    else:
        plt.show()
def create1Ddensityplot(data, outputfilename):
    plt.clf()
    f, (ax1) = plt.subplots(1, 1, sharex=True, figsize=(8, 6))
    # with sns.axes_style("white"):
    #sns.jointplot("compression", "wiener index",atomizationInfo, kind="kde");
    sns.kdeplot(data, shade=True, ax=ax1, clip=(0, 1), bw=0.5)
    plt.savefig(outputfilename)
Example #8
def plot_KL(data):
    """Kullback-Leibler divergence, given a Dataset object.
    The 'true' distribution is the data one"""
    frequencies = data.frequencies
    Ncat = data.Ncat
    fiducial = data.generate_mc(100)

    sh, loc, sc = data.lognorm_par()
    freq_ln = [np.sort(stats.lognorm.rvs(sh, scale=sc, size=Ncat, random_state=s))[::-1]
               for s in range(1, 1001)]
    kl_ln = [stats.entropy(frequencies, r) for r in freq_ln]

    lengths = [min(Ncat, len(mc)) for mc in fiducial]  # Cut to the minimum Ncat
    kl_data = [stats.entropy(frequencies[:lengths[i]], mc[:lengths[i]]) for i, mc in enumerate(fiducial)]

    # Plot KL divergence. Use kdeplot instead of histogram
    fig = plt.figure(figsize=[10, 6.18])
    plt.title('Kullback-Leibler divergence')
    # plt.hist(kl_data, bins=10, normed=True, label='MC', alpha=0.5)
    # plt.hist(kl_ln, bins=10, normed=True, label='Lognormal', alpha=0.5, color='Blue')
    sns.kdeplot(np.array(kl_data), label='MC', alpha=0.6, color='Blue')
    sns.kdeplot(np.array(kl_ln), label='Lognormal', alpha=0.6, color='Orange')
    plt.xlim(xmin=0.)
    # plt.axvline(ks_tree[0], c='Purple', label = 'Tree model')
    # plt.axvline(kl_ln, c='Orange', label = 'Lognormal')
    plt.legend(loc='best')
    # plt.savefig(os.path.join('all_data', 'KL_'+data.name+'.png'))
    return
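# Side note, not from the original source: scipy.stats.entropy(p, q) is exactly the
# Kullback-Leibler divergence sum(p * log(p / q)) of the (normalised) distributions,
# which is why it is used above to compare the data frequencies with each sample.
import numpy as np
from scipy import stats

p = np.array([0.5, 0.3, 0.2])
q = np.array([0.4, 0.4, 0.2])
manual_kl = np.sum(p * np.log(p / q))
assert np.isclose(stats.entropy(p, q), manual_kl)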
Example #9
def _plot_continuous(df, xlabel, ylabel, ax, plottype="kde", n_levels=10,
                     cmap="YlGnBu", shade=True):

    """
    Plot a two continuous variables against each other in a scatter plot or a
    kernel density estimate.

    Parameters
    ----------
    df : pd.DataFrame
        A pandas DataFrame with the data

    xlabel : str
        The column name for the variable on the x-axis

    ylabel : str
        The column name for the variable on the y-axis

    ax : matplotlib.Axes object
        The matplotlib.Axes object to plot the bubble plot into

    plottype : {"kde" | "scatter"}
        The type of plot to produce. Either a kernel density estimate ("kde")
        or a scatter plor ("scatter").

    n_levels : int
        the number of levels to plot for the kernel density estimate plot.
        Default is 10

    cmap : matplotlib.cm.colormap
        A matplotlib colormap to use for shading the bubbles

    shade : bool
        If True, plot kernel density estimate contours in coloured shades.
        If False, plot only the outline of each contour.

    Returns
    -------
    ax : matplotlib.Axes object
        The same matplotlib.Axes object for further manipulation

    """


    xcolumn = df[xlabel]
    ycolumn = df[ylabel]
    x_clean = xcolumn[np.isfinite(xcolumn) & np.isfinite(ycolumn)]
    y_clean = ycolumn[np.isfinite(ycolumn) & np.isfinite(xcolumn)]

    if plottype == "kde":
        sns.kdeplot(x_clean, y_clean, n_levels=n_levels, shade=shade,
                    ax=ax, cmap=cmap)

    elif plottype == "scatter":
        current_palette = sns.color_palette(cmap, 5)
        c = current_palette[2]
        ax.scatter(x_clean, y_clean, color=c, s=10, lw=0,
                   edgecolor="none", alpha=0.8)

    return ax
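# Hypothetical usage sketch, not part of the original example; the column names
# 'x_var' / 'y_var' and the random data are illustrative only.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

rng = np.random.RandomState(42)
demo = pd.DataFrame({'x_var': rng.randn(300), 'y_var': rng.randn(300)})
fig, ax = plt.subplots()
_plot_continuous(demo, 'x_var', 'y_var', ax, plottype='kde', n_levels=8)
plt.show()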
def do_kdeplot(x, y, ax, n_levels=None, bw='scott'):
    try:
        sns.kdeplot(x, y, ax=ax, cut=0, cmap='Purples_d', shade=True, shade_lowest=False, n_levels=n_levels, bw=bw,
                    rasterized=True)
    except:
        logger.warning('Unable to do a KDE fit to AUGUSTUS improvement.')
        pass
def plot_density(exp_res, title, xlim=(0.7, 1.0), ylim=(0.8, 1.0), cmap='Reds', saveto=None):

    sns.set_context("notebook", font_scale=2.0, rc={"lines.linewidth": 2.5})
    sns.set_style("whitegrid")
    training_dfs = []
    for item in exp_res:
        training_data, training_df, best_training_row, match_res = item
        training_dfs.append(training_df)
    combined = pd.concat(training_dfs, axis=0)
    combined = combined.reset_index(drop=True)

    f, ax = plt.subplots(figsize=(6, 6))
    sns.kdeplot(combined.Rec, combined.Prec, ax=ax, cmap=cmap, shade=True, shade_lowest=False)
    # sns.rugplot(combined.Rec, ax=ax)
    # sns.rugplot(combined.Prec, vertical=True, ax=ax)
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

#     g = sns.JointGrid(x="Rec", y="Prec", data=combined, xlim=xlim, ylim=ylim)
#     g = g.plot_joint(sns.kdeplot)
#     g = g.plot_marginals(sns.kdeplot, shade=True)
#     ax = g.ax_joint
#     ax.set_xlabel('Rec', fontsize=36)
#     ax.set_ylabel('Prec', fontsize=36)
#     ax = g.ax_marg_x

    ax.set_title(title, fontsize=36)
    # plt.tight_layout()
    if saveto is not None:
        plt.savefig(saveto)
def make_kde_plot(df, spot, runid, title=None, cmap='Greens', plotclass=None, logfile=None, debug=False):
    plt.figure()
    ptf('Plot KDE %s - %s' % (title, spot), logfile)
    x,y = stack_rows(df, spot)
    ptf('%s, %s' % (x.shape, y.shape), logfile)
    ptf('Check for nans', logfile)
    ptf('%s, %s' % (np.sum(np.isnan(x)), np.sum(np.isnan(y))), logfile)
    ptf('computing kde...', logfile)
    sns.kdeplot(x,y, shade=True, cmap=cmap)

    plottitle = runid + '-' + spot + ' - KDE trigger vs t'
    if title:
        plottitle += ' - ' + title
    if plotclass:
        plottitle += ' - ' + plotclass

    plt.title(plottitle)
    plt.xlabel('t (hrs)')
    if title:
        plt.ylabel(title)
    else:
        plt.ylabel('trigger metric')
    filename = runid + '/' + runid + '-' + spot + ' - KDE trigger vs t'
    if title:
        filename += ' - ' + title
    if plotclass:
        filename += ' - ' + plotclass

    ptf('Saving plot %s' % filename, logfile)
    plt.savefig(filename, dpi=200)
    if debug:
        plt.show()
    else:
        plt.close()
    def build_reads_per_cluster(self, ax_nreads, reads_per_cluster=None):

        """

        Draws the number of reads per cluster for each cluster

        ax - the axis to draw on
        reads_per_cluster - list, the number of reads in a cluster


        """
        if reads_per_cluster is None:
            reads_per_cluster = self.reads_per_cluster

        if reads_per_cluster is None:
            raise NotImplementedError("Pickle file doesn't have data to generate this figure")

        sns.kdeplot(np.array(reads_per_cluster), ax=ax_nreads)
        [tick.set_rotation(90) for tick in ax_nreads.get_xticklabels()]
        ax_nreads.set_xlim(0,)

        ax_nreads.set_xlabel("N reads)")
        ax_nreads.set_ylabel("Frequency")

        return ax_nreads
Example #14
def tsne_map(z, c, fig_path, colors=None, s=2, suffix='png'):
	c = np.squeeze(c)
	if colors is None:
		colors = sns.color_palette("Set2", len(np.unique(c)))

	sns.set_style('white')
	fig, ax = plt.subplots(figsize=(5,5))
	sns.kdeplot(z[:,0], z[:,1], colors='lightgray', cmap=None, linewidths=0.5)
	#ax = add_contour(z[c==0], ax)
	for i in np.unique(c):
		if i > 0:
			plt.scatter(z[c==i, 0], z[c==i, 1], s=s, marker='o', c=colors[i],
						edgecolors='face')

	clean_axis(ax)
	ax.grid(False)

	#plt.legend(loc="upper left", markerscale=20., scatterpoints=1, fontsize=10)
	#plt.xlabel('tSNE axis 1', fontsize=20)
	#plt.ylabel('tSNE axis 2', fontsize=20)
	#sns.despine(left=True, bottom=True)
	sns.despine()
	plt.savefig(fig_path + '.%s' % suffix, format=suffix)
	plt.clf()
	plt.close()
Example #15
def estimate_bivariate_mle_jr():
    ndim = 2
    size = (10000, ndim)
    data = np.random.normal(size=size)
    eta, lam = 4, -.9
    skst = SkewStudent(eta=eta, lam=lam)
    data = skst.rvs(size=size)

    model = SkStJR(ndim=ndim, data=data)
    out = model.fit_mle()
    print(out)

    model.from_theta(out.x)

    fig, axes = plt.subplots(nrows=size[1], ncols=1)
    for innov, ax in zip(data.T, axes):
        sns.kdeplot(innov, ax=ax, label='data')

    lines = [ax.get_lines()[0].get_xdata() for ax in axes]
    lines = np.vstack(lines).T
    marginals = model.marginals(lines)

    for line, margin, ax in zip(lines.T, marginals.T, axes):
        ax.plot(line, margin, label='fitted')
        ax.legend()

    plt.show()
    def build_cluster_lengths(self, ax_lengths, cluster_lengths=None):

        """

        Draws the distribution of cluster lengths
        as a kernel density estimate

        ax - the axis to draw on
        cluster_lengths - list, the length of each cluster


        """

        if cluster_lengths is None:
            cluster_lengths = self.cluster_lengths

        if cluster_lengths is None:
            raise NotImplementedError("Pickle file doesn't have data to generate this figure")

        sns.kdeplot(np.array(cluster_lengths), ax=ax_lengths)
        [tick.set_rotation(90) for tick in ax_lengths.get_xticklabels()]
        ax_lengths.set_xlim(0,)
        ax_lengths.set_ylabel("Frequency")
        ax_lengths.set_xlabel("Length (bp)")
        return ax_lengths
Example #17
def plot(df2, df3):
    sns.set(style="white", color_codes=True)
    f, ax = plt.subplots()
    sns.kdeplot(df2.bmi, ax=ax, shade=True, color='k', gridsize=10000, clip=(15, 45))
    sns.kdeplot(df3.bmi, ax=ax, shade=True, color='k', ls='dashed', gridsize=10000, clip=(15, 45))

    plt.legend(['Wave 2 (1996; ages 13-20 y)', 'Wave 3 (2001; ages 19-26 y)'], fontsize=15)
    plt.xlim(15, 45)

    ax.annotate('10.9%', xy=(25, .02), xytext=(30.5, .005), color='k')
    ax.annotate('22.1%', xy=(25, .04), xytext=(30.5, .02), color='k')

    y1 = ax.lines[0].get_ydata()
    x1 = ax.lines[0].get_xdata()
    x_mask1 = np.ma.masked_less_equal(x1, 30).mask
    y_masked1 = np.ma.masked_array(y1, x_mask1)

    y2 = ax.lines[1].get_ydata()
    x2 = ax.lines[1].get_xdata()
    x_mask2 = np.ma.masked_less_equal(x2, 30).mask
    y_masked2 = np.ma.masked_array(y2, x_mask2)

    ax.fill_between(x2, np.zeros_like(y2), y_masked2, facecolor='red', interpolate=True, alpha=0.5)
    ax.fill_between(x1, np.zeros_like(y1), y_masked1, facecolor='white', interpolate=True)
    ax.fill_between(x2, np.zeros_like(y2), y_masked2, facecolor='red', interpolate=True, alpha=0.25)

    plt.vlines(x=30, ymin=0, ymax=0.0398, color='k', linewidth=2, alpha=1)#, ls='dashed')

    plt.xticks(size=15)
    plt.yticks(size=15)

    plt.ylabel('Frequency', fontsize=15)
    plt.xlabel('BMI (kg/m$^2$)', fontsize=15)
    plt.tight_layout()
    plt.show()
Example #18
def plot_level(dist_points, dist_expert, level_id, is_hist=True, bw=None, num_bins=NUM_BINS,
	col_points=COL_POINTS, col_expert=COL_EXPERT, lw_expert=LW_EXPERT,
	title=None, xlabel=None, ylabel=None, save_to=None):
	if is_hist:
		ax = dist_points.plot.hist(bins=num_bins, color=col_points)
	else:
		if bw:
			ax = sns.kdeplot(dist_points, bw=bw, color=col_points)
		else:
			ax = sns.kdeplot(dist_points, color=col_points)

	plt.axvline(dist_expert, 0, len(dist_points), color=col_expert, lw=lw_expert)

	plt.title(title or "Distribusi jarak pemain - Level {}".format(level_id))
	plt.xlabel(xlabel or XLABEL)
	if ylabel:
		plt.ylabel(ylabel)
	else:
		plt.ylabel("Jumlah pemain" if is_hist else "Distribusi")

	if save_to:
		try:
			os.makedirs(save_to)
		except:
			pass
		plt.savefig(os.path.join(save_to, 'level{}.png'.format(level_id)))

	return ax
Example #19
def plot_galaxy_and_stars(galaxy, stars):
    
    colors = get_distinct(3)
    single_frame('X [pc]', 'Y [pc]')
    xlim = 60
    pyplot.xlim(-xlim, xlim)
    pyplot.ylim(-xlim, xlim)
    ax = pyplot.gca()

    import numpy as np
    import pandas as pd
    from scipy import stats, integrate
    import matplotlib.pyplot as plt
    import seaborn as sns
    sns.set(color_codes=True)

    p = galaxy.select(lambda x: x<60|units.parsec,["x"])
    p = p.select(lambda x: x>-60|units.parsec,["x"])
    p = p.select(lambda y: y<60|units.parsec,["y"])
    p = p.select(lambda y: y>-60|units.parsec,["y"])
    x = p.x.value_in(units.parsec)
    y = p.y.value_in(units.parsec)
    sns.kdeplot(x, y, ax=ax)
    m = 100*numpy.sqrt(stars.mass/stars.mass.max())
    pyplot.scatter(stars.x.value_in(units.parsec), stars.y.value_in(units.parsec), c=colors[0], s=m, lw=0)
#    pyplot.show()
    pyplot.savefig("Fujii_Comparison_Figure")
Example #20
def seaborn_kde():   
    data = np.random.multivariate_normal([0, 0], [[5, 2], [2, 2]], size=2000)
    data = pd.DataFrame(data, columns=['x', 'y'])

    for col in 'xy': 
        sns.kdeplot(data[col], shade=True) 
    plt.show()    
    def dists(self):
        import matplotlib.pyplot as plt
        import seaborn as sns

        print("Plotting distributions for all parameters...")

        keys = [k for k in self._df.keys() if "_" not in k]
        n_plots = len(keys)
        n_cols = 4
        n_rows = (n_plots + n_cols - 1) // n_cols  # ceiling division so every key gets an axis

        fig, axs = plt.subplots(n_rows, n_cols, figsize=(n_cols*3.5, n_rows*2))
        for i, (key, ax) in enumerate(zip(keys, axs.ravel())):
            
            kde_args = dict(ax=ax, shade=True)
            if i > 0: kde_args['legend'] = False
            
            data = self._df[key]
            if key.startswith("n") or key.startswith("mu"):
                data = np.log10(data)
                key = "log10(%s)" % key
            
            sns.kdeplot(data, color='k', **kde_args)
            ax.set_xlabel(key)
            plt.setp(ax.get_xticklabels(), rotation=20)
            
            if (i % n_cols) == 0:
                ax.set_ylabel("density function")

        sns.despine(fig=fig)
        plt.tight_layout()

        plt.show()
def componentDensityPlot():
    '''
    obtains a density plot that compares the distribution of components against three model datasets
    '''

    directory = [('bngTest', 'BNG control set'), ('curated', 'BioModels curated'), ('non_curated', 'BioModels non\n curated')]
    #directory = [('curated', 'BioModels curated')]
    #('new_non_curated', 'BioModels non curated')]
    colors = sns.color_palette("Set1", 3)
    colors = [colors[1], colors[2], colors[0]]
    f, (ax1) = plt.subplots(1, 1, sharex=True, figsize=(6, 3.45))
    f.tight_layout() 
    for color, direct in zip(colors, directory):
        totalCount, bindingCount, modifyCount, atoarray = componentAnalysis(direct[0], 0.1)
        sns.kdeplot(totalCount, color=color, label=direct[1], shade=True, ax=ax1, clip=(0.4, 100), bw=0.2)
        #sns.distplot(bindingCount, color=color, ax=ax2, clip=(-0.1, 8), bw=0.5)
        #sns.distplot(modifyCount, color=color, ax=ax3, clip=(-0.1, 8), bw=0.5)
    plt.xlabel('Number of components', fontsize=22,fontweight='bold')
    #f.text(-0.14,0.5,'Model percentage', fontsize=22,fontweight='bold',va='center', rotation='vertical')
    ax1.set_title('Components/molecule')
    #ax2.set_title('Binding components/molecule')
    #ax3.set_title('Modification components/molecule')
    ax1.set_ylabel('Fraction',fontsize=22,fontweight='bold')
    #ax2.set_ylabel('Fraction',fontsize=22,fontweight='bold')
    #ax3.set_ylabel('Fraction',fontsize=22,fontweight='bold')
    plt.tight_layout()
    ax1.set(xlim=(0,10))
    sns.despine()
    plt.savefig('componentDensity2.pdf',bbox_inches='tight')
def plot_galaxy_and_stars(galaxy, stars):
    
    colors = get_distinct(3)
    single_frame('X [kpc]', 'Y [kpc]')
    xlim = 10
    pyplot.xlim(-xlim, xlim)
    pyplot.ylim(-xlim, xlim)
    ax = pyplot.gca()

    import numpy as np
    import pandas as pd
    from scipy import stats, integrate
    import matplotlib.pyplot as plt
    import seaborn as sns
    sns.set(color_codes=True)

    lim = 10|units.kpc
    p = galaxy.select(lambda x: x<lim,["x"])
    p = p.select(lambda x: x>-lim,["x"])
    p = p.select(lambda y: y<lim,["y"])
    p = p.select(lambda y: y>-lim,["y"])
    p = p.select(lambda r: r.length()>5|units.kpc,["position"])
    x = p.x.value_in(units.kpc)
    y = p.y.value_in(units.kpc)
    sns.kdeplot(x, y, ax=ax, shade=True, n_levels=20, shade_lowest=False)
    m = 100*numpy.sqrt(stars.mass/stars.mass.max())
    pyplot.scatter(stars.x.value_in(units.kpc), stars.y.value_in(units.kpc), c=colors[0], s=m, lw=0)
    pyplot.savefig("SolarSiblings_life_galaxy")
def mag_vs_length():
    # bar = bar[(bar.kind == 'Composite')]
    # sns.lmplot(x='Mr', y='length_scaled', data=bar, hue='kind', palette=flatui, scatter_kws={'s': 9}, fit_reg=False, size=10).set(ylim=(0,1), xlim=(-18, -23))
    for ax in range(1, 6):
        plt.subplot(2, 3, ax)
        sample = bar[bar.kind == kind[ax]]
        sns.kdeplot(sample.length_scaled, sample.Mr, cmap=sns.light_palette(color=flatui[1], as_cmap=True), shade=True, shade_lowest=True).set(xlim=(0, 1.1), ylim=(-18, -23), title=kind[ax])
def plot_averageAsOfLastMonth(df_train):
    sns.kdeplot(df_train['Past One Month'], shade=True, color='r')
    plt.title('Estimate of average days overdue as of past one month')
    plt.xlabel('Average of days overdue')
    plt.ylabel('Probability Distribution')
    fig = plt.gcf()
    plt.show()
    fig.savefig('graphs/past_one_month.png')
def plot_averageAsOfPastThreeMonths(df_train):
    sns.kdeplot(df_train['Past Three Months'], shade=True, color='purple')
    plt.title('Estimate of average days overdue as of past three months')
    plt.xlabel('Average of days overdue')
    plt.ylabel('Probability Distribution')
    fig = plt.gcf()
    plt.show()
    fig.savefig('graphs/past_three_months.png')
def plot_averageDaysOverdue(df_train):
    sns.kdeplot(df_train['Average Over Due Days'], shade=True, color='g')
    plt.title('Estimate of average days overdue as of December')
    plt.xlabel('Average of days overdue')
    plt.ylabel('Probability Distribution')
    fig = plt.gcf()
    plt.show()
    fig.savefig('graphs/average_overdue_days.png')
Example #28
    def createHist(self, event):
        # TODO avoid hardcoding sizes. Find smart way to decide on sizes 
        dlg = GraphDialog(self.parent, "Histogram Input", ("Select Data",),
                size=(500,200))

        # options
        hsize1 = wx.BoxSizer(wx.HORIZONTAL)
        bars = wx.CheckBox(dlg, label="Bars")
        density = wx.CheckBox(dlg, label="Density")
        bars.SetValue(True)
        density.SetValue(True)
        hsize1.Add(bars)
        hsize1.Add(density)
        dlg.Add(hsize1)

        numBins = dlg.AddSpinCtrl("# of Bins", 1, 999, 
                np.sqrt(len(self.parent.data)), size=(50, -1))
        bandwidth = dlg.AddSpinCtrl("Density Bandwidth", -99, 99, 0,
                size=(50, -1))

        bars.Bind(wx.EVT_CHECKBOX, lambda e: numBins.Enable(bars.GetValue()))
        density.Bind(wx.EVT_CHECKBOX, 
                lambda e: bandwidth.Enable(density.GetValue()))

        if dlg.ShowModal() == wx.ID_OK:
            ds = [d[0] for d in dlg.GetName()]
            # account for grouping
            groups, datas = dlg.GetValue(self.parent.data)
            bars, density = bars.GetValue(), density.GetValue() 
            bandwidth = np.exp(-0.2 * bandwidth.GetValue())
            if groups:
                ds = self._groupLabels(ds, groups)
                newDs = []
                for d in ds:
                    newDs += [d + "-" + str(g) for g in groups]
                ds = newDs
            dlg.Destroy()

            # d.min() gets minimum for each column. d.min.min() gets global min
            a, b = min(d.min().min() for d in datas), max(d.max().max() for d in datas)
            bins = np.arange(a, b, float(b-a) / numBins.GetValue())

            for d, data in zip(ds, datas):
                data = data[data.columns[0]]
                data = data.astype(float)
                # astype float b/c of bug in seaborn. 

                if bars and not density:
                    plt.hist(data, bins=bins, alpha=1.0/len(ds), label=d)
                else:
                    data = data[np.isfinite(data)]
                    bw = stats.gaussian_kde(data).factor * bandwidth
                    if density and not bars:
                        sns.kdeplot(data, shade=True, label=d, bw=bw)
                    else:
                        sns.distplot(data, bins=bins, kde_kws={"bw":bw, "label":d})
            plt.legend(loc='best')
            plt.show()
def FacetGrid():
    sns.set_style("dark",{"axes.facecolor":"black"})
    f, axes = plt.subplots(2,2, figsize=(12,8))
    [Kde(i,axes)  for i in range(0,2)]
    sns.violinplot(data=movies, x = 'Year', y='BudgetMillions', ax=axes[1,0],palette="YlOrRd")
    sns.kdeplot(movies.CriticRating,movies.AudienceRating,shade=True,shade_lowest=False,cmap='Blues_r',ax=axes[1,1])
    sns.kdeplot(movies.CriticRating,movies.AudienceRating,cmap='gist_gray_r', ax=axes[1,1])
    plt.gcf().canvas.manager.set_window_title('Facet Grid')
    plt.show()
Example #30
def vis_data(param_sets):
    """
    Visualizes probability distribution
    """
    data = np.array(param_sets)
    print(len(data[:, [0, 1]]))
    data = pd.DataFrame(data[:, [0, 1]], columns=["X", "Y"])
    sns.kdeplot(data.X, data.Y, shade=True)
    mpl.pyplot.show()
Example #31
# another way to do the above
#train_df['Age'].value_counts().sort_index().head(25) 


# In[ ]:


# convert ages to ints
age = train_df[['Age','Survived']].dropna() # returns a copy with blanks removed
age['Age'] = age['Age'].astype(int) # floors floats

# count passengers by age (smoothed via gaussian kernels)
plt.subplots(figsize=(18,6))
plt.subplot(311)
sns.kdeplot(age['Age'], shade=True, cut=0)

# count passengers by age (no smoothing)
plt.subplot(312)
sns.countplot(x='Age', data=age, palette='GnBu_d')

# survival rates by age
plt.subplot(313)
sns.barplot(x='Age', y='Survived', data=age, ci=None, palette='Oranges_d') # takes mean by default


# Observations:
# 
#  - Under 16s tend to have the highest survival rates
#  - Very high survival rates at 53, 63 and 80
#  - Survival of over 16s is fairly noisy. Possible that survival might increase with age.
f.savefig(os.path.join(impath, fname))
"""
RT distribution
"""

f, axes = plt.subplots(
    n_tasks,
    1,
    figsize=(10, 8),
    sharex=True,
    sharey=True,
)

for i, condition in enumerate(CONDITIONS):
    temp = sns.kdeplot(RTs_cn[condition][~np.isnan(RTs_cn[condition])],
                       shade=True,
                       ax=axes[0])
for i, condition in enumerate(CONDITIONS):
    sns.kdeplot(RTs_wr[condition][~np.isnan(RTs_wr[condition])],
                shade=True,
                ax=axes[1])
    axes[0].legend(CONDITIONS, frameon=False)

for i, ax in enumerate(axes):
    ax.set_ylabel('Probability, KDE')
    ax.set_title(f'RT distribution, {TASKS[i]}')
axes[1].set_xlabel('Reaction time')
sns.despine()
f.tight_layout()

# save fig
Example #33
chain_kg = trace_kg[1000:]
varnames_kg = ['p']
pm.traceplot(chain_kg, varnames_kg)
plt.show()

with pm.Model() as model_ug:
    p = pm.Dirichlet('p', a=np.ones(clusters))
    category = pm.Categorical('category', p=p, shape=n_total)

    means = pm.Normal('means', mu=[10, 20, 35], sd=2, shape=clusters)
    sd = pm.HalfCauchy('sd', 5)

    y = pm.Normal('y', mu=means[category], sd=sd, observed=mix)

    step1 = pm.ElemwiseCategorical(vars=[category], values=range(clusters))
    step2 = pm.Metropolis(vars=[p])
    trace_ug = pm.sample(10000, step=[step1, step2])
chain_ug = trace_ug[1000:]
varnames_ug = ['means', 'sd', 'p']
pm.traceplot(chain_ug, varnames_ug)
plt.show()

ppc = pm.sample_ppc(chain_ug, 100, model_ug)
for i in ppc['y']:
    sns.kdeplot(i, alpha=0.1, color='b')

sns.kdeplot(np.array(mix), lw=2, color='k')
plt.xlabel('$x$', fontsize=14)
plt.show()
Example #34
with open('output.json', 'r') as json_file:
    data = json.load(json_file)

# Data Visualisation
new_df = pd.DataFrame(data)

corr = new_df.corr()
plt.figure(figsize=(10, 7))
sns.heatmap(corr, annot=True)

# Density (KDE) plot of Hour vs. TotalInteractions

fig, ax = plt.subplots(1, figsize=(12, 8))
sns.kdeplot(new_df.Hour,
            new_df.TotalInteractions,
            cmap='Blues',
            shade=True,
            thresh=0.05,
            clip=(-1, 300))

# Finding the days with the most interactions in the month of December

new_df['day'] = pd.DatetimeIndex(new_df['date']).day
daysforplot = new_df.groupby('day',
                             as_index=False).agg({'TotalInteractions': 'sum'})
fig = px.scatter(daysforplot,
                 x='day',
                 y='TotalInteractions',
                 color_continuous_scale='Rainbow',
                 color='TotalInteractions',
                 size='TotalInteractions',
                 title='Most engaging days')
Example #35
    def plot_search(self, method, xy, ax):
        """
        selected over the possible options a search algorithm
        Args:
            method:
            xy:
            ax:

        Returns:

        """
        if self.plot_contour_xy:
            ax.collections = []  # TODO: Improve this
            ax = sns.kdeplot(self.x_list, self.y_list, ax=ax, color="red")
            self.trigger()

        if self.plot_xy:
            ax.plot(self.x_list,
                    self.y_list,
                    color=self.point_color,
                    marker=self.marker,
                    markersize=self.marker_size,
                    linestyle=self.linestyle)

        if method == self.options[1]:
            # self.activate_frame_capture = True
            self.x_list = []
            self.y_list = []
            self.x_list, self.y_list = self.mcmc_random(xy, self.mesh)

        elif method == self.options[2]:
            # self.activate_frame_capture = False
            if self.x is None and self.y is None:
                self.x_list = []
                self.y_list = []
                if xy is not None:
                    self.x, self.y = xy[0], xy[1]
                else:
                    self.x, self.y = self.init_search

            self.x, self.y = self.mcmc_random_step(self.mesh, self.x, self.y,
                                                   ax)

        elif method == self.options[3]:
            # self.activate_frame_capture = True
            self.x_list = []
            self.y_list = []
            self.x_list, self.y_list = self.mcmc_adaptiveMH(self.mesh)

        elif method == self.options[4]:
            # self.activate_frame_capture = False
            if self.x is None and self.y is None:
                self.x_list = []
                self.y_list = []
                if xy is not None:
                    self.x, self.y = xy[0], xy[1]
                else:
                    self.x, self.y = self.init_search

            self.x, self.y = self.mcmc_adaptiveMH_step(self.mesh, self.x,
                                                       self.y, ax)

        elif method == self.options[5]:
            # self.activate_frame_capture = True
            self.x_list = []
            self.y_list = []
            self.x_list, self.y_list = self.mcmc_hamiltonianMC(
                self.mesh_hm, self.mesh_dx_hm, self.mesh_dy_hm)
        elif method == self.options[6]:
            # self.activate_frame_capture = False
            if self.x is None and self.y is None:
                self.x_list = []
                self.y_list = []
                if xy is not None:
                    self.x, self.y = xy[0], xy[1]
                else:
                    self.x, self.y = self.init_search

            self.x, self.y = self.mcmc_hamiltonianMC_step(
                self.mesh_hm, self.mesh_dx_hm, self.mesh_dy_hm, self.x, self.y,
                ax)
        else:
            return False

        return True
Example #36
def run(output="output/"):
    X, Y, x, f, _ = make_data()
    Y = np.atleast_2d(Y).T

    plt.plot(X, Y, 'kx', mew=2)
    plt.savefig(os.path.join(output, "gpflow_input_data.png"))
    plt.show()
    plt.close()

    m1 = evalHandcrafted(X, Y)
    gp.gp_gpflow.plot(X,
                      Y,
                      x,
                      m1,
                      'handcrafted GP model',
                      f,
                      output=os.path.join(output,
                                          "gpflow_handcrafted_model.png"))
    print(m1.as_pandas_table())
    m1.clear()

    _, m2 = gp.gp_gpflow.evalMLE(X, Y)
    gp.gp_gpflow.plot(X,
                      Y,
                      x,
                      m2,
                      'MLE-fitted model',
                      f,
                      output=os.path.join(output, "gpflow_mle.png"))
    print(m2.as_pandas_table())

    # plot the function posterior
    plt.figure(figsize=(12, 6))
    num_samples = 10
    ff = m2.predict_f_samples(x, num_samples, initialize=False)
    plt.plot(np.stack([x[:, 0]] * num_samples).T,
             ff[:, :, 0].T,
             'C0',
             lw=2,
             alpha=0.1)
    plt.plot(X, Y, 'kx', mew=2)
    _ = plt.xlim(x.min(), x.max())
    plt.title('Posterior samples - MLE')
    plt.savefig(os.path.join(output, "gpflow_mle_posterior_samples.png"))
    plt.show()
    plt.close()
    m2.clear()

    traces, m3 = gp.gp_gpflow.evalMCMC(X, Y)
    gp.gp_gpflow.plot(X,
                      Y,
                      x,
                      m3,
                      'MCMC-fitted model',
                      f,
                      output=os.path.join(output, "gpflow_mcmc.png"))
    print(m3.as_pandas_table())

    fig = plt.figure(figsize=(8, 4))
    cmap = matplotlib.cm.hot
    norm = matplotlib.colors.Normalize(vmin=0, vmax=traces.shape[1])
    axs0 = plt.subplot2grid((1, 5), (0, 0), rowspan=1, colspan=1, fig=fig)

    j = 0
    for i, col in traces.items():
        sns.kdeplot(col,
                    ax=axs0,
                    label=col.name,
                    shade=True,
                    vertical=True,
                    color=cmap(norm(j)))
        j += 1

    axs1 = plt.subplot2grid((1, 5), (0, 1), rowspan=1, colspan=4, fig=fig)
    j = 0
    for i, col in traces.items():
        axs1.plot(col, label=col.name, color=cmap(norm(j)))
        j += 1

    axs0.get_legend().remove()

    axs1.legend(loc=0)
    axs1.set_xlabel('HMC iteration')
    axs1.set_ylabel('parameter value')

    axs0.set_ylim(axs1.get_ylim())
    axs0.set_xticks([])

    plt.suptitle('HMC traces')
    plt.tight_layout()
    plt.savefig(os.path.join(output, "gpflow_mcmc_traces.png"))
    plt.show()
    plt.close()

    ###################################

    fig = plt.figure(figsize=(12, 4))
    axs0 = plt.subplot2grid((3, 3), (0, 0), rowspan=2, colspan=1, fig=fig)

    axs0.plot(traces['GPR/likelihood/variance'],
              traces['GPR/kern/variance'],
              'k.',
              alpha=0.15)
    axs0.set_xlabel('noise_variance')
    axs0.set_ylabel('signal_variance')

    axs01 = plt.subplot2grid((3, 3), (2, 0), rowspan=1, colspan=1, fig=fig)
    sns.distplot(traces['GPR/likelihood/variance'], color='m', ax=axs01)
    axs01.set_xlim(axs0.get_xlim())
    plt.setp(axs01, yticks=[])

    axs1 = plt.subplot2grid((3, 3), (0, 1), rowspan=2, colspan=1, fig=fig)

    axs1.plot(traces['GPR/kern/lengthscales'],
              traces['GPR/likelihood/variance'],
              'k.',
              alpha=0.15)
    axs1.set_xlabel('lengthscale')
    axs1.set_ylabel('noise_variance')

    axs11 = plt.subplot2grid((3, 3), (2, 1), rowspan=1, colspan=1, fig=fig)
    sns.distplot(traces['GPR/kern/lengthscales'], color='m', ax=axs11)
    axs11.set_xlim(axs1.get_xlim())
    plt.setp(axs11, yticks=[])

    axs2 = plt.subplot2grid((3, 3), (0, 2), rowspan=2, colspan=1, fig=fig)

    axs2.plot(traces['GPR/kern/variance'],
              traces['GPR/kern/lengthscales'],
              'k.',
              alpha=0.1)
    axs2.set_xlabel('signal_variance')
    axs2.set_ylabel('lengthscale')

    axs21 = plt.subplot2grid((3, 3), (2, 2), rowspan=1, colspan=1, fig=fig)
    sns.distplot(traces['GPR/kern/variance'], color='m', ax=axs21)
    axs21.set_xlim(axs2.get_xlim())
    plt.setp(axs21, yticks=[])

    fig.suptitle('HMC (joint) distribution')
    plt.tight_layout()
    plt.savefig(os.path.join(output, "gpflow_mcmc_joint_distribution.png"))
    plt.show()
    plt.close()

    # plot the function posterior
    plt.figure(figsize=(12, 6))
    f_samples = []
    nn = 1

    # print("traces.shape=", traces.shape)
    # print("traces.iloc[::10].shape=", traces.iloc[::10].shape)
    # print("traces.iloc[::20].shape=", traces.iloc[::20].shape)

    for i, s in traces.iloc[::10].iterrows():
        f = m3.predict_f_samples(x,
                                 nn,
                                 initialize=False,
                                 feed_dict=m3.sample_feed_dict(s))
        f_samples.append(f)
        plt.plot(np.stack([x[:, 0]] * nn).T,
                 f[:, :, 0].T,
                 'C0',
                 lw=2,
                 alpha=0.02)

    f_samples = np.array(f_samples)

    line, = plt.plot(x, np.mean(f_samples, axis=(0, 1)), lw=2)

    plt.fill_between(x[:, 0],
                     np.percentile(f_samples, 5, axis=(0, 1, 3)),
                     np.percentile(f_samples, 95, axis=(0, 1, 3)),
                     color=line.get_color(),
                     alpha=0.1)

    plt.plot(X, Y, 'kx', mew=2)
    _ = plt.xlim(x.min(), x.max())
    # _ = plt.ylim(0, 6)
    plt.title('Posterior samples - MCMC')
    plt.savefig(os.path.join(output, "gpflow_mcmc_posterior_samples.png"))
    plt.show()
    plt.close()
    m3.clear()
Example #37
import matplotlib.pyplot as plt
import seaborn as sns

student_table = pd.read_csv('StudentsPerformance (1).csv')
print("COLUMNS: ")
print(student_table.columns.tolist())
print(student_table.gender.unique())
print(student_table['race/ethnicity'].unique())
print(student_table['parental level of education'].unique())
print(student_table['lunch'].unique())
print(student_table['test preparation course'].unique())
print(student_table.info())
#print(sns.distplot(student_table[['math score','reading score','writing score']]))
#sns.distplot(student_table['math score'],bins=11,hist_kws=dict(edgecolor='yellow',linewidth=3,color='green'))
#sns.distplot(student_table['reading score'],bins=11,hist_kws=dict(edgecolor='yellow',linewidth=3,color='green'))
print(sns.kdeplot(student_table['math score'], shade=True))
print(sns.kdeplot(student_table['reading score'], shade=True))
print(sns.kdeplot(student_table['writing score'], shade=True))

# **OBSERVATIONS**
#
# * The table has 8 columns and 1000 rows, with three int64 and five object dtypes and no null values.
# * There are 3 numerical and 5 categorical columns.
# * The sns.kdeplot calls give a quick view of the three score distributions, i.e. math, reading and writing scores.

# In[6]:

print(student_table.describe())
plt.rcParams['figure.figsize'] = (30, 20)
sns.countplot(student_table['math score'], palette='dark')
plt.title('Math Score', fontsize=25)
Example #38
            array = np.delete(array, list(array).index(i))
    titles = [r'$h = \frac{h_n}{2}$', r'$h = h_n$', r'$h = 2 * h_n$']
    l = 0
    fig, ax = plt.subplots(1, 3)
    plt.subplots_adjust(wspace=0.5)
    for bandwidth in [0.5, 1, 2]:
        kde = stats.gaussian_kde(array, bw_method='silverman')
        h_n = kde.factor
        fig.suptitle('Normal, n = ' + str(quan_of_numbers[k - 1]))
        ax[l].plot(array_global,
                   stats.norm.pdf(array_global, 0, 1),
                   color='blue',
                   alpha=0.5,
                   label='density')
        ax[l].set_title(titles[l])
        sns.kdeplot(array, ax=ax[l], bw=h_n * bandwidth, label='kde')
        ax[l].set_xlabel('x')
        ax[l].set_ylabel('f(x)')
        ax[l].set_ylim([0, 1])
        ax[l].set_xlim([-4, 4])
        ax[l].legend()
        l += 1
    plt.show()
    k += 1

array_20 = np.random.standard_cauchy(20)
array_60 = np.random.standard_cauchy(60)
array_100 = np.random.standard_cauchy(100)
arrays = [array_20, array_60, array_100]
j = 1
array_global = np.arange(-4, 4, 0.01)
Hops = [np.array([nx.shortest_path_length(synth_net,n,sub) \
                              for n in list(synth_net.nodes())]),
        np.array([nx.shortest_path_length(tree,n,sub) \
                              for n in list(tree.nodes())])]

Dist = [np.array([nx.shortest_path_length(synth_net,n,sub,weight='geo_length') \
                              for n in list(synth_net.nodes())])*1e-3,
        np.array([nx.shortest_path_length(tree,n,sub,weight='geo_length') \
                              for n in list(tree.nodes())])*1e-3]

import matplotlib.pyplot as plt
import seaborn as sns
col = ['r', 'g', 'b']
fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(111)
sns.kdeplot(Hops[0], shade=False, color='r', label='Optimal network')
sns.kdeplot(Hops[1], shade=False, color='g', label='Random network')
ax.set_ylabel('Percentage of nodes', fontsize=20)
ax.set_xlabel('Hops from root node', fontsize=20)
ax.set_title("Hop distribution", fontsize=20)
ax.legend(loc='best', ncol=1, prop={'size': 20})
labels = ax.get_yticks()
ax.set_yticklabels(["{:.1f}".format(100.0 * i) for i in labels])
fig.savefig("{}{}.png".format(figpath + suffix, 'hopcomp'),
            bbox_inches='tight')

fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(111)
sns.kdeplot(Dist[0], shade=False, color='r', label='Optimal network')
sns.kdeplot(Dist[1], shade=False, color='g', label='Random network')
ax.set_ylabel('Percentage of nodes', fontsize=20)
Example #40
v2 = pd.Series(2 * v1 + np.random.normal(60, 15, 1000), name='v2')

# In[ ]:

plt.figure()
plt.hist(v1, alpha=0.7, bins=np.arange(-50, 150, 5), label='v1')
plt.hist(v2, alpha=0.7, bins=np.arange(-50, 150, 5), label='v2')
plt.legend()

# In[ ]:

# plot a kernel density estimation over a stacked barchart
plt.figure()
plt.hist([v1, v2], histtype='barstacked', density=True)
v3 = np.concatenate((v1, v2))
sns.kdeplot(v3)

# In[ ]:

plt.figure()
# we can pass keyword arguments for each individual component of the plot
sns.distplot(v3, hist_kws={'color': 'Teal'}, kde_kws={'color': 'Navy'})

# In[ ]:

sns.jointplot(v1, v2, alpha=0.4)

# In[ ]:

grid = sns.jointplot(v1, v2, alpha=0.4)
grid.ax_joint.set_aspect('equal')
Example #41
for wi, w in enumerate(which_plots):
    plt.close('all')
    fig, axes = plt.subplots(1,
                             2,
                             figsize=(FIGURE_WIDTH / 2, FIGURE_HEIGHT),
                             sharex=True,
                             sharey=True)
    for task, taskname, ax in zip(['traini', 'biased'],
                                  ['Basic task', 'Full task'], axes):

        # bivariate KDE
        sns.kdeplot(data=history_shift[(history_shift.task == task)].dropna(
            subset=[w[0], w[1]])[w[0]],
                    data2=history_shift[(history_shift.task == task)].dropna(
                        subset=[w[0], w[1]])[w[1]],
                    shade=True,
                    shade_lowest=False,
                    cmap='Greys',
                    ax=ax)

        # individual points
        sns.lineplot(x=w[0],
                     y=w[1],
                     units='subject_nickname',
                     estimator=None,
                     color='black',
                     alpha=0.3,
                     data=history_shift[(history_shift.task == task)],
                     marker='o',
                     ax=ax,
                     legend=False,
Example #42
# Configure the test data set with anomalous employment dates

app_test['DAYS_EMPLOYED_ANOM'] = app_test['DAYS_EMPLOYED'] == 365243
app_test['DAYS_EMPLOYED'].replace({365243: np.nan}, inplace = True)

print('There are %d anomalies in the test data out of %d entries' 
      % (app_test['DAYS_EMPLOYED_ANOM'].sum(), len(app_test)))

# Find correlations with the target and sort

correlations = app_train.corr()['TARGET'].sort_values()

# Display correlations
print('Most Positive Correlations:\n', correlations.tail(15))
print('\nMost Negative Correlations:\n', correlations.head(15))

# Find the correlation of the positive days since birth and target
app_train['DAYS_BIRTH'] = abs(app_train['DAYS_BIRTH'])
app_train['DAYS_BIRTH'].corr(app_train['TARGET'])

plt.style.use('fivethirtyeight')

# Plot the distribution of ages in years
plt.hist(app_train['DAYS_BIRTH'] / 365, edgecolor = 'k', bins = 25)
plt.title('Age of Client'); plt.xlabel('Age (years)'); plt.ylabel('Count');

plt.figure(figsize = (10,8))

# KDE plot of loans that were repaid on time
sns.kdeplot(app_train.loc[app_train['TARGET'] == 0, 'DAYS_BIRTH']/365, label = 'target ==0')
Example #43
plt.hist2d(x, y, bins=(15, 15), cmap=plt.cm.jet)
plt.xlabel("amygdala")
plt.ylabel("acc")
plt.title("2-D Histogram")
plt.colorbar()
plt.show()

#####################################################
# KDE Contour Plot
col = math.ceil(np.sqrt(len(ranges)))
row = math.ceil(np.sqrt(len(ranges)))
fig, ax = plt.subplots(figsize=(10, 10), ncols=col, nrows=row)
for i in range(len(ranges)):
    ax[int(i / col)][i % col].set_title("Orientation " + str(ranges[i]))
    sns.kdeplot(dataset[i]["amygdala"],
                dataset[i]["acc"],
                ax=ax[int(i / col)][i % col])
plt.show()


### alternative method to get the plot
### use the equations provided in the problem rather than packages
def gaussian_kernel(x, y):
    return math.exp(-((x**2) + (y**2)) / 2) / (math.sqrt(2 * math.pi))


def kernel_density_estimate(x_i, y_i, x, y, h):
    prob = 0
    m = len(x)
    for i in range(m):
        prob += gaussian_kernel((x_i - x[i]) / h, (y_i - y[i]) / h)
    # average over the m samples and scale by the squared bandwidth (2-D KDE)
    return prob / (m * h ** 2)
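# Hypothetical usage sketch, not part of the original example: evaluate the manual
# estimator on a grid and draw its contours, mirroring the sns.kdeplot panels above.
# It reuses 'dataset' from the code above; the index 0 and bandwidth h=0.01 are
# illustrative choices.
xs = np.linspace(dataset[0]["amygdala"].min(), dataset[0]["amygdala"].max(), 50)
ys = np.linspace(dataset[0]["acc"].min(), dataset[0]["acc"].max(), 50)
density = np.array([[kernel_density_estimate(xi, yi,
                                             dataset[0]["amygdala"].values,
                                             dataset[0]["acc"].values, 0.01)
                     for xi in xs] for yi in ys])
plt.contour(xs, ys, density)
plt.xlabel("amygdala")
plt.ylabel("acc")
plt.show()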
ax = kc_tax0.plot.hexbin(x='SqFtTotLiving',
                         y='TaxAssessedValue',
                         gridsize=30,
                         sharex=False,
                         figsize=(5, 4))
ax.set_xlabel('Finished Square Feet')
ax.set_ylabel('Tax Assessed Value')

plt.tight_layout()
plt.show()

# The _seaborn_ kdeplot is a two-dimensional extension of the density plot.

fig, ax = plt.subplots(figsize=(4, 4))
ax = sns.kdeplot(kc_tax0.SqFtTotLiving, kc_tax0.TaxAssessedValue, ax=ax)
ax.set_xlabel('Finished Square Feet')
ax.set_ylabel('Tax Assessed Value')

plt.tight_layout()
plt.show()

### Two Categorical Variables
# Load the `lc_loans` dataset

lc_loans = pd.read_csv(LC_LOANS_CSV)

# Table 1-8(1)
crosstab = lc_loans.pivot_table(index='grade',
                                columns='status',
                                aggfunc=lambda x: len(x),
    params,
    train_data,
    150,
    #early_stopping_rounds= 40,
    verbose_eval=4)

#Predict on test set
predictions_lgbm_prob = lgbm.predict(valid_early_x)

auc_lgb = roc_auc_score(valid_early_y, predictions_lgbm_prob)
print('AUC LGBM: {}'.format(auc_lgb))

#Ensemble and predict on test set
print('Predict on test set...')
test_pred_rf = clf_rf.predict_proba(test_df)[:, 1]
test_pred_ridge = clf_ridge.predict(test_df)
test_pred_gbm = lgbm.predict(test_df)

submission = pd.read_csv('sample_submission.csv')
submission['loan_default'] = (test_pred_ridge + test_pred_gbm +
                              test_pred_rf) / 3
submission.to_csv('submission_oof_ensemble_1.csv', index=False)

#plots
import matplotlib.pyplot as plt
import seaborn as sns

sns.kdeplot(test_pred_ridge, label='ridge')
#sns.kdeplot(test_pred_rf, label = 'rf')
sns.kdeplot(test_pred_gbm, label='gbm')
#sns.kdeplot(submission['loan_default'].values, label = 'ensemble')
    else:
        return 1


data["Sts_Val"] = data.apply(sts_val, axis=1)
data

#      <<<   BMI Report generation  >>>

data["Gender"].value_counts()

data["Status"].value_counts()

sns.jointplot(x='', y="Weight", data=data, kind="kde")

sns.kdeplot(data=data['Sts_Val'], data2=data["Weight"])

sns.barplot(x='Sts_Val', y='Weight', data=data, hue="Gender")

sns.countplot(x='Gender', data=data, hue='Sts_Val')

sns.boxplot(x='Sts_Val', y='Weight', data=data, hue='Gender')

sns.violinplot(x='Sts_Val', y='Weight', data=data, hue='Gender')

sns.stripplot(x='Sts_Val', y='Height', data=data, hue='Gender', dodge=True)

sns.catplot(x='Sts_Val', y='Height', data=data, hue='Gender', col='Gender')

sns.set_style('whitegrid')
sns.lmplot(
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

df = pd.read_csv(
    "C:\\Users\\Arun\Documents\\shanu\\kaggle\\googleplaystore.csv")
df.describe()
df.info()

df.isna().sum().sort_values(ascending=False)

df.dropna(how="any", inplace=True)

df.isna().sum().sort_values(ascending=False)

sns.kdeplot(df["Rating"], legend=True)
plt.show()
# Ratings are concentrated between 4 and 5, where most apps fall

df["Rating"].mean()
sns.kdeplot(df["Rating"], legend=True)
plt.show()
Example #48
                a = po.get_action(s)
                # env.render()

                s_, r, done, _ = env.step(a * high[0])

                s_list.append(s)

                s = s_

                if done:

                    game_num += 1

                    break

            if game_num >= 500:

                for state_index in range(obs_dim):
                    this_state = np.array([state[state_index] for state in s_list])

                    ax.hist(this_state, bins=100, histtype="stepfilled", density=True, alpha=0.6)
                    sns.kdeplot(this_state, shade=True)

                    plt.savefig("./{}_distribute/state[{}].jpg".format(policy_type, state_index))
                    plt.close()

                break



Example #49
sb32.py

Ref:
https://seaborn.pydata.org/examples/index.html
https://seaborn.pydata.org/examples/cubehelix_palette.html
"""

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

sns.set(style="dark")
rs = np.random.RandomState(50)

# Set up the matplotlib figure
f, axes = plt.subplots(3, 3, figsize=(9, 9), sharex=True, sharey=True)

# Rotate the starting point around the cubehelix hue circle
for ax, s in zip(axes.flat, np.linspace(0, 3, 10)):

    # Create a cubehelix colormap to use with kdeplot
    cmap = sns.cubehelix_palette(start=s, light=1, as_cmap=True)

    # Generate and plot a random bivariate dataset
    x, y = rs.randn(2, 50)
    sns.kdeplot(x, y, cmap=cmap, shade=True, cut=5, ax=ax)
    ax.set(xlim=(-3, 3), ylim=(-3, 3))

f.tight_layout()
plt.show()
        'promotion_last_5years': 'promotion',
        'sales': 'department',
        'left': 'turnover'
    })
front = df['turnover']
df.drop(labels=['turnover'], axis=1, inplace=True)
df.insert(0, 'turnover', front)

corr = df.corr()
sns.heatmap(data=corr,
            yticklabels=corr.columns.values,
            xticklabels=corr.index.values)
plt.show()

fig = plt.figure(figsize=(15, 4))
sns.kdeplot(x='satisfaction', data=df, hue='turnover', shade=True)
plt.show()

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, confusion_matrix, \
    precision_recall_curve, roc_auc_score

df['department'] = df['department'].astype('category').cat.codes
df['salary'] = df['salary'].astype('category').cat.codes

target_name = 'turnover'
x = df.drop('turnover', axis=1)
y = df[target_name]

x_train, x_test, y_train, y_test = train_test_split(x,
    def __getPlots2(self, fig, axes, color2):
        ttc = []
        yyc = []
        erc = []
        yyem = []
        ttem = []
        erem = []
        for j in range(self.numLC):
            rr = self.mjds[j] - self.mjds[j].min()
            mx = int(np.ceil(rr.max())) + 1
            t, y, e = LC_opsim(self.mjds[j], self.t[150:mx + 150],
                               self.y[150:mx + 150,
                                      j], self.greska2[150:mx + 150, j])
            ttc.append(t)
            yyc.append(y)
            erc.append(e)
            t, y, e = LC_opsim(self.mjds[j], self.tp[150:mx + 150],
                               self.response[150:mx + 150, j],
                               self.greska2e[150:mx + 150, j])
            ttem.append(t)
            yyem.append(y)
            erem.append(e)

        import statistics
        # https://www.aanda.org/articles/aa/full_html/2013/11/aa21781-13/aa21781-13.html
        fvarc = []
        fvarem = []
        meanerc = []
        meanerm = []
        for j in range(self.numLC):
            tc = ttc[j]
            c = yyc[j]
            erc1 = erc[j]
            stdc2 = np.std(c**2)
            erc2m = np.mean(erc1)
            ercm = 100 * np.mean(erc1) / np.mean(c)
            te = ttem[j]
            em = yyem[j]
            erm = erem[j]
            erm2m = np.mean(erm)
            ermm = 100 * np.mean(erm) / np.mean(em)
            stdem2 = np.std(em**2)
            meanerc.append(100 * np.mean(erc1 / c))
            meanerm.append(100 * np.mean(erm / em))
            fvarc.append(np.sqrt(np.std(c**2) - erc2m) / (np.mean(c)))
            fvarem.append(np.sqrt(np.std(em**2) - erm2m) / (np.mean(em)))

        caden = []
        brojposm = []
        for j in range(self.numLC):
            tc = self.mjds[j]
            caden.append(np.mean(np.diff(tc)))
            brojposm.append(len(self.mjds[j]))

        zz = 0.05

        lags = np.asarray(self.lags)
        fvarc = np.asarray(fvarc)
        meanerc = np.asarray(meanerc)
        caden = np.asarray(caden)

        xx = np.array(fvarc) / np.array(meanerc)

        yy = lags / ((1 + zz) * caden)

        zzcrt = -3.356 * xx - 0.2638 * yy

        zzzcrtred = (-0.002415) * xx - 3.97756 * yy

        sns.kdeplot(zzzcrtred,
                    shade=None,
                    ax=axes,
                    alpha=0.3,
                    label='filter ' + self.fil,
                    color=color2)
        kdeline1 = axes.lines[0]
        xs1 = kdeline1.get_xdata()
        ys1 = kdeline1.get_ydata()

        xp = np.linspace(xx.min(), xx.max(), 50)
        yp = np.linspace(yy.min(), yy.max(), 50)
        xxx, yyy = np.meshgrid(xp, yp)
        zzz = (-0.002415) * xxx - 3.97756 * yyy
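        # The method is truncated here; the grid above would presumably feed a
        # contour call on `axes`, e.g. axes.contour(xxx, yyy, zzz) (assumed).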
            # reconstructed call: its head was truncated; sns.distplot is
            # assumed from the 'ground_distplot.png' filename below
            sns.distplot(label.cpu().numpy(),
                         label='ground truth', bins=150)

            plt.xlabel('Partial charge', fontsize=fontsize_label_legend, **hfont)
            plt.ylabel('No of atoms', fontsize=fontsize_label_legend, **hfont)
            plt.legend(frameon=False, prop={"family": "Times New Roman", 'size': fontsize_label_legend})
            plt.tick_params(axis='both', which='major', labelsize=17)
            plt.savefig('results/graphs/ground_distplot.png', format='png', dpi=300, bbox_inches="tight")
            plt.show()

            # -------------------------------------------------------------------
            # histogram of prediction and ground truth

            colors = ['green', 'dodgerblue', 'deeppink']
            plt.figure(figsize=(8, 8), dpi=80)

            sns.kdeplot(label.cpu().numpy(), shade=True, color="orange", label="ground truth", alpha=.7)
            sns.kdeplot(pred.cpu().numpy(), shade=True, color=colors[index], label=system, alpha=.7)
            plt.xlabel('Partial charge', fontsize=fontsize_label_legend, **hfont)
            plt.ylabel('No of atoms', fontsize=fontsize_label_legend, **hfont)
            plt.legend(frameon=False, prop={"family": "Times New Roman", 'size': fontsize_label_legend})
            plt.tick_params(axis='both', which='major', labelsize=17)
            plt.savefig('results/graphs/ground_{}_histogram.png'.format(system), format='png', dpi=300,
                        bbox_inches="tight")

            plt.show()

            # -------------------------------------------------------------------
            #             # saving mean sigmas of elements
            #             element_types_labels = np.zeros(len(label))

            #             for element_index in range(elements_number):
def train_and_predict(csv_file, build_new=True):
    ''' Build and train a new model or continue training a saved model. Includes 
    density plots of distances between the images of positive and negative pairs 
    before and after training for a first sanity and consistency check.
    '''
    X_train, X_val, X_test, y_train, y_val, y_test = split_pairdata(csv_file)
    
    pos_pairs = np.concatenate((X_train[y_train==1], X_val[y_val==1], X_test[y_test==1]))
    neg_pairs = np.concatenate((X_train[y_train==0], X_val[y_val==0], X_test[y_test==0]))
    
    if build_new:
        model = siam_cnn()
        optimizer = RMSprop()
        model.compile(loss=contrastive_loss, optimizer=optimizer)
        print("Model compiled.")
    else:
        model = load_model('models/modelxx.h5', custom_objects={'contrastive_loss': contrastive_loss})
        print('Model loaded.')
        
    untrained_pred_pos = model.predict([pos_pairs[:,0], pos_pairs[:,1]])
    untrained_pred_neg = model.predict([neg_pairs[:,0], neg_pairs[:,1]])
    
    #Density plot of distances before training
    print('Plotting density of distances.. (please exit plot window to continue.)')    
    plt.figure(figsize=(4,4))
    plt.xlabel('Distance')
    plt.ylabel('Frequency')  
    sns.kdeplot(untrained_pred_neg[:,0], shade=True, color='red', label='Distant pairs')
    sns.kdeplot(untrained_pred_pos[:,0], shade=True, color='green', label='Close pairs')
    plt.legend(loc=1)
    #plt.savefig('untrained_pred.png')
    plt.show()
        
    print('Begin training...')
    model.fit([X_train[:,0], X_train[:,1]], y_train,
              validation_data = ([X_val[:,0], X_val[:,1]], y_val),
              batch_size=128,
              nb_epoch=10)
              
    time.sleep(3)
    print('Training finished.')
    #print('Saving model..')    
    #model.save('models/best_model.h5')
    #print('Model saved.')
    
    trained_pred_pos = model.predict([pos_pairs[:,0], pos_pairs[:,1]])
    trained_pred_neg = model.predict([neg_pairs[:,0], neg_pairs[:,1]])
    
    #Density plot of distances after training
    print('Plotting density of distances.. (please exit plot window to continue.)')    
    plt.figure(figsize=(4,4))   
    plt.xlabel('Distance')
    plt.ylabel('Frequency')
    sns.kdeplot(trained_pred_neg[:,0], shade=True, color='red', label='Distant pairs')
    sns.kdeplot(trained_pred_pos[:,0], shade=True, color='green', label='Close pairs')
    plt.legend(loc=1)
    #plt.savefig('trained_pred.png')    
    plt.show()
   
    y_pred = model.predict([X_test[:,0], X_test[:,1]])
    return y_test, y_pred
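
# `siam_cnn` and `contrastive_loss` are referenced above but not defined in this
# excerpt. Below is a minimal sketch of a standard contrastive loss
# (Hadsell et al.), assuming y_true == 1 marks a close pair and a margin of 1;
# the definitions actually used by this script may differ.
from keras import backend as K

def contrastive_loss(y_true, y_pred, margin=1.0):
    # y_pred is the predicted distance between the two images of a pair
    positive_term = y_true * K.square(y_pred)
    negative_term = (1.0 - y_true) * K.square(K.maximum(margin - y_pred, 0.0))
    return K.mean(positive_term + negative_term)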
Beispiel #54
0
"""
Multiple bivariate KDE plots
============================

_thumb: .6, .4
"""
import seaborn as sns
import matplotlib.pyplot as plt

sns.set(style="darkgrid")
iris = sns.load_dataset("iris")

# Subset the iris dataset by species
setosa = iris.query("species == 'setosa'")
virginica = iris.query("species == 'virginica'")

# Set up the figure
f, ax = plt.subplots(figsize=(8, 8))
ax.set_aspect("equal")

# Draw the two density plots
ax = sns.kdeplot(setosa.sepal_width, setosa.sepal_length,
                 cmap="Reds", shade=True, shade_lowest=False)
ax = sns.kdeplot(virginica.sepal_width, virginica.sepal_length,
                 cmap="Blues", shade=True, shade_lowest=False)

# Add labels to the plot
red = sns.color_palette("Reds")[-2]
blue = sns.color_palette("Blues")[-2]
ax.text(2.5, 8.2, "virginica", size=16, color=blue)
ax.text(3.8, 4.5, "setosa", size=16, color=red)
Beispiel #55
0
def plot_kde(train, test_A, test_B, test_C, col):
    fig, ax = plt.subplots(1, 5)
    sns.kdeplot(train[col], color='g', ax=ax[0])
    sns.kdeplot(test_A[col], color='r', ax=ax[1])
    sns.kdeplot(test_B[col], color='y', ax=ax[2])
    sns.kdeplot(test_C[col], color='m', ax=ax[3])
    sns.kdeplot(train[col], color='g', ax=ax[4])
    sns.kdeplot(test_A[col], color='r', ax=ax[4])
    sns.kdeplot(test_B[col], color='y', ax=ax[4])
    sns.kdeplot(test_C[col], color='m', ax=ax[4])
    plt.title('Distribution_' + col)
    plt.show()
Beispiel #56
0
g = g.set_ylabels("survival probability")


# %% Parch
g = sns.factorplot(x="Parch", y="Survived", data=train, kind="bar",
                   size=6, palette="muted")
g.despine(left=True)
g = g.set_ylabels("survival probability")

# %% Age
g = sns.FacetGrid(train, col='Survived')
g = g.map(sns.distplot, "Age")


# %% Explore Age distribution
g = sns.kdeplot(train["Age"][(train["Survived"] == 0) & (train["Age"].notnull())], color="Red", shade = True)
g = sns.kdeplot(train["Age"][(train["Survived"] == 1) & (train["Age"].notnull())], ax =g, color="Blue", shade= True)
g.set_xlabel("Age")
g.set_ylabel("Frequency")
g = g.legend(["Not Survived","Survived"])


# %% Fare
dataset['Fare'].isnull().sum()


# %%
dataset["Fare"] = dataset["Fare"].fillna(dataset["Fare"].median())

# Explore Fare distribution 
g = sns.distplot(dataset["Fare"], color="m", label="Skewness : %.2f"%(dataset["Fare"].skew()))
Beispiel #57
0
#
# However, I will try to fix *orientation_X* and *orientation_Y* as I explained before, scaling and normalizing data.
#
# ---
#
# ### Now with a new scale (more precision)
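
# A minimal sketch of the scaling step mentioned above; the column names come
# from the note, but the scaler choice and applying it to the `aux` frame used
# in the cell below are assumptions, not this notebook's confirmed approach.
from sklearn.preprocessing import StandardScaler

orientation_cols = ['orientation_X', 'orientation_Y']
scaler = StandardScaler()
aux[orientation_cols] = scaler.fit_transform(aux[orientation_cols])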

# In[ ]:

plt.figure(figsize=(26, 16))
for i, col in enumerate(aux.columns[3:13]):
    ax = plt.subplot(3, 4, i + 1)
    plt.title(col)
    for surface in classes:
        surface_feature = aux[aux['surface'] == surface]
        sns.kdeplot(surface_feature[col], label=surface)

# ### Histogram for main features

# In[ ]:

plt.figure(figsize=(26, 16))
for i, col in enumerate(data.columns[3:]):
    ax = plt.subplot(3, 4, i + 1)
    sns.distplot(data[col], bins=100, label='train')
    sns.distplot(test[col], bins=100, label='test')
    ax.legend()

# ## Step 0 : quaternions

# Orientation - quaternion coordinates
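
# The heading above introduces the quaternion orientation coordinates. As a
# hedged sketch (not the notebook's own code), a common first step is to
# convert quaternions to Euler angles; the helper below implements the standard
# conversion, with the component order (x, y, z, w) taken as an assumption.
import numpy as np

def quaternion_to_euler(qx, qy, qz, qw):
    """Standard quaternion-to-Euler conversion (roll, pitch, yaw in radians)."""
    roll = np.arctan2(2 * (qw * qx + qy * qz), 1 - 2 * (qx ** 2 + qy ** 2))
    pitch = np.arcsin(np.clip(2 * (qw * qy - qz * qx), -1, 1))
    yaw = np.arctan2(2 * (qw * qz + qx * qy), 1 - 2 * (qy ** 2 + qz ** 2))
    return roll, pitch, yaw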
Beispiel #58
0
import seaborn as sns
import matplotlib.pyplot as plt

# sns.set(style="white", color_codes=True)
# grid = sns.JointGrid(X_embedded[:,0], X_embedded[:,1], space=0, size=6, ratio=50)
# grid.plot_joint(plt.scatter, color="g")
# grid.plot_marginals(sns.rugplot, height=1, color="g")
#

sns.set(style="darkgrid")
f, ax = plt.subplots(figsize=(8, 8))
ax.set_aspect("equal")

# Draw the two density plots
ax = sns.kdeplot(item_embedded[:,0], item_embedded[:,1],
                 cmap="Reds", shade=True, shade_lowest=False)
ax = sns.kdeplot(user_embedded[:,0],user_embedded[:,1],
                 cmap="Blues", shade=True, shade_lowest=False)




import gc
import numpy as np
import pandas as pd
import os
from sltools import load_pickle
from scipy.sparse import vstack
from scipy.sparse import csr_matrix
from scipy.sparse import diags
from scipy.sparse import coo_matrix
# In[72]:

# Check the distribution and correlation of each dataset
data_ = np.vstack([
    feature_train.mean(axis=0),
    feature_validation.mean(axis=0),
    feature_test.mean(axis=0)
])
fig, axs = plt.subplots(1, 2, figsize=(10, 5))
sns.heatmap(np.corrcoef(data_), annot=True, ax=axs[0])
axs[0].axis('equal')
axs[0].set_xticklabels(
    ["Training dataset", "Validation dataset", "Test dataset"], rotation=45)
axs[0].set_yticklabels(
    ["Training dataset", "Validation dataset", "Test dataset"], rotation=0)
sns.kdeplot(data_[0, :], ax=axs[1])
sns.kdeplot(data_[1, :], ax=axs[1])
sns.kdeplot(data_[2, :], ax=axs[1])
plt.title("Distribution", fontsize=20)
plt.legend(["Training dataset", "Validation dataset", "Test dataset"])
plt.subplots_adjust(wspace=1)
plt.show()

# In[73]:

# Standardize the data
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
feature_train_ = scaler.fit_transform(feature_train)

feature_validation_ = scaler.transform(
    feature_validation)  # standardize the validation and test sets with the training-set parameters
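# The excerpt is cut off here; per the comment above, the test features would be
# transformed the same way (the `feature_test_` name is an assumption):
feature_test_ = scaler.transform(feature_test)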
Beispiel #60
0
            return 'virginica'


#terget_df = target.apply(rename,axis= 1)
##iris = pd.concat([iris,tar],axis=1)

import matplotlib.pyplot as plt

sns.set_style('whitegrid')
#setosa = iris[iris['target']==0]

#sns.pairplot(iris_sns,hue= 'species', palette='Dark2')
plt.show()  # sns.plt was removed from seaborn; call matplotlib's pyplot directly

setosa = iris_sns[iris_sns['species'] == 'setosa']
sns.kdeplot(setosa['sepal_width'],
            setosa['sepal_length'],
            cmap="plasma",
            shade=True,
            shade_lowest=False)
#sns.plt.show()
#sns.kdeplot(iris_df['sepal width (cm)'],iris_df['sepal length (cm)'], cmap='Blues',shade=True, shade_lowest=False)
#sns.kdeplot(setosa.sepal_width, setosa.sepal_length,cmap="Reds", shade=True, shade_lowest=False)

#sns.plt.show()

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

X = iris_sns.drop('species', axis=1)
y = iris_sns['species']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)

svc_model = SVC()
svc_model.fit(X_train, y_train)