Ejemplo n.º 1
0
def display_trial_stats(df, title_prefix, ylim_bottom, ylim_top):
    """
    Print and plot summary statistics for a simulation stats DataFrame.

    The DataFrame is read through these columns: ``Trial``,
    ``reached_destination``, ``trial_length`` (number of steps in each trial),
    ``total_reward`` and ``negative_reward``.

    Output:
      * a printed line with how many trials reached the destination,
      * a ``describe()`` table of the three numeric columns,
      * a time-series figure of the three numeric columns, with a green/red
        rug marking the trials that did / did not reach the destination.

    Parameters
    ----------
    df : DataFrame with the columns listed above.
    title_prefix : str prepended to the figure title.
    ylim_bottom, ylim_top : y-axis limits of the figure.
    """
    # Keep the explicit comparisons: they select exactly the rows whose flag
    # equals True / False, as the original code did.
    successes = df[df.reached_destination == True].Trial  # noqa: E712
    failures = df[df.reached_destination == False].Trial  # noqa: E712

    # Function-call form of print works on both Python 2 and Python 3 when
    # given a single argument.
    print("The destination was reached in {} out of {} trials.".format(
        successes.shape[0], df.shape[0]))
    display(df[['total_reward', 'negative_reward', 'trial_length']].describe().T)

    # NOTE(review): `sns.plt` and `sns.tsplot` only exist in old seaborn
    # releases -- confirm the pinned seaborn version before upgrading.
    sns.set(font_scale=1.5, style={"axes.facecolor": "white"})
    sns.plt.figure(figsize=(16, 8))
    ax = sns.tsplot(df.trial_length, color='.75', legend=True, condition='Trial Length')
    ax = sns.tsplot(df.total_reward, color='#106B70', legend=True, condition='Total Reward')
    ax = sns.tsplot(df.negative_reward, color='#D43500', legend=True, condition='Negative Reward')
    # Full-height, translucent rug lines shade each trial by outcome.
    ax = sns.rugplot(successes, color='green', height=1, linewidth=10, alpha=0.1)
    ax = sns.rugplot(failures, color='red', height=1, linewidth=10, alpha=0.1)
    sns.plt.legend(labels=['Trial Length', 'Total Reward', 'Negative Reward', 'Reached Destination'], frameon=True)
    ax.set(xlabel='Trial', ylabel='Value')
    ax.set_title(title_prefix + ': Trial Length, Total Reward, and Negative Reward for each Trial')
    sns.plt.ylim(ylim_bottom, ylim_top)
    # Horizontal reference line at y = 0.
    # NOTE(review): the x-extent is hard-coded to 0..100 -- presumably the
    # simulation always runs 100 trials; confirm.
    sns.plt.plot([0, 100], [0, 0], linewidth=1, color='.5')
Ejemplo n.º 2
0
def plot_rugplot(df, column='AirTime', jitter=0.0, seed=0):
    """
    Draw a rug plot of one DataFrame column, optionally with jitter.

    Parameters
    ----------
    df: A pandas.DataFrame
    column: Name of the column of "df" to plot. Also used as the x label.
    jitter: An int or float. Default: 0.
            When jitter > 0, each value is shifted by a sample drawn with
            numpy.random.normal() from a normal distribution with zero
            mean and standard deviation equal to "jitter".
    seed: An int passed to numpy.random.seed() before sampling the jitter
          (only when jitter > 0).

    Returns
    -------
    A matplotlib.axes.Axes
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.set_xlabel(column)
    ax.set_ylim(0, 1)

    values = df[column]
    if jitter > 0:
        # Seed the global NumPy generator so the jitter is reproducible.
        np.random.seed(seed)
        offsets = np.random.normal(0, jitter, len(values))
    else:
        offsets = 0

    sns.rugplot(values + offsets, height=0.5, ax=ax)
    return ax
def kde_tissue(tissue, q, genes, x, y, dfplot, dfindex, ax, label, col='b'):
    """
    Draw a KDE of the values belonging to genes expressed in one tissue.

    tissue -- tissue to plot (matched against dfindex.tissue)
    q -- threshold; only rows of dfplot with dfplot[y] < q are used
    genes -- name of the dfplot column holding the gene identifiers
    x -- name of the dfplot column holding the values to plot
    y -- name of the dfplot column to threshold on (q or p value)
    dfplot -- dataframe containing the columns named by x, y and genes
    dfindex -- dataframe with `expressed`, `tissue` and `gene` columns
    ax -- axis to plot in
    label -- legend label; the number of unique genes is appended to it
    col -- color

    Nothing is drawn when 15 or fewer values are selected. A rug is added
    under the KDE only when there are at most 20 values.
    """
    # Genes flagged as expressed in the requested tissue.
    in_tissue = (dfindex.expressed == 1) & (dfindex.tissue == tissue)
    tissue_genes = dfindex[in_tissue].gene

    # Rows of dfplot for those genes that also pass the threshold.
    keep = (dfplot[genes].isin(tissue_genes)) & (dfplot[y] < q)
    selected = dfplot[keep]
    values = selected[x]
    n_genes = len(selected[genes].unique())

    if len(values) <= 15:
        return

    sns.kdeplot(values, color=col, label=label + ' n= {0}'.format(n_genes),
                ax=ax, lw=5, cut=0.5)
    if len(values) <= 20:
        sns.rugplot(values, color=col, ax=ax, height=.07, lw=2)
Ejemplo n.º 4
0
def dist_small_multiples(df, figsize=(20, 20)):
    """
    Small multiples plots of the distribution of a dataframe's variables.

    One subplot is created per column, laid out on the smallest square grid
    that fits them all. Each subplot shows a shaded KDE plus a rug of the
    column's values, hides the y tick labels, and puts x ticks only at the
    column's minimum and maximum.

    Parameters
    ----------
    df : DataFrame whose columns are plotted.
    figsize : figure size passed to ``plt.figure``.

    Returns
    -------
    (fig, axes) : the figure and the list of per-column axes, in column order.
    """
    import math

    sns.set_style("white")

    num_plots = len(df.columns)
    n = int(math.ceil(math.sqrt(num_plots)))

    fig = plt.figure(figsize=figsize)
    axes = [plt.subplot(n, n, i) for i in range(1, num_plots + 1)]

    # Same rug color for every subplot; looked up once.
    rug_color = sns.color_palette("husl", 3)[0]

    # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
    for ax, (name, values) in zip(axes, df.items()):
        sns.kdeplot(values, shade=True, ax=ax, legend=False)
        sns.rugplot(values, ax=ax, c=rug_color)
        for tick_label in ax.get_yticklabels():
            tick_label.set_visible(False)
        ax.xaxis.set_ticks([values.min(), values.max()])
        ax.set_title(name)
    sns.despine(left=True, trim=True, fig=fig)
    plt.tight_layout()
    return fig, axes
Ejemplo n.º 5
0
def plot_load(load, nb_resources=None, ax=None, normalize=False,
              time_scale=False, load_label="load",
              UnixStartTime=0, TimeZoneString='UTC'):
    '''
    Plots the number of used resources against time.

    :load: load data exposing a ``load`` column; it is copied, never
        modified (presumably a time-indexed pandas DataFrame -- confirm
        against callers)
    :nb_resources: total number of resources; when given and `normalize`
        is False a horizontal "maximum resources" line is drawn
    :ax: axes to draw on; defaults to the current axes
    :normalize: if True normalize by the number of resources
        `nb_resources` (falling back to the maximum observed load when
        `nb_resources` is None)
    :time_scale: if True the index is replaced by datetimes computed from
        the ``time`` column plus `UnixStartTime` seconds, expressed in
        `TimeZoneString`
    :load_label: label used for the load curve and in the legend texts
    :UnixStartTime: offset in seconds added to the ``time`` column
    :TimeZoneString: time zone the datetime index is converted to
    '''
    mean = metrics.load_mean(load)
    u = load.copy()

    if time_scale:
        # make the time index a column
        u = u.reset_index()
        # convert timestamp to datetime
        u.index = pd.to_datetime(u['time'] + UnixStartTime,
                                 unit='s')
        # tz_localize/tz_convert return new indexes, so the result has to be
        # assigned back for the requested time zone to take effect.
        u.index = u.index.tz_localize('UTC').tz_convert(TimeZoneString)

    if normalize and nb_resources is None:
        nb_resources = u.load.max()

    if normalize:
        u.load = u.load / nb_resources
        mean = mean / nb_resources

    # get an axe if not provided
    if ax is None:
        ax = plt.gca()

    # leave room to have better view
    ax.margins(x=0.1, y=0.1)

    # plot load
    u.load.plot(drawstyle="steps-post", ax=ax, label=load_label)

    # plot a line for max available area
    if nb_resources and not normalize:
        ax.plot([u.index[0], u.index[-1]],
                [nb_resources, nb_resources],
                linestyle='-', linewidth=2,
                label="Maximum resources ({})".format(nb_resources))

    # plot a line for mean utilisation
    ax.plot([u.index[0], u.index[-1]],
            [mean, mean],
            linestyle='--', linewidth=1,
            label="Mean {0} ({1:.2f})".format(load_label, mean))
    # mark every instant where the load is exactly zero
    sns.rugplot(u.load[u.load == 0].index, ax=ax, color='r')
    # Empty scatter carrying the legend entry for the rug marks.
    # FIXME: Add legend when this bug is fixed
    # https://github.com/mwaskom/seaborn/issues/1071
    ax.scatter([], [], marker="|", linewidth=1, s=200,
               label="Reset event ({} == 0)".format(load_label), color='r')

    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
def kde_value(value, q, dfplot, dfindex, ax, label, col='b', min_length=10, rug_length=20):
    """
    Draw a KDE of `dfplot.b` for the genes whose `dfindex.effect` equals `value`.

    value -- effect value used to select genes from dfindex
    q -- threshold; only rows of dfplot with qval < q are used
    dfplot -- dataframe with `ens_gene`, `qval` and `b` columns
    dfindex -- dataframe with `effect` and `gene` columns
    ax -- axis to plot in
    label -- legend label; the number of unique genes is appended to it
    col -- color
    min_length -- a KDE is drawn only when more than this many values remain
    rug_length -- a rug is added when fewer than this many values remain

    When too few values remain, a message is printed instead of plotting.
    """
    matching_genes = dfindex[dfindex.effect == value].gene
    significant = (dfplot.ens_gene.isin(matching_genes)) & (dfplot.qval < q)

    subset = dfplot[significant]
    betas = subset.b
    n_genes = len(subset.ens_gene.unique())
    legend_text = label + ' n= {0}'.format(n_genes)

    if len(betas) <= min_length:
        print('too few values to plot {0}'.format(legend_text))
        return

    sns.kdeplot(betas, color=col, label=legend_text, ax=ax,
                lw=5, cut=0.5)
    if len(betas) < rug_length:
        sns.rugplot(betas, color=col, ax=ax, height=.1, lw=2)
Ejemplo n.º 7
0
    def start(self):
        """
        Draw the SPE spectrum and SPE-fit-value distribution plots.

        Reads ``self.hist``, ``self.edges``, ``self.spe``, ``self.spe_sigma``
        and the masking helper ``self.dead``, and draws onto figures already
        held by the instance (``fig_spectrum_all``, ``fig_spectrum_tm_list``,
        ``fig_combgaus``, ``fig_kde``, ``fig_hist``). Nothing is returned.
        """

        # Normalise histogram: divide each row by its area (bin width * count)
        norm = np.sum(np.diff(self.edges, axis=1) * self.hist, axis=1)
        hist = self.hist / norm[:, None]

        # Roll axis for easier plotting
        hist_r = np.rollaxis(hist, 1)
        nbins, npix = hist_r.shape
        # Only the first row of edges is used for the x values
        # (presumably all pixels share the same binning -- TODO confirm)
        e = self.edges[0]
        # Duplicate every histogram row and every edge (dropping the first and
        # last duplicated edge) so each bin contributes two plotted points --
        # presumably to draw a step-like outline.
        hist_tops = np.insert(hist_r, np.arange(nbins), hist_r, axis=0)
        edges_tops = np.insert(e, np.arange(e.shape[0]), e, axis=0)[1:-1]

        # Mask dead pixels
        spe = self.dead.mask1d(self.spe)
        spe_sigma = self.dead.mask1d(self.spe_sigma)
        hist_tops = self.dead.mask2d(hist_tops)

        # Spectrum with all pixels
        self.log.info("Plotting: spectrum_all")
        ax_spectrum_all = self.fig_spectrum_all.add_subplot(1, 1, 1)
        ax_spectrum_all.semilogy(edges_tops, hist_tops, color='b', alpha=0.2)
        ax_spectrum_all.set_xlabel("Amplitude (p.e.)")
        ax_spectrum_all.set_ylabel("Probability")

        # Spectrum for each tm
        self.log.info("Plotting: spectrum_tm")
        # assumes the pixel axis is laid out as 32 groups of 64 pixels,
        # one group per TM -- TODO confirm
        hist_tops_tm = np.reshape(hist_tops, (hist_tops.shape[0], 32, 64))
        for tm, fig in enumerate(self.fig_spectrum_tm_list):
            ax = fig.add_subplot(1, 1, 1)
            ax.set_title("SPE Spectrum, TM {}".format(tm))
            ax.semilogy(edges_tops, hist_tops_tm[:, tm], color='b', alpha=0.2)
            ax.set_xlabel("Amplitude (p.e.)")
            ax.set_ylabel("Probability")

        # Combined gaussian of each spe value: sum one normal pdf per unmasked
        # (spe, spe_sigma) pair, then normalise the sum to unit area.
        self.log.info("Plotting: combined_gaussian")
        ax_comgaus = self.fig_combgaus.add_subplot(1, 1, 1)
        x = np.linspace(-1, 4, 200)
        kernels = []
        for val, sigma in zip(spe.compressed(), spe_sigma.compressed()):
            kernel = stats.norm(val, sigma).pdf(x)
            kernels.append(kernel)
            # plt.plot(x, kernel, color="r")
        sns.rugplot(spe.compressed(), color=".2", linewidth=1, ax=ax_comgaus)
        density = np.sum(kernels, axis=0)
        density /= integrate.trapz(density, x)
        ax_comgaus.plot(x, density)
        ax_comgaus.set_xlabel("SPE Fit Value (p.e.)")
        ax_comgaus.set_ylabel("Sum")

        # Kernel density estimate
        self.log.info("Plotting: spe_kde")
        ax_kde = self.fig_kde.add_subplot(1, 1, 1)
        sns.rugplot(spe.compressed(), color=".2", linewidth=1, ax=ax_kde)
        sns.kdeplot(spe.compressed(), shade=True, ax=ax_kde)
        ax_kde.set_xlabel("SPE Fit Value (p.e.)")
        ax_kde.set_ylabel("KDE")

        # Histogram
        self.log.info("Plotting: histogram")
        ax_hist = self.fig_hist.add_subplot(1, 1, 1)
        sns.distplot(spe.compressed(), kde=False, rug=True, ax=ax_hist)
        ax_hist.set_xlabel("SPE Fit Value (p.e.)")
        ax_hist.set_ylabel("N")
    means2 = np.append(means2, m2)
    # Accumulate into stds2 itself, in line with means2 / means3 / stds3;
    # assigning to any other name would leave stds2 without the new value.
    stds2 = np.append(stds2, s2)

    # Third sample: draw, then record its mean and standard deviation.
    sample3 = np.random.normal(mu, std, sample_size3)
    m3 = np.mean(sample3)
    s3 = np.std(sample3)
    means3 = np.append(means3, m3)
    stds3 = np.append(stds3, s3)

    cont += 1

# Figure 2: population pdf with a rug of the sample means for sample size 1.
Fig2 = plt.figure(2)
# x / y are passed by keyword: recent seaborn releases reject positional data.
ax2 = sns.lineplot(x=x, y=pdf, color='red')
ax2.fill_between(x, pdf, color="tomato", alpha=0.7)
#sns.scatterplot(means1,np.zeros(len(means1)),s=90,alpha=0.6,zorder=10)
sns.rugplot(means1)
# Short vertical marker at the population mean: both points share x = mu.
plt.plot([mu, mu], [0, 0.01], color='firebrick', linewidth=3, zorder=1)
plt.text(40, 0.0175, 'N= ' + str(sample_size1), size=12)
plt.title("Erro Padrão da Média")

# Figure 21: same plot for the sample means obtained with sample size 2.
Fig21 = plt.figure(21)
ax2 = sns.lineplot(x=x, y=pdf, color='red')
ax2.fill_between(x, pdf, color="tomato", alpha=0.7)
#sns.scatterplot(means2,np.zeros(len(means2)),s=90,alpha=0.6,zorder=10)
sns.rugplot(means2)
plt.plot([mu, mu], [0, 0.01], color='firebrick', linewidth=3, zorder=1)
plt.text(40, 0.0175, 'N= ' + str(sample_size2), size=12)
plt.title("Erro Padrão da Média")

# Figure 22: population pdf again (remaining decoration follows).
Fig22 = plt.figure(22)
ax2 = sns.lineplot(x=x, y=pdf, color='red')
Ejemplo n.º 9
0
    endnote_height=0,
    title_height=0)
# Shots of each team on the shared pitch axes; marker area scales with xG.
sc_team1 = pitch.scatter(df_team1.x,
                         df_team1.y,
                         s=df_team1.shot_statsbomb_xg * 700,
                         ec='black',
                         color='#ba495c',
                         ax=axes[0])
# Team 2 markers are sized from team 2's own xG values.
sc_team2 = pitch.scatter(df_team2.x,
                         df_team2.y,
                         s=df_team2.shot_statsbomb_xg * 700,
                         ec='black',
                         color='#697cd4',
                         ax=axes[0])
# note height=1 means that the whole of the marginal axes are taken up by the rugplots
team1_rug_y = sns.rugplot(y=df_team1.y, ax=axes[1], color='#ba495c', height=1)
team2_rug_y = sns.rugplot(y=df_team2.y, ax=axes[3], color='#697cd4', height=1)
team1_rug_x = sns.rugplot(x=df_team1.x, ax=axes[2], color='#ba495c', height=1)
team2_rug_x = sns.rugplot(x=df_team2.x, ax=axes[2], color='#697cd4', height=1)
txt1 = axes[0].text(x=15,
                    y=70,
                    s=team1,
                    fontproperties=fm.prop,
                    color='#ba495c',
                    ha='center',
                    va='center',
                    fontsize=30)
txt2 = axes[0].text(x=105,
                    y=70,
                    s=team2,
                    fontproperties=fm.prop,
Ejemplo n.º 10
0
# Only a single numeric variable should be passed in.
# bins: split the variable into n bins.
b = df_last['평당분양가격'].hist(bins=10)

# Draw a histogram with distplot.
# distplot raises an error when there are missing values (per the original
# author's note), so only the non-null rows are kept.
price = df_last.loc[df_last['평당분양가격'].notnull(), '평당분양가격']  # the column to use must be named as well
price

sns.distplot(price)

# kde, rug
# kde (kernel density): draws a curve smoother than a histogram by overlapping kernel functions.
# rug (plot of the actual values): used to depict how the data are distributed.
sns.kdeplot(price, shade=True)
sns.rugplot(price)

# Histogram + KDE curve + rug in a single figure-level call.
sns.displot(price, kde=True, rug=True)

# Same plot, colored by the '전용면적' column.
sns.displot(data=df_last, x='평당분양가격', kde=True, rug=True, hue='전용면적')

# Same again, additionally split into one facet per '전용면적' value (one facet per row).
sns.displot(data=df_last, x='평당분양가격', kde=True, rug=True, hue='전용면적', col='전용면적', col_wrap=1, aspect=2)

# Draw several plots as subplots: one KDE row per value of '지역명'.
g = sns.FacetGrid(df_last, row='지역명', height=1.7, aspect=4)
g.map(sns.kdeplot, '평당분양가격')

# pairplot on the rows that have a non-null price, colored by '지역명'
df_last_notnull = df_last.loc[df_last['평당분양가격'].notnull(),['연도', '월', '지역명', '평당분양가격', '전용면적']]
sns.pairplot(df_last_notnull, hue='지역명')
Ejemplo n.º 11
0
# Changing default bins: 20-bin histogram, no KDE curve, rug marks on the axis
sns.distplot(x, bins=20, kde=False, rug=True)

# Kernel Density - show shape of a distribution (no histogram bars)
sns.distplot(x, hist=False, rug=True)
sns.kdeplot(x, shade=True)

# Multiple Kernel Densities: default bandwidth vs. two explicit bandwidths
sns.kdeplot(x)
sns.kdeplot(x, bw=.2, label="bw: 0.2")
sns.kdeplot(x, bw=2, label="bw: 2")
plt.legend()

# Cuts the edges off: cut=0 is passed so the curve is limited to the data range
sns.kdeplot(x, shade=True, cut=0)
sns.rugplot(x)

# Plotting Parametric: histogram of gamma samples with a fitted gamma curve
# NOTE: x is rebound here, replacing the data used above.
x = np.random.gamma(6, size=200)
sns.distplot(x, kde=False, fit=stats.gamma)

# Scatterplots: 200 draws from a correlated bivariate normal
mean, cov = [0, 1], [(1, .5), (.5, 1)]
data = np.random.multivariate_normal(mean, cov, 200)
df = pd.DataFrame(data, columns=["x", "y"])
sns.jointplot(x="x", y="y", data=df)

# Hexbin Plots: 1000 draws, unpacked into separate x and y arrays
x, y = np.random.multivariate_normal(mean, cov, 1000).T
with sns.axes_style("white"):
    sns.jointplot(x=x, y=y, kind="hex", color="k")
#############################################################################################################
# 2. Kernel Density Estimation Plots
#############################################################################################################
# The normal imports
import numpy as np
from numpy.random import randn
import pandas as pd
from scipy import stats
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# Create dataset: 25 standard-normal samples
dataset = randn(25)
# Rug marks at the data points, a shaded Gaussian KDE, and a normalised
# histogram on the same axes.
sns.rugplot(dataset)
# The Gaussian kernel is seaborn's default, so no kernel argument is passed
# (recent seaborn releases reject the old `kernel` keyword).
sns.kdeplot(dataset, shade=True)
# `density=True` replaces the removed `normed=True` keyword of plt.hist.
plt.hist(dataset, density=True, color="#6495ED", alpha=.5)



Ejemplo n.º 13
0
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Make Korean text renderable in charts
# NOTE(review): the font path is a hard-coded Windows location -- this will
# presumably fail on other platforms; confirm the target environment.
from matplotlib import font_manager, rc, rcParams
font_name = font_manager.FontProperties(
    fname="c:/windows/Fonts/malgun.ttf").get_name()
rc('font', family=font_name)
rcParams['axes.unicode_minus'] = False  # needed when sign characters (-, +) are displayed
###
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

import seaborn as sns

iris = sns.load_dataset('iris')
x = iris.petal_length.values

# Simply mark the position of each data point.
sns.rugplot(x)

# Show the density of the data as a curve.
sns.kdeplot(x)
plt.title('꽃 잎 길이에 대한 Kernal 밀도 그래프')
plt.show()
Ejemplo n.º 14
0
           ncol=3,
           title='donor',
           title_fontsize=9)

### Subplot B - KDE histograms with ADCC data using CD56+ cells
# Draws into the lower-left cell of a 2x2 subplot grid.
plt.subplot(223)

# Shaded density curve of the EC50 column
sns.kdeplot(data_cd56['EC50'],
            bw=.4,
            shade=True,
            color='indigo',
            linewidth=2,
            legend=False)

# Rug marks for the individual EC50 values, in the same color
sns.rugplot(
    data_cd56['EC50'],
    color='indigo',
)

plt.ylabel("density", fontdict={'size': 8, 'weight': 'bold'})
plt.yticks(size=6)
plt.xlabel(r"log$_{10}$EC$_{50}$ (µg/mL)",
           fontdict={
               'size': 8,
               'weight': 'bold'
           })
# Dashed red vertical line at the mean EC50
plt.axvline(
    x=data_cd56['EC50'].mean(),  # equals -3.0042
    ls='--',
    linewidth=2,
    color='red')
plt.xlim((-5, 0))
Ejemplo n.º 15
0
def distribution_visualization_1vN(n,
                                   markers=['+', 'o', 's', 'd', '.'],
                                   colors=['c', 'r', 'g', 'b', 'm', 'gray']):
    """ Plots a graph for each input feature, showing the generated sample distribution around the true feature for
    the samples that are positioned at the (25, 50, 75)th percentiles.

    :param n: number of generated samples per true level; used to build the input and output paths ("1v{n}").
    :param markers: marker style per percentile entry, indexed by the entry's position in the percentile dict.
    :param colors: color per percentile entry, indexed the same way.

    One PNG and one PDF per feature are written under
    ``<ref_sample_folder>/graphs/1v{n}/input_features/``. Nothing is returned.
    """
    import DoomLevelsGAN.DoomGAN as nn
    output_graph_folder = nn.FLAGS.ref_sample_folder + "graphs/1v{}/input_features/".format(
        n)
    os.makedirs(output_graph_folder, exist_ok=True)
    percent_dict = load_level_subset()

    # Request every level named in any percentile group; values start as None
    # and are filled in by the lookup below.
    true_samples = {name: None for p in percent_dict for name in percent_dict[p]}
    true_samples = nn.gan.get_samples_by_name(true_samples)
    while None in true_samples.values():
        print(
            "Not all levels have been found, due to random selection of levels to match the batch size. Retrying.."
        )
        true_samples = nn.gan.get_samples_by_name(true_samples)

    # loading generated results
    path = nn.FLAGS.ref_sample_folder + 'samples_percentiles/generated1v{}'.format(
        n)
    names = np.load(path + 'names.npy')
    generated = np.load(path + 'generated.npy')

    # Input features, only relevant levels
    for f, fname in enumerate(nn.gan.features):
        fig = plt.figure(figsize=(15, 11))
        for p, pname in enumerate(percent_dict):
            # The extreme percentiles are not plotted.
            if pname in ('perc0', 'perc100'):
                continue
            name = percent_dict[pname][f]
            # First generated entry whose name matches this level.
            samp = generated[np.where(names == name)][0]
            true_value = true_samples[name][fname]
            values = samp[fname]
            #axt = sb.rugplot([np.mean(values)], height=1, ls="-", linewidth=0.75, marker=colors[p])
            # Full-height dashed rug line marks the true feature value ...
            axt = sb.rugplot(true_value,
                             height=1,
                             ls="--",
                             linewidth=0.75,
                             label="True_{}".format(pname),
                             color=colors[p],
                             marker=markers[p])
            # ... and the KDE shows the generated values around it.
            sb.kdeplot(values,
                       ax=axt,
                       ls="-",
                       label="Generated_{}".format(pname),
                       color=colors[p],
                       marker=markers[p])
            plt.setp(axt.get_legend().get_texts(), fontsize=30)
            axt.set_xlabel("{}".format(fname), fontsize=45)

        #plt.title("{} generated samples distribution from every quartile of feature \"{}\"".format(n, fname))
        # NOTE(review): newer matplotlib exposes this on fig.canvas.manager
        # instead -- confirm the pinned matplotlib version.
        fig.canvas.set_window_title("{}".format(fname))
        fig.tight_layout()
        fig.savefig(output_graph_folder + '1v{}_{}.png'.format(n, fname))
        fig.savefig(output_graph_folder + '1v{}_{}.pdf'.format(n, fname))
Ejemplo n.º 16
0
# Get expensive tasks ready to process: keep the entries flagged 1, drop the
# flag, number them, and assign each to a partition round-robin.
expensive_tasks = indices_vs_expensive.filter(lambda x: x[1] == 1)
expensive_tasks = expensive_tasks.map(lambda x: x[0])
labeled_expensive_tasks = expensive_tasks.zipWithIndex()
partition_vs_expensive_task = labeled_expensive_tasks.map(lambda x: (x[1] % num_partitions, x[0]))

# Get cheap tasks ready to process (same steps for the entries flagged 0)
cheap_tasks = indices_vs_expensive.filter(lambda x: x[1] == 0)
cheap_tasks = cheap_tasks.map(lambda x: x[0])
labeled_cheap_tasks = cheap_tasks.zipWithIndex()
partition_vs_cheap_task = labeled_cheap_tasks.map(lambda x: (x[1] % num_partitions, x[0]))

# Combine cheap & expensive tasks, now designated to an appropriate partition
partition_vs_ij = partition_vs_expensive_task.union(partition_vs_cheap_task)
# Sort data into the correct partition...sorted by key!
# NOTE(review): numPartitions is hard-coded to 100 while the keys are computed
# modulo num_partitions -- presumably num_partitions == 100; confirm.
sorted_by_partition = partition_vs_ij.sortByKey(numPartitions=100)

# Each value is unpacked as the positional arguments of mandelbrot_wrapper.
mandelbrot_load_balanced = sorted_by_partition.map(lambda a: mandelbrot_wrapper(*a[1]))

summed_rdd = P2.sum_values_for_partitions(mandelbrot_load_balanced)
summed_result = summed_rdd.collect()

# Now collect the data & plot: histogram on 20 log-spaced bin edges from 1e3
# to 1e8, with a red rug of the individual per-partition totals.
plt.hist(summed_result, bins=np.logspace(3, 8, 20))
sns.rugplot(summed_result, color="red")
plt.gca().set_xscale("log")
plt.xlabel("Total Number of Iterations on Partition")
plt.ylabel("Partition Count")
plt.title("Number of Iterations on each Partition")

plt.savefig("P2b_hist.png", dpi=200, bbox_inches="tight")
Ejemplo n.º 17
0
print("-"*30)
# Count of missing values per column
print(data1.isnull().sum())


# Draw density plots: split the heights by the value of the "gender" column
h_male=data1[data1["gender"]=="男"]["height"]
h_female=data1[data1["gender"]=="女"]["height"]

fig=plt.figure(figsize=(8,5))

sns.kdeplot(h_male, label="male_height",
            linestyle = '--',linewidth = 2,color='orange')    # male
sns.kdeplot(h_female, label="female_height",
            linestyle = '--',linewidth = 2,color='g')    # female
# Rug marks showing where the individual data points fall
sns.rugplot(h_male,height = 0.1,color = 'orange')
sns.rugplot(h_female,height = 0.05,color = 'g')

# Draw auxiliary lines at each group's mean height
male_height_mean=h_male.mean()                           # mean male height
female_height_mean=h_female.mean()                       # mean female height
plt.axvline(x=male_height_mean,linestyle="--",linewidth = 1.2,color="orange",alpha=0.5)
plt.axvline(x=female_height_mean,linestyle="--",linewidth = 1.2,color="g",alpha=0.5)

# Add annotations next to the mean lines
plt.text(male_height_mean,0.005,"male_height_mean: %.1fcm"%male_height_mean,color="orange")
plt.text(female_height_mean,0.01,"female_height_mean: %.1fcm"%female_height_mean,color="g")

# Title
plt.title("Athlete's height")
plt.grid(linestyle="--")
Ejemplo n.º 18
0
'''

import matplotlib

from cache_codec import *
from matplotlib import pyplot as plotter

import seaborn

if __name__ == "__main__":
	# Figure showing the Gaussian mixture model for ERBB2 against the
	# measured expression levels.
	models = load_gene_models("BC")
	samples = load_sample_profiles("BC").values()
	expression_levels = [sample.profiles["ERBB2"].intensity for sample in samples]

	print(expression_levels)

	# Rug of the observed data, density curve of 100000 draws from the model.
	seaborn.rugplot(expression_levels, color="black")
	seaborn.distplot(models["ERBB2"].sample(100000), hist=False)

	exp_patch = matplotlib.patches.Patch(color='black', label="Expression Data")
	gmm_patch = matplotlib.patches.Patch(color='blue', label="GMM model")
	# One call sets both handles and location: a second legend() call would
	# replace the legend and drop the custom handles.
	plotter.legend(handles=[exp_patch, gmm_patch], loc="upper right")

	plotter.title("ERBB2")
	plotter.xlabel("Expression Level")
	plotter.ylabel("Density")

	plotter.savefig("Yay!.png")
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
import seaborn as sns

# Load the price history, indexed by the parsed 'Date' column.
df = pd.read_csv('AAPL_2006-01-01_to_2018-01-01.csv',
                 index_col='Date',
                 parse_dates=['Date'])
# Quick exploratory plots: missing-value map, correlation heatmap, histogram
# and rug of 'Open', pairwise relationships.
sns.heatmap(df.isnull(), cbar=False, yticklabels=False)
sns.heatmap(df.corr())
plt.hist(df['Open'], bins=40)
sns.rugplot(df['Open'])
sns.pairplot(df)

# Split on the date index; only the column at position 1 is kept
# (as a 2-D array, because of the 1:2 slice).
training_set = df[:'2016'].iloc[:, 1:2].values
test_set = df['2017':].iloc[:, 1:2].values

# Plot the 'High' column for both periods on one chart.
# NOTE(review): the training slice ends at '2016' (label-based slice) while
# the legend text says "Before 2016" -- confirm which is intended.
df['High'][:'2016'].plot(figsize=(16, 9), legend=True)
df['High']['2017':].plot(figsize=(16, 9), legend=True)
plt.legend(['Training Set (Before 2016)', ' Test Set (After 2017)'])
plt.title("Apple Stock Price")
plt.tight_layout()
plt.show()

# Scale the training values into [0, 1]; the scaler is fitted on the training
# set only.
from sklearn.preprocessing import MinMaxScaler
sc_x = MinMaxScaler(feature_range=(0, 1))
scaled_training_set = sc_x.fit_transform(training_set)

# Containers for the training inputs and targets (filled further below).
X_train = []
y_train = []
Ejemplo n.º 20
0
# kind='' controls what sort of graph is displayed on the joint plot (the
# original note says it is scatter by default). Here we pass in 'kde' to
# display kernel density estimates.

sns.jointplot(x='total_bill', y='tip', data=tips, kind='kde', color='blue')

# pairplot plots all of the columns against each other;
# this is a great way to quickly visualize your data.
# hue='' takes in a categorical (True/False, Male/Female, etc.) column and will
# separate and plot these on the pairplot graph. it also takes palette='' which
# allows you to set the colors yourself
sns.pairplot(tips, hue='sex')

# a rugplot draws one small line for each value in the single column. seems to be
# less useful than a histogram (distplot())
sns.rugplot(tips['total_bill'])

# KDE plots = Kernel Density Estimation plots
# create a dataset of 25 standard-normal samples
dataset = np.random.randn(25)

# Create another rugplot
sns.rugplot(dataset)

# set up the x-axis for the plot, padded by 2 on each side of the data range
x_min = dataset.min() - 2
x_max = dataset.max() + 2

# 100 equally spaced points from x_min to x_max
x_axis = np.linspace(x_min, x_max, 100)
Ejemplo n.º 21
0

#
#fig,(ax1,ax2) = plt.subplots(ncols=2,figsize=(12,4))
#sns.regplot('X1','X2',data=pd.DataFrame(data=X_norm,columns=['X1','X2']),fit_reg=False,ax=ax1)
#ax1.set_title('Original dimension')
#sns.rugplot(Z,ax=ax2)
#ax2.set_xlabel('Z')
#ax2.set_title('Z dimension')
#plt.show()
    
# Recover the data back to the original dimension
X_recover = recover_data(Z,U)
fig,(ax1,ax2,ax3) = plt.subplots(ncols=3,figsize=(12,4))

# Left: rug of the projected values Z
sns.rugplot(Z,ax=ax1)
ax1.set_title('Z dimension')
ax1.set_xlabel('Z')

# Middle: data recovered from Z. x / y are passed by keyword because recent
# seaborn releases reject positional column names.
sns.regplot(x='X1', y='X2',
            data=pd.DataFrame(X_recover,columns=['X1','X2']),
            fit_reg=False,
            ax=ax2)
ax2.set_title('2D projection from Z')

# Right: the normalised original data, for comparison
sns.regplot(x='X1', y='X2',
            data=pd.DataFrame(data=X_norm,columns=['X1','X2']),
            fit_reg=False,
            ax=ax3)
ax3.set_title('Original dimension')
plt.show()
Ejemplo n.º 22
0
df = pd.read_csv('tips.csv')
# First ten rows
tips = df.head(10)
# Display the table
st.table(tips)
st.header("Visualisation Using Seaborn")
# Each section below draws one chart and then renders it with st.pyplot().
# Bar plot
st.subheader("Bar Plot")
tips.plot(kind='bar')
st.pyplot()
# Displot
st.subheader("Displot")
sns.displot(tips['total_bill'])
st.pyplot()
# Jointplot
st.subheader("JointPlot")
sns.jointplot(x='total_bill', y='tip', data=tips, kind='scatter')
st.pyplot()
# Pairplot
st.subheader("Pairplot")
sns.pairplot(tips, hue='sex', palette='rainbow')
st.pyplot()
# Rugplot
st.subheader("Rugplot")
sns.rugplot(tips['tip'])
st.pyplot()
# Correlation heatmap
st.subheader("Heatmap")
sns.heatmap(tips.corr(), cmap='coolwarm', annot=True)
st.pyplot()
Ejemplo n.º 23
0
# Moving averages over a window of maGames games. Series.rolling(...).mean()
# replaces pd.rolling_mean, which has been removed from pandas.
maTeamPts4 = teamdf.Pts4.rolling(maGames).mean()
maTeamPtsAg = teamdf.PtsAg.rolling(maGames).mean()

# Figure: points-for (top) and points-against (bottom) moving averages,
# sharing the x axis.
fig3 = plt.figure('MatPlotLib Moving Average')
ax3 = plt.subplot(2, 1, 1, title=("Points For "+str(maGames)+ " Game Moving Average"))
plt.plot(maTeamPts4.index, maTeamPts4)
plt.ylim(ymin=0)

ax4 = plt.subplot(212, title=("Points Against " +str(maGames)+ " Game Moving Average"), sharex=ax3)
plt.plot(maTeamPtsAg.index, maTeamPtsAg)
plt.ylim(ymin=0)


# Figure: shaded KDEs of points-for and points-against, sharing the x axis.
fig4 = plt.figure('Seaborn KDE')
#sns.distplot(teamdf.Pts4, hist=False, kde_kws={"shade": True})
ax5 = plt.subplot(2,1,1, title=team + ' Points-For KDE')
sns.kdeplot(teamdf.Pts4, bw=.4, cut=40, shade=True, label=team)
#sns.kdeplot(df.Pts4, bw=1, shade=True, label='League Overall')
ax6 = plt.subplot(2,1,2, title=team + ' Points-Against KDE', sharex=ax5)
sns.kdeplot(teamdf.PtsAg, bw=.4, cut=40, shade=True, label=team)



# Figure: rug of the individual points-for values.
plt.figure('Rugplot')
sns.rugplot(teamdf.Pts4)





plt.show()
Ejemplo n.º 24
0
# A dice roll

# The Probability Mass function

# Each number
roll_options = [1, 2, 3, 4, 5, 6]

# Total probability space is 1
tprob = 1

# Each roll has the same odds of appearing --> 1/6
# NOTE(review): under Python 2 this is integer division and yields 0 --
# confirm the interpreter version.
prob_roll = tprob / len(roll_options)

# Plot using seaborn rugplot (note this is not really a rugplot),
# setting height equal to probability of roll
uni_plot = sns.rugplot(roll_options, height=prob_roll, c='indianred')

# Set Title
uni_plot.set_title('Probability Mass Function for Dice Roll')

# We can see in the above example that the f(x) value on the plot is just
# equal to 1/(Total Possible Outcomes)

# The mean is simply the sum of the max and min values divided by two, just
# like the mean of two numbers     μ=(b+a)/2
# With a variance of:  σ^2=((b−a+1)^2 − 1) / 12

# automatically create a Discrete Uniform Distribution using Scipy.
# Imports
from scipy.stats import randint
Ejemplo n.º 25
0
import numpy as np
from numpy.random import randn
import pandas as pd

from scipy import stats

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
dataset = randn(25)
sns.rugplot(dataset)
plt.hist(dataset, alpha=0.3)  # draw the histogram with some transparency
sns.rugplot(dataset)  # use a rugplot to show where the actual data points lie
# --- start of the manual kernel density section ---
sns.rugplot(dataset)
# x grid padded by 2 on each side of the data range
x_min = dataset.min() - 2
x_max = dataset.max() + 2
x_axis = np.linspace(x_min, x_max, 100)

# Bandwidth from the formula (4 * sigma^5 / (3 * n)) ^ (1/5)
bandwidth = ((4 * dataset.std()**5) / (3 * len(dataset)))**0.2

kernel_list = []
for data_point in dataset:
    # One normal pdf centred on each data point
    kernel = stats.norm(data_point, bandwidth).pdf(x_axis)
    # The unscaled kernel is what gets stored ...
    kernel_list.append(kernel)

    # ... while the plotted copy is rescaled to a peak height of 0.4
    kernel = kernel / kernel.max()
    kernel = kernel * 0.4
    plt.plot(x_axis, kernel, color='gray', alpha=0.5)
plt.ylim(0, 1)
# --- up to here: how to build the kernel density function without using seaborn ---
Ejemplo n.º 26
0
# Third group: normal samples with mean 4 and standard deviation 1
X3 = rng.normal(4, 1, size=n_per_group)
# Pool the three groups and build matching group labels 0 / 1 / 2
X = np.concatenate((X1, X2, X3))
labels = np.concatenate((0 * ones, 1 * ones, 2 * ones))

# sort to help visualize (descending order of X)
sort_inds = np.argsort(-X)
X = X[sort_inds]
labels = labels[sort_inds]

# Density histogram of X colored by group, with a rug of the individual values
fig, ax = plt.subplots(1, 1, figsize=(8, 6))
sns.histplot(x=X, hue=labels, palette=palette, bins=50, stat="density", ax=ax)
sns.rugplot(
    x=X,
    hue=labels,
    palette=palette,
    height=0.05,
    legend=False,
    ax=ax,
    expand_margins=True,
)
stashfig("rank-distribution")

#%% [markdown]
# ## A distribution from the latent ranks
# Using the ranks, we can create a distribution from which to sample graphs. Here I plot
# the matrix of edge probabilities $P$ and an adjacency matrix $A$ from it.
#%%
# Parameters for the section introduced above
k = 15
beta = 5

Ejemplo n.º 27
0
# 20-bin histogram, no KDE curve, rug lines at the bottom
sns.distplot(data.mpg, bins=20, kde=False, rug=True)
# KDE curve only (no histogram bars), with rug lines
sns.distplot(data.mpg, hist=False, rug=True)
# Joint plots of wt vs mpg: default kind first, then kind="kde"
sns.jointplot(x="wt", y="mpg", data=data)
sns.jointplot(x="wt", y="mpg", data=data, kind="kde")
# NOTE(review): this call reads columns "x"/"y" from `df`, which is not
# defined in the visible code -- confirm it is intended.
sns.jointplot(
    x="x",
    y="y",
    data=df,
)
#Links:https://seaborn.pydata.org/tutorial/distributions.html
# Bivariate KDE of wt vs mpg with a rug along each axis
f, ax = plt.subplots(figsize=(6, 6))
sns.kdeplot(data.wt, data.mpg, ax=ax)
sns.rugplot(data.wt, color="g", ax=ax)
sns.rugplot(data.mpg, vertical=True, ax=ax)
#%% Pair Plot
sns.pairplot(data[['wt', 'mpg', 'hp', 'qsec']])
#%%%

#%%%
#%%%outliers
#In statistics, an outlier is an observation point that is distant from other observations.
sns.boxplot(x=data['mpg'])
#The Z-score is the signed number of standard deviations by which the value of an observation or data point is above the mean value of what is being observed or measured
#Links: https://towardsdatascience.com/ways-to-detect-and-remove-the-outliers-404d16608dba
from scipy import stats
import numpy as np
# Absolute z-score of every mpg value
z = np.abs(stats.zscore(data.mpg))
print(z)
Ejemplo n.º 28
0
def Time_Nature_kdeplots(time_nature=None,
                         figNo=2,
                         clr_background='Greens',
                         clr_rugplots='green',
                         scatters=True):
    '''
    Draw bivariate KDE plots of natural features against a time index.

    One figure is created per key of ``time_nature`` and, inside it, one
    subplot per feature listed under that key. Every subplot contains:
        1) x-axis: the time index ('month' or 'day')
        2) y-axis: one natural feature from ['temp', 'RH', 'wind']
        3) a shaded bivariate kernel density estimate with a colour bar
        4) optional '+' scatter markers at the individual observations
        5) rug plots along both axes
    All data is read from the module-level DataFrame ``fire``; the figures are
    displayed with ``plt.show()`` once every figure has been drawn.

    : type time_nature: dict{str:list} or None
    : param time_nature: keys are time indices ('month'/'day'), values are
                         lists of natural features to plot against that index.
                         None (default) plots 'temp', 'RH' and 'wind' against
                         both 'month' and 'day'.
    : type figNo: int
    : param figNo: number of figures; must equal the number of keys in
                   ``time_nature``
    : type clr_background: str
    : param clr_background: colour map name passed to ``sns.kdeplot``
    : type clr_rugplots: str
    : param clr_rugplots: colour passed to ``sns.rugplot``
    : type scatters: bool
    : param scatters: whether to overlay the scatter markers
    : raises AssertionError: if any argument fails the checks above. All
                             arguments are validated before anything is drawn.
    '''
    # A fresh dict per call: a dict literal in the signature would be shared
    # (and mutable) across calls.
    if time_nature is None:
        time_nature = {
            'month': ['temp', 'RH', 'wind'],
            'day': ['temp', 'RH', 'wind'],
        }

    # Check the container type first so a non-dict fails the assertion instead
    # of raising AttributeError on .keys().
    assert isinstance(time_nature, dict)
    assert isinstance(figNo, int) and figNo == len(
        time_nature)  # number of figures == number of time indices in dict
    assert isinstance(clr_background, str)
    assert isinstance(clr_rugplots, str)
    for time_index, features in time_nature.items():
        assert isinstance(time_index, str) and time_index in [
            'month', 'day'
        ]  # the time indices available
        assert isinstance(features, list)
        for feature in features:
            assert feature in ['temp', 'RH',
                               'wind']  # the natural features available

    for fig_index, (time_index, features) in enumerate(time_nature.items(),
                                                       start=1):
        n_features = len(features)
        if n_features == 1:
            figsize = (5, 5)
        elif n_features == 2:
            figsize = (14, 6)
        else:
            figsize = (18, 5)
        plt.figure(fig_index, figsize=figsize)
        for position, feature in enumerate(features, start=1):
            # One row of subplots, one column per feature.
            plt.subplot(1, n_features, position)
            plt.title(time_index + '-' + feature,
                      fontsize=20,
                      position=(0.5, 1.05))
            plt.xlabel(time_index, fontsize=20)
            # Label with the feature of this subplot, not the whole list.
            plt.ylabel(feature, fontsize=20)
            sns.kdeplot(
                fire[time_index],
                fire[feature],  # joint density of the two variables
                cbar=True,  # display color bar
                shade=True,  # display shades
                cmap=clr_background,  # set the color palette
                shade_lowest=False,  # do not shade the lowest contour level
                n_levels=40  # number of contour levels; higher is smoother
            )  # the color change indicates the change of density
            plt.grid(linestyle='--')  # show grids
            if scatters:
                plt.scatter(fire[time_index],
                            fire[feature],
                            s=5,
                            alpha=0.5,
                            color='k',
                            marker='+')
            sns.rugplot(fire[time_index],
                        color=clr_rugplots,
                        axis='x',
                        alpha=0.5)
            sns.rugplot(fire[feature],
                        color=clr_rugplots,
                        axis='y',
                        alpha=0.5)
    plt.show()
Ejemplo n.º 29
0
def InvarianceTestKolSmirn(epsi,
                           y1,
                           y2,
                           band_int,
                           cdf_1,
                           cdf_2,
                           up_band,
                           low_band,
                           pos=None,
                           name='Invariance Test',
                           bound=(0, 0)):
    """Draw the figure for the Kolmogorov-Smirnov (IID) test for invariance.

    The figure has two histograms on the top row (one per sample) and, below
    them, the two empirical cdfs plotted as scatter points together with the
    upper/lower band and a rug of each sample. Nothing is returned; the plot is
    drawn on the current matplotlib figure.

    INPUTS
     epsi      :[vector](1 x t_end) series of (to be tested as such) invariants
     y1        :[vector](1 x ~t_end/2) first partition of vector epsi
     y2        :[vector](1 x ~t_end/2) second partition of vector epsi
     band_int  :[row vector] x-axis values of the (upper and lower) band
     cdf_1     :[vector](1 x ~t_end/2) empirical cdf of y1
     cdf_2     :[vector](1 x ~t_end/2) empirical cdf of y2
     up_band   :[row vector] y-axis values of the upper band
     low_band  :[row vector] y-axis values of the lower band
     pos       :accepted for backward compatibility only; the layout is built
                with plt.subplot2grid and this argument is not read
     name      :[string] title of the figure
     bound     :[vector](1x2) lower and upper values of the x-axis; an entry
                equal to 0 means "use the corresponding end of band_int"
    """

    def _n_bins(sample):
        # Bin-count rule shared by every histogram here: 10 * log(sample size).
        return int(round(10 * log(len(sample.flatten()))))

    # colors
    blue = [0.2, 0.2, 0.7]
    l_blue = [0.2, 0.6, 0.8]
    orange = [.9, 0.6, 0]
    d_orange = [0.9, 0.3, 0]

    # x-axis limits shared by all three plots
    xlim_1 = bound[0] if bound[0] != 0 else band_int[0]
    xlim_2 = bound[1] if bound[1] != 0 else band_int[-1]

    # Common y-axis ceiling for both histograms, taken from the counts of the
    # full series. The removed `normed` keyword is no longer passed; plain
    # counts are the default.
    ycount, _ = histogram(epsi, _n_bins(epsi))
    ylim = npmax(ycount)

    ax1 = plt.subplot2grid((2, 2), (0, 0))
    ax2 = plt.subplot2grid((2, 2), (0, 1))
    ax3 = plt.subplot2grid((2, 2), (1, 0), colspan=2)

    # histograms of Sample 1 (y1) and Sample 2 (y2)
    for ax, sample, color, label in ((ax1, y1, orange, 'Sample1'),
                                     (ax2, y2, l_blue, 'Sample2')):
        sns.distplot(sample,
                     bins=_n_bins(sample),
                     kde=False,
                     color=color,
                     hist_kws={
                         "alpha": 1,
                         "edgecolor": "k"
                     },
                     ax=ax)
        ax.set_xlabel(label)
        ax.set_xlim((xlim_1, xlim_2))
        ax.set_ylim([0, ylim * 0.8])
        ax.ticklabel_format(axis='x', style='sci', scilimits=(-2, 2))
        ax.grid(False)

    # empirical cdfs of the two samples
    ax3.scatter(y1, cdf_1, color=d_orange, s=2)
    ax3.scatter(y2, cdf_2, color=blue, s=2)

    # rug of each sample (first row of the array) along the x-axis
    sns.rugplot(y1[0], height=0.025, color=d_orange, ax=ax3)
    sns.rugplot(y2[0], height=0.025, color=blue, ax=ax3)

    # the (upper and lower) band
    ax3.plot(band_int, up_band, '-', color='k', lw=0.5)
    ax3.plot(band_int, low_band, '-', color='k', lw=0.5)

    ax3.set_xlabel('data')
    ax3.set_ylabel('cdf')
    ax3.set_xlim([xlim_1, xlim_2])
    ax3.set_ylim([-0.05, 1.05])
    ax3.ticklabel_format(axis='x', style='sci', scilimits=(-2, 2))

    plt.suptitle(name)
    # leave room at the top of the figure for the suptitle
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
Ejemplo n.º 30
0
import numpy as np
import pandas as pd
from scipy import stats, integrate
import matplotlib.pyplot as plt

import seaborn as sns


# Draw 30 standard-normal samples and evaluate one Gaussian kernel per sample
# on a fixed grid, plotting each kernel in red.
x = np.random.normal(0, 1, size=30)
n_obs = x.size
bandwidth = 1.06 * x.std() * n_obs ** (-1 / 5.)
support = np.linspace(-4, 4, 200)

kernels = []
for x_i in x:
    kernel = stats.norm(loc=x_i, scale=bandwidth).pdf(support)
    plt.plot(support, kernel, color="r")
    kernels.append(kernel)

# Mark the raw observations along the x-axis.
sns.rugplot(x, color=".2", linewidth=3);
Ejemplo n.º 31
0
def ModelFeature_kdeplots(model_nature=None,figNo=None,clr_background='Blues',clr_rugplots='blue',scatters=True):
    '''
    Draw bivariate KDE plots of model features against natural features.

    One figure is created per key of ``model_nature`` and, inside it, one
    subplot per natural feature listed under that key. Every subplot contains:
        1) x-axis: a natural feature from ['temp', 'RH', 'wind']
        2) y-axis: the model feature from ['DMC', 'DC', 'FFMC', 'ISI']
        3) a shaded bivariate kernel density estimate with a colour bar
        4) optional '*' scatter markers at the individual observations
        5) rug plots along both axes
    All data is read from the module-level DataFrame ``c``; the figures are
    displayed with ``plt.show()`` once every figure has been drawn.

    : type model_nature: dict{str:list} or None
    : param model_nature: keys are model features, values are lists of the
                          natural features to plot against that model feature.
                          Only these pairings are accepted:
                          DMC -> temp, RH; DC -> RH; FFMC -> temp, RH, wind;
                          ISI -> wind. None (default) plots DMC against temp
                          and RH.
    : type figNo: int or None
    : param figNo: number of figures; must equal the number of keys in
                   ``model_nature``. None (default) uses that number directly.
    : type clr_background: str
    : param clr_background: colour map name passed to ``sns.kdeplot``
    : type clr_rugplots: str
    : param clr_rugplots: colour passed to ``sns.rugplot``
    : type scatters: bool
    : param scatters: whether to overlay the scatter markers
    : raises AssertionError: if any argument fails the checks above
    '''
    # Natural features each model feature may be paired with.
    allowed_features = {
        'DMC': ['temp', 'RH'],
        'DC': ['RH'],
        'FFMC': ['temp', 'RH', 'wind'],
        'ISI': ['wind'],
    }

    # A fresh dict per call: a dict literal in the signature would be shared
    # (and mutable) across calls.
    if model_nature is None:
        model_nature = {'DMC': ['temp', 'RH']}
    assert isinstance(model_nature, dict)
    # The previous defaults (one key, figNo=2) contradicted each other, so a
    # call without arguments always failed; derive the count when not given.
    if figNo is None:
        figNo = len(model_nature)
    assert isinstance(figNo, int)
    assert figNo == len(model_nature)  # number of figures == number of model features in dict
    assert isinstance(clr_background, str)
    assert isinstance(clr_rugplots, str)

    for model_feature, natural_features in model_nature.items():
        assert isinstance(model_feature, str) and model_feature in allowed_features
        assert isinstance(natural_features, list)
        for natural_feature in natural_features:
            assert (isinstance(natural_feature, str)
                    and natural_feature in allowed_features[model_feature])

    for fig_index, (model_feature, natural_features) in enumerate(model_nature.items()):
        n_plots = len(natural_features)
        if n_plots == 1:
            figsize = (5, 5)
        elif n_plots == 2:
            figsize = (14, 6)
        else:
            figsize = (15, 6)
        plt.figure(fig_index, figsize=figsize)
        for position, natural_feature in enumerate(natural_features, start=1):
            # One row of subplots, one column per natural feature.
            plt.subplot(1, n_plots, position)
            plt.title(model_feature + '-' + natural_feature, fontsize=14, position=(0.5, 1.05))
            plt.xlabel(natural_feature, fontsize=14)
            plt.ylabel(model_feature, fontsize=14)
            sns.kdeplot(c[natural_feature], c[model_feature],
                        cbar=True,            # display color bar
                        shade=True,           # display shades
                        cmap=clr_background,  # set the color palette
                        shade_lowest=False,   # do not shade the lowest contour level
                        n_levels=40           # number of contour levels; higher is smoother
                        )  # the color change indicates the change of density
            plt.grid(linestyle='--')  # show grids
            if scatters:
                plt.scatter(c[natural_feature], c[model_feature], s=5, alpha=0.5, color='r', marker='*')
            sns.rugplot(c[natural_feature], color=clr_rugplots, axis='x', alpha=0.5)
            sns.rugplot(c[model_feature], color=clr_rugplots, axis='y', alpha=0.5)
    plt.show()
Ejemplo n.º 32
0
# jointplot: bivariate view of total_bill vs. tip in three different kinds
sns.jointplot(x='total_bill', y='tip', data=tips, kind='scatter')
sns.jointplot(x='total_bill', y='tip', data=tips, kind='hex')
sns.jointplot(x='total_bill', y='tip', data=tips, kind='reg')

##############################################################################
# pairplot: relationships across an entire dataframe for the numerical columns
# supports a color hue argument for categorical columns
##############################################################################
sns.pairplot(tips)
sns.pairplot(tips, hue='sex', palette='coolwarm', diag_kind='hist')

#################################################################################
# rugplot: draws a dash mark for every point of a univariate distribution.
# Rug plots are the building block of a KDE plot.
#################################################################################
sns.rugplot(tips['total_bill'])

#################################################################################
# kdeplot: Kernel Density Estimation plot. A KDE plot replaces every single
# observation with a Gaussian (Normal) distribution centered around that value.
#################################################################################

# Create dataset
dataset = np.random.randn(25)
# Function-call form of print: valid on Python 3 and, for a single argument,
# on Python 2 as well (the statement form is a SyntaxError on Python 3).
print(dataset)

# Create another rugplot
sns.rugplot(dataset)

# Set up the x-axis for the plot
x_min = dataset.min() - 2
Ejemplo n.º 33
0
    kmeans_ari = adjusted_rand_score(y, kmeans_pred_labels)

    return gmm_ari - kmeans_ari


# Fix the global NumPy seed so the per-simulation seeds below are reproducible.
np.random.seed(8888)
n_sims = 40
seeds = np.random.randint(1e8, size=n_sims)  # random
# seeds = np.ones(n_sims, dtype=int) # uncomment for not random
# Run one experiment per seed on 2 workers; each call returns
# gmm_ari - kmeans_ari (see the return statement of run_experiment above).
par = Parallel(n_jobs=2)
ari_diffs = par(delayed(run_experiment)(seed) for seed in seeds)

# Distribution of the ARI differences, with a dashed red reference line at 0.
fig, ax = plt.subplots(1, 1, figsize=(8, 4))
ax.axvline(0, linewidth=2, linestyle="--", color="red")
# No ax= is passed, so seaborn draws on the current axes (the one just created).
sns.distplot(ari_diffs, norm_hist=False)
sns.rugplot(ari_diffs)
# Place the two annotations at 75% of the y-range, 5% in from either x-limit.
xlim = ax.get_xlim()
ylim = ax.get_ylim()
y_range = ylim[1] - ylim[0]
ypos = ylim[0] + y_range * 0.75
x_range = xlim[1] - xlim[0]
ax.text(xlim[0] + 0.05 * x_range, ypos, "KMeans \n better")
ax.text(xlim[1] - 0.05 * x_range,
        ypos,
        "GMM \n better",
        horizontalalignment="right")
# Hide every spine except the bottom one and drop the y ticks.
ax.spines["left"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.spines["top"].set_visible(False)
ax.set_yticks([])
ax.set_xlabel("(GMM - KMeans) ARI")
sns.set_theme()

# create two simple continuous variables
x = np.random.normal(size=100)
y = np.random.normal(size=100)

data_df = pd.DataFrame({"x": x, "y": y})

sns.set_theme(style="darkgrid")

# 2x2 grid of axes: every plot below addresses its panel as ax[row, col], which
# needs an array of Axes rather than a single Axes object.
fig, ax = plt.subplots(2, 2)

# plot the distribution of the data
sns.scatterplot(x="x", y="y", data=data_df, ax=ax[0, 0]);
ax[0, 0].set_title("Scatter Plot")

# create a histogram plot of x variable w/ red color
sns.histplot(data=data_df, x="x", color="r", ax=ax[0, 1])
ax[0, 1].set_title("Histogram")

# create a density plot of x variable, with a rug of the raw values
sns.kdeplot(x="x", data=data_df, ax=ax[1, 0])
sns.rugplot(x="x", data=data_df, ax=ax[1, 0])
ax[1, 0].set_title("Density Plot")

# create a box plot of x variable w/ green color
sns.boxplot(x="x", data=data_df, color="g", ax=ax[1, 1])
ax[1, 1].set_title("Box Plot")

plt.show();

Ejemplo n.º 35
0
# read_csv already returns a DataFrame; no extra wrapping needed
df = pd.read_csv('SmallSalaries.csv')
# fillna returns a new Series, so assign it back; otherwise the NaN values
# stay in the column that is plotted below
df['BasePay'] = df['BasePay'].fillna(df['BasePay'].mean())
sns.distplot(df['BasePay'])  # draw a distplot on your own dataset

tips = sns.load_dataset('tips')

# DISTPLOT
# The distplot shows the distribution of a univariate set of observations.
sns.distplot(tips['total_bill'])
# KDE: Kernel Density Estimation
# to remove KDE line, use sns.distplot(tips['total_bill'], kde=False)

# JOINTPLOT
# jointplot() allows you to basically match up two distplots for bivariate data
# With your choice of what kind parameter to compare with:
# “scatter”     “reg”       “resid”     “kde”       “hex”
sns.jointplot(x='total_bill', y='tip', data=tips,
              kind='reg')  # or kind='scatter', etc.

# PAIRPLOT
# pairplot will plot pairwise relationships across an entire dataframe (for the numerical columns)
# and supports a color hue argument (for categorical columns)
sns.pairplot(tips, hue='sex', palette='coolwarm')

# RUGPLOT
# draw a dash mark for every point on a univariate distribution
# They are the building block of a KDE plot
sns.rugplot(tips['total_bill'])

plt.show()
Ejemplo n.º 36
0
import seaborn as sns
import matplotlib.pyplot as plt

tips = sns.load_dataset(
    'tips'
)  # DataFrame with columns: total_bill, tip, sex, smoker, day, time, size

sns.distplot(tips['total_bill'])  # histogram with the default KDE curve
sns.distplot(tips['total_bill'], kde=False)  # histogram without the KDE curve

sns.distplot(tips['total_bill'], bins=110)  # histogram split into 110 bins

sns.jointplot(x='total_bill', y='tip', data=tips)  # compare two columns
sns.jointplot(x='total_bill', y='tip', data=tips,
              kind='hex')  # change the representation (hex, reg, kde)

sns.pairplot(tips)  # pairwise plots of all numeric columns
sns.pairplot(tips, hue='sex')  # split the data by category
sns.pairplot(tips, hue='sex', palette='coolwarm')  # colors for the categories

sns.rugplot(tips['total_bill'])  # one dash per observation

sns.kdeplot(tips['total_bill'])  # smooth density curve

plt.show()
# NOTE(review): the lines below look like a separate example whose header was
# lost; it reloads the same dataset and repeats similar plots.
tips=sns.load_dataset('tips')

# distplot using seaborn: 30-bin histogram without the KDE curve
sns.distplot(tips['total_bill'], kde=False,bins=30)

# jointplot using seaborn; kind is 'scatter' by default and may also be
# 'reg', 'resid', 'kde', or 'hex'
sns.jointplot(x='total_bill',y='tip',data=tips,kind="reg")


# pairplot using seaborn 
""" pairplot will plot pairwise relationships across an entire dataframe 
(for the numerical columns) and 
supports a color hue argument (for categorical columns). hue='sex',palette='coolwarm', here hue means individual data
"""

sns.pairplot(tips,hue='sex',palette='coolwarm')

"""
rugplot
rugplots are actually a very simple concept,
they just draw a dash mark for every point on a univariate distribution.
"""


sns.rugplot(tips["total_bill"])

# kde: no KDE plot is drawn in this example


plt.show()
Ejemplo n.º 38
0
# NOTE(review): `sc` (presumably a SparkContext) and the module `P2` are
# defined outside this excerpt -- confirm.
num_pixels = 2000
rows = sc.range(num_pixels, numSlices=10)
cols = sc.range(num_pixels, numSlices=10)

# Every (row, col) pair of the num_pixels x num_pixels grid.
indices = rows.cartesian(cols)

def mandelbrot_wrapper(row, col):
    # Map the pixel indices [0, num_pixels) linearly onto [-2, 2) on each axis
    # and return ((row, col), value) with the value from P2.mandelbrot.
    x = col/(num_pixels/4.) - 2.
    y = row/(num_pixels/4.) - 2.

    return ((row, col), P2.mandelbrot(x, y))

########### Different from part A: load balancing! ########
new_indices = indices.repartition(100) # Randomly throw jobs between partitions

mandelbrot_load_balanced = new_indices.map(lambda a: mandelbrot_wrapper(*a))

# One total per partition, collected to the driver.
summed_rdd = P2.sum_values_for_partitions(mandelbrot_load_balanced)
summed_result = summed_rdd.collect()

# Now collect the data & plot: histogram over 20 log-spaced bin edges between
# 1e3 and 1e8, with a red rug of the individual partition totals.
plt.hist(summed_result, bins=np.logspace(3, 8, 20))
sns.rugplot(summed_result, color='red')
plt.gca().set_xscale('log')
plt.xlabel('Total Number of Iterations on Partition')
plt.ylabel('Partition Count')
plt.title('Number of Iterations on each Partition')

plt.savefig('P2b_alternative_hist.png', dpi=200, bbox_inches='tight')
Ejemplo n.º 39
0
# NOTE(review): x, nSpace and norm are defined outside this excerpt; norm is
# presumably scipy.stats.norm -- confirm.
# Two evaluation grids spanning the data range padded by nSpace on each side:
# x_d with unit spacing, support with 100 evenly spaced points.
x_d = np.arange(np.min(x) - nSpace, np.max(x) + nSpace + 1, dtype = np.float32)

support = np.linspace(np.min(x) - nSpace, np.max(x) + nSpace +1, 100)

# Column vector, so that evaluating the pdf on a grid broadcasts to one row per
# observation.
x = x.reshape(-1, 1)

# NOTE(review): if norm is scipy.stats.norm, its second argument is the scale
# (standard deviation), not the variance -- confirm the intended meaning.
variance = 4

a_ = norm(x, variance).pdf(support)

# One red curve per observation.
for a_row in a_:
    plt.plot(support, a_row, color="r")

"""c"""

sns.rugplot(x, color=".2", linewidth=3)

# Sum of the per-observation curves (not normalised by the number of points).
density = np.sum(a_, axis = 0)

plt.plot(support, density)


plt.show()


# Same summed curve, evaluated on the unit-spaced grid x_d.
a_ = norm(x, variance).pdf(x_d)

density = np.sum(a_, axis = 0)


# Boolean mask of the grid points where the summed curve exceeds 0.001.
m = density > 0.001
Ejemplo n.º 40
0
from pandas import Series,DataFrame
import pandas as pd
import csv
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
# Notebook export: switch matplotlib to inline rendering (needs IPython).
get_ipython().magic(u'matplotlib inline')

#Read in and inspect ridership data
# NOTE(review): the head()/describe() results are only displayed when run as
# notebook cells; in a plain script they are discarded.
dframe = pd.read_csv('total_wmata_riders.csv')
dframe.head()
dframe.describe()
riders_frame = dframe['riders']

#Develop topline visualizations
# NOTE(review): no new figure is created between the two plots, so when run as
# one script both land on the same figure and the second suptitle replaces the
# first -- confirm this is intended.
sns.rugplot(riders_frame)
plt.suptitle("Rug Plot for Total Ridership by Station")
sns.kdeplot(riders_frame)
plt.suptitle("Density Plot for Riders by Station")

#Identify top and bottom quartiles of station ridership
# NOTE(review): 589680 and 254567 are hard-coded thresholds, presumably the
# 75th and 25th percentiles read off describe() -- verify against the data.
top_quartile = dframe[dframe['riders']>589680]
top_quartile.head()
top_quartile
bottom_quartile = dframe[dframe['riders']<254567]
bottom_quartile.head()
top_riders = top_quartile['riders']
bottom_riders = bottom_quartile['riders']

#Visualize top and bottom quartiles
sns.violinplot([bottom_riders])
Ejemplo n.º 41
0
# from https://web.stanford.edu/~mwaskom/software/seaborn/tutorial/distributions.html


import numpy as np
import pandas as pd
from scipy import stats, integrate
import matplotlib.pyplot as plt
import seaborn as sns
# Let matplotlib's single-letter color codes (e.g. "g") use seaborn's palette.
sns.set(color_codes=True)


# Deterministic seed derived from the character codes of "distributions".
np.random.seed(sum(map(ord, "distributions")))

# Univariate example: 100 standard-normal draws.
x = np.random.normal(size = 100)
sns.distplot(x);

# Bivariate example: 200 draws from a correlated 2-D normal.
mean, cov = [0, 1], [(1, .5), (.5, 1)]
data = np.random.multivariate_normal(mean, cov, 200)
df = pd.DataFrame(data, columns=["x", "y"])
sns.jointplot(x="x", y="y", data=df);

sns.jointplot(x="x", y="y", data=df, kind="kde");

# Bivariate KDE with a rug for each variable on the same axes.
f, ax = plt.subplots(figsize=(6, 6))
sns.kdeplot(df.x, df.y, ax=ax)
sns.rugplot(df.x, color="g", ax=ax)
sns.rugplot(df.y, vertical=True, ax=ax);

# Pairwise relationships in the iris dataset.
iris = sns.load_dataset("iris")
sns.pairplot(iris);
Ejemplo n.º 42
0
#####################################
# create a distribution object
# NOTE(review): counts and binlims are defined outside this excerpt; they are
# passed as the (counts, bin edges) pair that rv_histogram expects -- confirm.

hist = scipy.stats.rv_histogram((counts, binlims))

#####################################
# plot it

# make an x axis for plotting: 10000 points from 3 units below the first bin
# limit to 3 units above the last one
padding = 3
n_points = 10000
x = np.linspace(binlims[0] - padding, binlims[-1] + padding, n_points)

plt.plot(x, hist.pdf(x))

#####################################
# plot the cumulative histogram

plt.plot(x, hist.cdf(x))

##########################################################
# sample from the histogram (aka draw random variates)

n_sample = 30
sample = hist.rvs(size=n_sample)

#############################################################################
# let's plot it (use seaborn to plot the data points as small vertical bars)
plt.hist(sample, bins='auto', alpha=.5)
sns.rugplot(sample, color='k', linewidth=3)
Ejemplo n.º 43
0
def ale_plot(
    model,
    train_set,
    features,
    bins=10,
    monte_carlo=False,
    predictor=None,
    features_classes=None,
    monte_carlo_rep=50,
    monte_carlo_ratio=0.1,
    rugplot_lim=1000,
):
    """Plots ALE function of specified features based on training set.

    A new figure is created, drawn on and displayed with ``plt.show()``.

    Parameters
    ----------
    model : object
        An object that implements a 'predict' method. If None, a `predictor` function
        must be supplied which will be used instead of `model.predict`.
    train_set : pandas.core.frame.DataFrame
        Training set on which model was trained.
    features : [2-iterable of] column label
        One or two features for which to plot the ALE plot.
    bins : [2-iterable of] int, optional
        Number of bins used to split feature's space. 2 integers can only be given
        when 2 features are supplied in order to compute a different number of
        quantiles for each feature.
    monte_carlo : boolean, optional
        Compute and plot Monte-Carlo samples (first-order plots only).
    predictor : callable
        Custom prediction function. See `model`.
    features_classes : iterable of str, optional
        Not implemented yet; must be None.
    monte_carlo_rep : int
        Number of Monte-Carlo replicas.
    monte_carlo_ratio : float
        Proportion of randomly selected samples from dataset for each Monte-Carlo
        replica.
    rugplot_lim : int, optional
        If `train_set` has more rows than `rugplot_lim`, no rug plot will be plotted.
        Set to None to always plot rug plots. Set to 0 to disable them (only an
        empty `train_set` has 0 rows or fewer). Only used for first-order plots.

    Returns
    -------
    matplotlib axes
        The axes the ALE plot was drawn on.

    Raises
    ------
    ValueError
        If both `model` and `predictor` are None.
    ValueError
        If `len(features)` not in {1, 2}.
    ValueError
        If multiple bins were given for 1 feature.
    NotImplementedError
        If `features_classes` is not None.

    """
    if model is None and predictor is None:
        raise ValueError("If 'model' is None, 'predictor' must be supplied.")

    if features_classes is not None:
        raise NotImplementedError("'features_classes' is not implemented yet.")

    features = _parse_features(features)
    n_features = len(features)

    # Validate everything before creating the figure, so that a rejected call
    # does not leave an empty figure behind.
    if n_features not in (1, 2):
        raise ValueError(
            "'{n_feat}' 'features' were given, but only up to 2 are supported."
            .format(n_feat=n_features))
    if n_features == 1 and not isinstance(bins, (int, np.integer)):
        raise ValueError(
            "1 feature was given, but 'bins' was not an integer.")

    predict = model.predict if predictor is None else predictor
    fig, ax = plt.subplots()

    if n_features == 1:
        # First-order ALE of continuous data.
        feature = features[0]

        if monte_carlo:
            n_rows = train_set.shape[0]
            replica_size = int(monte_carlo_ratio * n_rows)
            # One row of randomly drawn row positions per replica.
            mc_replicates = np.asarray([[
                np.random.choice(range(n_rows)) for _ in range(replica_size)
            ] for _ in range(monte_carlo_rep)])
            for rep in mc_replicates:
                # The same quantiles cannot be reused here as this could cause
                # some bins to be empty or contain disproportionate numbers of
                # samples.
                mc_ale, mc_quantiles = _first_order_ale_quant(
                    predict,
                    train_set.iloc[rep, :],
                    feature,
                    bins,
                )
                _first_order_quant_plot(ax,
                                        mc_quantiles,
                                        mc_ale,
                                        color="#1f77b4",
                                        alpha=0.06)

        ale, quantiles = _first_order_ale_quant(
            predict,
            train_set,
            feature,
            bins,
        )
        _ax_labels(ax, "Feature '{}'".format(feature), "")
        _ax_title(
            ax,
            "First-order ALE of feature '{0}'".format(feature),
            "Bins : {0} - Monte-Carlo : {1}".format(
                len(quantiles) - 1,
                mc_replicates.shape[0] if monte_carlo else "False",
            ),
        )
        ax.grid(True, linestyle="-", alpha=0.4)
        if rugplot_lim is None or train_set.shape[0] <= rugplot_lim:
            sns.rugplot(train_set[feature], ax=ax, alpha=0.2)
        _first_order_quant_plot(ax, quantiles, ale, color="black")
        _ax_quantiles(ax, quantiles)
    else:
        # Second-order ALE of continuous data.
        ale, quantiles_list = _second_order_ale_quant(
            predict,
            train_set,
            features,
            bins,
        )
        _second_order_quant_plot(fig, ax, quantiles_list, ale)
        _ax_labels(
            ax,
            "Feature '{}'".format(features[0]),
            "Feature '{}'".format(features[1]),
        )
        for twin, quantiles in zip(("x", "y"), quantiles_list):
            _ax_quantiles(ax, quantiles, twin=twin)
        _ax_title(
            ax,
            "Second-order ALE of features '{0}' and '{1}'".format(
                features[0], features[1]),
            "Bins : {0}x{1}".format(
                *[len(quant) - 1 for quant in quantiles_list]),
        )
    plt.show()
    return ax
Ejemplo n.º 44
0
__author__ = 'Noventa'

import numpy as np
import pandas as pd
from numpy.random import randn

from scipy import stats

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# 25 standard-normal draws, shown as a rug with the y-axis fixed to [0, 1].
dataset = randn(25)
sns.rugplot(dataset)
plt.ylim(0, 1)
#plt.show()
# Translucent histogram of the same data with the rug drawn over it.
plt.hist(dataset, alpha=0.3)
sns.rugplot(dataset)
#plt.show()
sns.rugplot(dataset)

# Evaluation grid: 100 points from 2 below the minimum to 2 above the maximum.
x_min = dataset.min() - 2
x_max = dataset.max() + 2

x_axis = np.linspace(x_min, x_max, 100)

# Bandwidth (4 * sigma^5 / (3 * n)) ** (1/5); this has the form of the usual
# normal-reference rule of thumb for Gaussian kernels.
bandwidth = ((4*dataset.std()**5)/(3*len(dataset))) ** 0.2

# NOTE(review): the excerpt ends inside this loop; kernel_list is never filled
# in the visible part -- the rest of the loop body is presumably cut off.
kernel_list = []
for data_pt in dataset:
    # Gaussian kernel centred on the data point, evaluated on the grid.
    kernel = stats.norm(data_pt, bandwidth).pdf(x_axis)
Ejemplo n.º 45
0

# Get the data, and fit the normal distribution
weight = np.array([2784, 2632, 2771, 2495, 2435, 2513, 2633, 2737, 2687, 2647], dtype=np.float32)
(md, sd) = stats.norm.fit(weight)
nd = stats.norm(md, sd)

# Plot the data
sns.set_context(context='poster')

# Fitted pdf over the plotting range
x = np.linspace(2300, 3000)
y = nd.pdf(x)

# Probability, under the fitted distribution, of a value at or below checkVal
checkVal = 2460
print('p = {0:5.3f}'.format(nd.cdf(checkVal)))

# Part of the pdf up to checkVal, shaded below
x1 = np.linspace(2300, checkVal)
y1 = nd.pdf(x1)

# No plt.hold(True) here: it was removed in Matplotlib 3.0, and successive
# plotting calls already draw onto the same axes by default.
sns.rugplot(weight, height=0.0005)
plt.plot(x,y)
plt.fill_between(x1, y1, alpha=0.3)

# NOTE(review): machine-specific output directory; adjust before running.
outDir = r'C:\Users\p20529\Documents\Teaching\Master_FH\Stats\Images'
outFile = os.path.join(outDir, 'pdf_checkMean.png')
plt.savefig(outFile, dpi=200)
print('Figure saved to {0}'.format(outFile))

plt.show()
Ejemplo n.º 46
0
def plot(ctx):
    """Show a multi-panel Ramachandran figure for the angles in ``ctx.obj``.

    ``ctx.obj['trj']`` selects what is drawn:

    * ``'empty'``  -- a 2x2 grid of scatter plots over background images,
      reading ``phi_gen``/``psi_gen``, ``phi_gly``/``psi_gly``,
      ``phi_pro``/``psi_pro`` and ``phi_pre``/``psi_pre`` from ``ctx.obj``.
    * ``'filled'`` -- a 3x2 grid (scatter, KDE contour, two angle-difference
      traces, a 3D density surface and a histogram/density plot), reading
      ``phi`` and ``psi`` from ``ctx.obj``.

    Any other value draws nothing. The function returns ``None``; it ends by
    calling ``plt.show()`` in both supported modes.

    Parameters
    ----------
    ctx : object
        Must expose a mapping ``ctx.obj`` with the keys listed above.
        NOTE(review): presumably a click context -- confirm against caller.
    """
    mode = ctx.obj['trj']
    if mode == 'empty':
        _plot_reference_panels(ctx.obj)
    elif mode == 'filled':
        _plot_trajectory_panels(ctx.obj)


# Directory that holds the background images used in 'empty' mode.
_RAMA_BG_DIR = '/home/josh/PycharmProjects/saarama_project/saarama/bg/'


def _style_rama_axes(ax, title, set_limits=True):
    """Add the zero cross-hair, phi/psi axis labels and a title to ``ax``."""
    ax.plot([0, 0], [-180, 180], c='k', alpha=0.3)
    ax.plot([-180, 180], [0, 0], c='k', alpha=0.3)
    if set_limits:
        ax.set_xlim(-180, 180)
        ax.set_ylim(-180, 180)
    ax.set_xlabel('φ')
    ax.set_ylabel('ψ')
    ax.set_title(title)


def _plot_reference_panels(angles):
    """Draw the 2x2 grid of scatter plots over background images."""
    # (grid cell, background file, phi key, psi key, panel title)
    panels = (
        ((0, 0), 'rama_bg.png', 'phi_gen', 'psi_gen',
         'Ramachandran plot'),
        ((0, 1), 'gly_bg.png', 'phi_gly', 'psi_gly',
         'Ramachandran plot of Glycine'),
        ((1, 0), 'pro_bg.png', 'phi_pro', 'psi_pro',
         'Ramachandran plot of Proline'),
        ((1, 1), 'pre_bg.png', 'phi_pre', 'psi_pre',
         'Ramachandran plot of Pre-proline'),
    )

    # Read every image before creating the figure, so that an unreadable file
    # fails before any window is opened.
    backgrounds = [plt.imread(_RAMA_BG_DIR + panel[1]) for panel in panels]

    fig = plt.figure(constrained_layout=True)
    gs = gridspec.GridSpec(2, 2, figure=fig)

    for (cell, _, phi_key, psi_key, title), image in zip(panels, backgrounds):
        ax = fig.add_subplot(gs[cell])
        ax.imshow(image, extent=(-180, 180, -180, 180))
        ax.scatter(angles[phi_key], angles[psi_key], s=15, color='dimgray')
        _style_rama_axes(ax, title)

    plt.show()


def _plot_angle_diff(ax, diffs, symbol, line_color, y_limits, n_frames):
    """Plot one angle-difference trace with its mean line and a legend."""
    mean_diff = sum(diffs) / len(diffs)
    stdev_diff = statistics.stdev(diffs)
    ax.plot([0, len(diffs)], [mean_diff, mean_diff],
            color='k',
            alpha=0.75,
            label=symbol + '-avg: ' + str(round(mean_diff, 2)) + ', stdev: ' +
            str(round(stdev_diff, 2)))
    ax.plot(diffs, color=line_color, alpha=0.5)
    ax.scatter(range(len(diffs)), diffs, marker='x', s=5, color='k')
    ax.set_ylabel('Angle difference')
    ax.set_xlabel('Timeframe')
    ax.set_title(symbol + '-difference over time')
    ax.set_ylim(*y_limits)
    ax.set_xlim(0, n_frames)
    ax.legend(loc='upper right', borderaxespad=0.)


def _plot_trajectory_panels(angles):
    """Draw the 3x2 grid of plots for the 'phi' and 'psi' angle series."""
    phi = angles['phi']
    psi = angles['psi']

    plt.style.use('seaborn-darkgrid')

    fig = plt.figure(constrained_layout=True)
    gs = gridspec.GridSpec(3, 2, figure=fig)
    fig.suptitle('Number of angles: ' + str(len(psi)), fontsize=12)

    # Scatter plot
    ax1 = fig.add_subplot(gs[0, 0])
    ax1.scatter(phi, psi, s=15, color='dimgray')
    _style_rama_axes(ax1, 'Ramachandran plot of a single amino acid')

    # Contour plot (axis limits are left to the KDE plot)
    ax2 = fig.add_subplot(gs[0, 1])
    sns.kdeplot(phi,
                psi,
                ax=ax2,
                cmap='Reds',
                shade=True,
                shade_lowest=False)
    sns.rugplot(phi, color='k', ax=ax2)
    sns.rugplot(psi, color='k', vertical=True, ax=ax2)
    _style_rama_axes(ax2, 'Contour plot of a single amino acid',
                     set_limits=False)

    # Angle differences.
    # NOTE(review): angle_trans/angle_diff are defined elsewhere; they are
    # assumed to return plain lists, so that `+` below concatenates -- confirm.
    psi_trans = angle_trans(psi)
    phi_trans = angle_trans(phi)
    psi_diff = angle_diff(psi_trans)
    phi_diff = angle_diff(phi_trans)

    # Both difference panels share one y-range so they can be compared.
    combined_list = psi_diff + phi_diff
    y_limits = (min(combined_list) - 5, max(combined_list) + 5)

    # Angle difference over time for Phi and Psi. Both x-ranges use the
    # length of the phi series.
    n_frames = len(phi)
    ax3 = fig.add_subplot(gs[1, 0])
    _plot_angle_diff(ax3, phi_diff, 'φ', 'indianred', y_limits, n_frames)
    ax4 = fig.add_subplot(gs[1, 1])
    _plot_angle_diff(ax4, psi_diff, 'ψ', 'darkkhaki', y_limits, n_frames)

    # Bin edges for the histogram panel: integer steps of 1 between the
    # truncated minimum and maximum of each series.
    bins_list_phi = list(range(int(min(phi)), int(max(phi)), 1))
    bins_list_psi = list(range(int(min(psi)), int(max(psi)), 1))

    # 3D density plot
    ax5 = fig.add_subplot(gs[2, 0], projection='3d')

    x = np.asarray(phi)
    y = np.asarray(psi)

    # Scan each array once for its extremes, then pad the evaluation grid by
    # a tenth of the data range on every side.
    x_lo, x_hi = min(x), max(x)
    y_lo, y_hi = min(y), max(y)
    deltaX = (x_hi - x_lo) / 10
    deltaY = (y_hi - y_lo) / 10

    xmin = x_lo - deltaX
    xmax = x_hi + deltaX
    ymin = y_lo - deltaY
    ymax = y_hi + deltaY

    # 100 x 100 grid on which the Gaussian KDE of (phi, psi) is evaluated.
    xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]

    positions = np.vstack([xx.ravel(), yy.ravel()])
    values = np.vstack([x, y])
    kernel = st.gaussian_kde(values)
    f = np.reshape(kernel(positions).T, xx.shape)

    ax5.plot_surface(xx,
                     yy,
                     f,
                     rstride=1,
                     cstride=1,
                     edgecolor='none',
                     cmap='plasma')
    ax5.set_xlim(-180, 180)
    ax5.set_ylim(-180, 180)
    ax5.set_xlabel('φ')
    ax5.set_ylabel('ψ')
    ax5.set_zlabel('Density')
    ax5.set_title('Surface plot of angle distributions')
    ax5.view_init(20, 280)

    # Histogram/Density plot for angle distribution
    ax6 = fig.add_subplot(gs[2, 1])
    sns.distplot(phi,
                 ax=ax6,
                 bins=bins_list_phi,
                 color='indianred',
                 label='φ')
    sns.distplot(psi,
                 ax=ax6,
                 bins=bins_list_psi,
                 color='darkkhaki',
                 label='ψ')
    ax6.set_xlim(-180, 180)
    ax6.set_title('Histogram/Density plot of angle distribution')
    ax6.legend()

    # NOTE: polar histograms of the transformed angles are not included yet.

    plt.show()
Ejemplo n.º 47
0
    plt.subplot(sub)
    plt.title('DMC-' + i, fontsize=14, position=(0.5, 1.05))
    sns.kdeplot(
        df1[i],
        df1['DMC'],  # demonstrate the probability distribution of two variables
        cbar=True,  # display color bar
        shade=True,  # display shades
        cmap='Blues',  # set the color palatte
        shade_lowest=False,  # not display periphery color/shade
        n_levels=40  # number of curves, the higher, the smoother
    )  # the color change indicates the change of density
    plt.grid(linestyle='--')
    plt.scatter(
        df1[i], df1['DMC'], s=5, alpha=0.5, color=C,
        marker='+')  #scatter: green indicates no fire, red indicates fire
    sns.rugplot(df1[i], color='g', axis='x', alpha=0.5)
    sns.rugplot(df1['DMC'], color='r', axis='y', alpha=0.5)
    if sub == 133:
        plt.axis([-6, 6.5, 0, 300
                  ])  # move the plots to central area for better observation
    sub += 1
plt.show()

plt.figure(18)  # DMC-temp
plt.title('DMC-temp', fontsize=14, position=(0.5, 1.05))
pal = 'Blues'
sns.kdeplot(df1['temp'],
            df1['DMC'],
            cbar=True,
            shade=True,
            cmap=pal,
Ejemplo n.º 48
0
def plot_lean_one(b, s, side, **kwargs):
    """Draw one LEAN plot for a building and save it as a PNG.

    Parameters
    ----------
    b : building identifier; used in the plot title and output file name.
    s : station identifier; used in the plot title and output file name.
    side : str
        ``'gas'`` or ``'elec'`` draws that single fitted curve above its base
        load, with a rug of the observations on one side of the breakpoint.
        ``'combined'`` draws both curves stacked on the base loads, and
        ``'base'`` draws the base-load lines. Any other value takes the same
        path as ``'combined'``/``'base'`` but draws only the base-load bands.
    **kwargs
        For ``'gas'``/``'elec'`` only ``kwargs[side]`` is read; otherwise both
        ``kwargs['gas']`` and ``kwargs['elec']`` are read. Each is a mapping
        with the keys:

        * ``'x_range'``        -- ``(t_min, t_max)`` for the x grid
        * ``'regression_par'`` -- parameters passed to ``'fun'``; items 0 and
          1 are used as slope and intercept for the base load
        * ``'breakpoint'``     -- an ``int``, or an indexable ``(left, right)``
          pair (for gas in the two-sided branch it is used as a scalar)
        * ``'fun'``            -- callable ``fun(x, *regression_par)``
        * ``'x'``              -- observations shown in the rug plot

    The figure is written below ``<cwd>/plot_FY_weather/lean_piecewise/`` and
    then closed; nothing is returned.

    NOTE(review): ``title_dict``, ``ylabel_dict`` and ``P`` are module-level
    names not visible here; they are assumed to be defined elsewhere in the
    file.
    """
    # Call form with a single argument prints the same on Python 2 and 3.
    print('creating {0} LEAN for building {1} ...'.format(side, b))
    sns.set_style("darkgrid")
    sns.set_palette("Set2")
    sns.set_context("talk", font_scale=1)
    gas_line_color = 'deeppink'
    elec_line_color = 'navy'
    base_gas_color = 'orange'
    base_elec_color = 'yellow'
    bx = plt.axes()
    if side == 'elec' or side == 'gas':
        spec = kwargs[side]
        t = spec['x_range']
        t_min = t[0]
        t_max = t[1]
        par = spec['regression_par']
        k = par[0]
        intercept = par[1]
        # A breakpoint is either a single int or a (left, right) pair; the
        # pair bounds stay None when only a single breakpoint exists.
        break_left = break_right = None
        if isinstance(spec['breakpoint'], int):
            break_pt = spec['breakpoint']
        else:
            break_left = spec['breakpoint'][0]
            break_right = spec['breakpoint'][1]
            break_pt = break_left
        base = k * break_pt + intercept
        xd = np.linspace(t_min, t_max, 150)
        # Curve relative to the base load at the breakpoint.
        yd = spec['fun'](xd, *par) - base
        line_color = gas_line_color if side == 'gas' else elec_line_color
        plt.plot(xd, yd, line_color)
        bx.fill_between(xd, 0, yd, facecolor=line_color, alpha=0.3)
        rug_x = spec['x']
        if side == 'gas':
            # Gas: keep observations below the (left) breakpoint.
            rug_x = [x for x in rug_x if x < break_pt]
        elif break_left is not None:
            # Elec with two breakpoints: keep observations outside the pair.
            rug_x = [x for x in rug_x
                     if x > break_right or x < break_left]
        else:
            # Elec with one breakpoint: keep observations above it.
            rug_x = [x for x in rug_x if x > break_pt]
        sns.rugplot(rug_x, ax=bx, color=line_color)
        plt.ylim((0, max(yd) * 1.1))
    else:
        gas = kwargs['gas']
        t = gas['x_range']
        t_min = t[0]
        t_max = t[1]
        par_gas = gas['regression_par']
        elec = kwargs['elec']
        par_elec = elec['regression_par']
        k_gas = par_gas[0]
        intercept_gas = par_gas[1]
        breakpoint_gas = gas['breakpoint']
        base_gas = k_gas * breakpoint_gas + intercept_gas
        k_elec = par_elec[0]
        intercept_elec = par_elec[1]
        if isinstance(elec['breakpoint'], int):
            breakpoint_elec = elec['breakpoint']
        else:
            # Only the left breakpoint of a pair is used for the base load.
            breakpoint_elec = elec['breakpoint'][0]
        base_elec = k_elec * breakpoint_elec + intercept_elec
        xd = np.linspace(t_min, t_max, 150)
        # Each curve is offset by the other fuel's base load.
        yd_gas = (gas['fun'](xd, *par_gas)) + base_elec
        yd_elec = (elec['fun'](xd, *par_elec)) + base_gas
        if side == 'combined':
            plt.plot(xd, yd_gas, gas_line_color)
            plt.plot(xd, yd_elec, elec_line_color)
            bx.fill_between(xd, base_elec + base_gas, yd_elec,
                            facecolor=elec_line_color, alpha=0.3)
            bx.fill_between(xd, base_elec + base_gas, yd_gas,
                            facecolor=gas_line_color, alpha=0.3)
        elif side == 'base':
            plt.plot(xd, [base_elec] * len(xd), base_elec_color)
            plt.plot(xd, [base_elec + base_gas] * len(xd),
                     base_gas_color)
        # Base-load bands, y-limit and rug are drawn on every path through
        # this branch.
        bx.fill_between(xd, 0, base_elec,
                        facecolor=base_elec_color, alpha=0.5)
        bx.fill_between(xd, base_elec, base_elec + base_gas,
                        facecolor=base_gas_color, alpha=0.5)
        plt.ylim((0, max(max(yd_elec), max(yd_gas)) * 1.1))
        rug_x = gas['x']
        sns.rugplot(rug_x, ax=bx, color='gray')
    plt.title('Lean {0} plot, Building {1}, station {2}'.format(title_dict[side], b, s))
    plt.xlabel('Monthly Mean Temperature, Deg F')
    plt.ylabel(ylabel_dict[side])
    plt.tight_layout()
    P.savefig(os.getcwd() + '/plot_FY_weather/lean_piecewise/{0}_{1}_{2}.png'.format(b, s, side), dpi = 150, bbox_inches='tight')
    # plt.show()
    plt.close()