def plot_and_savefig(self, out_path=None):
        sns.set_context('notebook')
        sns.set_style('white')

        plot_w = 3 + len(self.data['sample'].unique())
        plot_h = 3.5
        plots_per_row = 3

        n_plots = len(self.data.columns) - 2
        n_rows = ceil(n_plots / plots_per_row)
        n_cols = ceil(n_plots / n_rows)
        ax_ids = list(np.arange(n_plots) + 1)

        fig = plt.figure()
        fig.set_figheight(plot_h * n_rows)
        fig.set_figwidth(plot_w * n_cols)

        first_plot = True
        for category in self.data.columns:
            if category in ['CATEGORY', 'sample']:
                continue

            ax = fig.add_subplot(n_rows, n_cols, ax_ids.pop(0))
            self.draw_ax(ax, category)
            # show the legend only on the first drawn axes; hide any others
            # (the old `i == 0` check missed it when 'CATEGORY' or 'sample' came first)
            if first_plot:
                ax.legend()
                first_plot = False
            elif ax.legend_ is not None:
                ax.legend_.set_visible(False)

        plt.tight_layout()
        if out_path:
            plt.savefig(out_path, dpi=300, bbox_inches='tight')

        return ax
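For reference, a minimal standalone sketch of the grid arithmetic used above (the numbers are illustrative):

from math import ceil

n_plots = 7                             # e.g. 9 columns minus 'CATEGORY' and 'sample'
plots_per_row = 3
n_rows = ceil(n_plots / plots_per_row)  # -> 3
n_cols = ceil(n_plots / n_rows)         # -> 3, so a 3x3 grid fits all 7 axes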
Example #2
def dist_small_multiples(df, figsize=(20, 20)):
    """
    Small multiples plots of the distribution of a dataframe's variables.
    """
    import math

    sns.set_style("white")

    num_plots = len(df.columns)
    n = int(math.ceil(math.sqrt(num_plots)))

    fig = plt.figure(figsize=figsize)
    axes = [plt.subplot(n, n, i) for i in range(1, num_plots + 1)]

    i = 0
    for k, v in df.items():  # DataFrame.iteritems() was removed in pandas 2.0
        ax = axes[i]
        sns.kdeplot(v, shade=True, ax=ax, legend=False)  # 'shade' is 'fill' in seaborn >= 0.12
        sns.rugplot(v, ax=ax, c=sns.color_palette("husl", 3)[0])
        for label in ax.get_yticklabels():
            label.set_visible(False)
        ax.xaxis.set_ticks([v.min(), v.max()])
        ax.set_title(k)
        i += 1
    sns.despine(left=True, trim=True, fig=fig)
    plt.tight_layout()
    return fig, axes
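A hedged usage sketch for dist_small_multiples (synthetic data; the column names are illustrative):

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

df = pd.DataFrame(np.random.randn(200, 4), columns=list('abcd'))
fig, axes = dist_small_multiples(df, figsize=(10, 10))
plt.show()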
Example #3
def UseSeaborn(palette='deep'):
    """Call to use seaborn plotting package
    """
    import seaborn as sns
    #No Background fill, legend font scale, frame on legend
    sns.set(style='whitegrid', font_scale=1.5, rc={'legend.frameon': True})
    #Mark ticks with border on all four sides (overrides 'whitegrid')
    sns.set_style('ticks')
    #ticks point in
    sns.set_style({"xtick.direction": "in","ytick.direction": "in"})

    # sns.choose_colorbrewer_palette('q')

    #Nice Blue,green,Red
    # sns.set_palette('colorblind')
    if palette == 'xkcd':
        # Nice blue, purple, green; 'xkcdcolors' must be a module-level list of color names
        sns.set_palette(sns.xkcd_palette(xkcdcolors))
    else:
        sns.set_palette(palette)
    #Nice blue, green red
    # sns.set_palette('deep')

    # sns.set_palette('Accent_r')
    # sns.set_palette('Set2')
    # sns.set_palette('Spectral_r')
    # sns.set_palette('spectral')

    #FIX INVISIBLE MARKER BUG
    sns.set_context(rc={'lines.markeredgewidth': 0.1})
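A hedged usage sketch: call UseSeaborn() once before plotting (the 'xkcd' branch additionally assumes a module-level xkcdcolors list of color names):

import matplotlib.pyplot as plt

UseSeaborn(palette='deep')
plt.plot([0, 1, 2, 3], [0, 1, 4, 9], marker='o')
plt.show()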
Example #4
def style(mod=None):
    sb.set(font_scale=0.8)
    # sb.set_style("white")
    # sns.set_style("ticks")
    sb.set_style({'lines.linewidth': 0.3,
                   'axes.labelcolor': '.0',
                   'axes.linewidth': 0.5,
                   'axes.edgecolor': '.2',
                   'axes.facecolor': 'white',
                   'axes.grid': True,
                   'font.family': ['sans-serif'],
                   'font.sans-serif': ['Arial'],
                   'grid.linewidth': 0.5,
                   'grid.color': '.9',
                   'text.color': '.0',
                   'savefig.dpi': 100,
                   'xtick.color': '.0',
                   'ytick.color': '.0',
                   'xtick.direction': 'in',
                   'xtick.major.size': 3.0,
                   'xtick.minor.size': 1,
                   'xtick.major.width': 0.5,
                   'xtick.minor.width': 0.5,
                   'xtick.major.pad':3,
                   'ytick.direction': 'in',
                   'ytick.major.size': 3.0,
                   'ytick.minor.size': 1,
                   'ytick.major.width': 0.5,
                   'ytick.minor.width': 0.5,
                   'ytick.major.pad':3,
                   # 'axes.labelpad': 0.3,
                   'savefig.transparent': True,
                  })
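A hedged usage sketch (assuming seaborn was imported as sb, as the function requires):

import matplotlib.pyplot as plt

style()
plt.plot(range(5), range(5))
plt.show()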
Example #5
    def prepare_plots(self):
        """
        Generates analysis plots used for the final report (as seen in Fig. 7).

        """

        sns.set_style('darkgrid')
        fig = plt.figure()
        ax = fig.add_subplot(111)

        ax.set_xlabel('Initial population size (N)')
        ax.set_ylabel('Variation of adult counts over 10 years')

        a = np.arange(1, len(self.initial_pops)+1, 1)
        b, c = [], []

        for pop in self.initial_pops:
            result = self.result_dict[pop]

            b.append(result['mean_stdev'])
            c.append(result['ci'])

        ax.errorbar(a,b,yerr=c)
        ax.scatter(a,b,s=40)
        ax.plot(a,b)
        plt.xticks(a,self.initial_pops)

        # save before show(): with many backends, show() leaves a blank figure for savefig()
        plt.savefig('results/results.png', bbox_inches='tight')
        plt.show()
Example #6
def plot_dist_matrix(matrix, fasta_names, heatmap_out, dendrogram_out):
    """Cluster the distance matrix hierarchically and plot using seaborn.
    Average linkage method is used."""
    # Load required modules for plotting
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    import seaborn as sns
    import pandas as pd
    from scipy.cluster.hierarchy import dendrogram, linkage

    # Create a labelled DataFrame from the distance matrix
    pdm = pd.DataFrame(matrix, index=fasta_names, columns=fasta_names)

    # Plot heatmap
    figsizex = max(10, len(fasta_names) / 4)
    clustergrid = sns.clustermap(pdm, metric='euclidean', method='average',
            figsize=(figsizex, figsizex))
    clustergrid.savefig(heatmap_out)

    # Plot dendrogram
    sns.set_style('white')
    figsizey = max(10, len(fasta_names) / 8)
    f, ax = plt.subplots(figsize=(figsizex, figsizey))
    link = linkage(pdm, metric='euclidean', method='average')
    dendrogram(link, labels=pdm.index, ax=ax)
    no_spine = {'left': True, 'bottom': True, 'right': True, 'top': True}
    sns.despine(**no_spine)
    plt.xticks(rotation=90)
    f.tight_layout()
    plt.savefig(dendrogram_out)
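A hedged usage sketch for plot_dist_matrix (a tiny symmetric toy matrix; the names and output paths are illustrative):

import numpy as np

names = ['seqA', 'seqB', 'seqC', 'seqD']
m = np.random.rand(4, 4)
matrix = (m + m.T) / 2       # symmetrize
np.fill_diagonal(matrix, 0)  # zero self-distances
plot_dist_matrix(matrix, names, 'heatmap.png', 'dendrogram.png')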
Example #7
def pltsns(style='ticks', context='talk'):
    global figdir
    sns.set_style(style)
    sns.set_style({'legend.frameon':True})
    sns.set_context(context)
    #figdir = datadir+'samoa/WATERSHED_ANALYSIS/GoodFigures/rawfigoutput/'
    return
Example #8
def showResults(challenger_data, model):
    ''' Show the original data, and the resulting logit-fit'''
    
    temperature = challenger_data[:,0]
    failures = challenger_data[:,1]
    
    # First plot the original data
    plt.figure()
    setFonts()
    sns.set_style('darkgrid')
    np.set_printoptions(precision=3, suppress=True)
    
    plt.scatter(temperature, failures, s=200, color="k", alpha=0.5)
    plt.yticks([0, 1])
    plt.ylabel("Damage Incident?")
    plt.xlabel("Outside Temperature [F]")
    plt.title("Defects of the Space Shuttle O-Rings vs temperature")
    plt.tight_layout()
    
    # Plot the fit
    x = np.arange(50, 85)
    alpha = model.params[0]
    beta = model.params[1]
    y = logistic(x, beta, alpha)
    
    # plt.hold() was removed in matplotlib 3.0; new plots overlay by default
    plt.plot(x, y, 'r')
    plt.xlim([50, 85])
    
    outFile = 'ChallengerPlain.png'
    showData(outFile)
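showResults above assumes logistic(), setFonts() and showData() are defined elsewhere in the module; one definition of logistic consistent with the call logistic(x, beta, alpha) is the usual sigmoid (an assumption, not taken from this file):

import numpy as np

def logistic(x, beta, alpha=0):
    # probability of damage as a sigmoid of temperature (assumed form)
    return 1.0 / (1.0 + np.exp(np.dot(beta, x) + alpha))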
Example #9
def plot_op(operation):
    """ Plots operation for all models
    """

    df = pd.read_csv(RESULT_FOLDER + RESULT_FILE, usecols=[1, 2, 3])
    print(df.columns)
    df.columns = ['mo', 'node', 'time']
    #print df.head()

    ele = mo(operation)

    # DataFrame.append() was removed in pandas 2.0; concatenate the four groups instead
    qpare = pd.concat([df[df.mo == ele[k]] for k in range(4)])

    f, ax = plt.subplots()
    ax.set(yscale="log")

    ax.set_title('Query time')
    sns.set_style("whitegrid")
    sns.boxplot(x='mo', y='time', data=qpare)
    ax.set_xlabel("model-operation")
    ax.set_ylabel("time [s]")

    # sns.plt was removed in seaborn 0.9; use matplotlib's pyplot directly
    #plt.show()
    plt.savefig(RESULT_FOLDER + operation + '.png')
    plt.clf()
Example #10
def show_binomial():
    """Show an example of binomial distributions"""
    
    bd1 = stats.binom(20, 0.5)
    bd2 = stats.binom(20, 0.7)
    bd3 = stats.binom(40, 0.5)
    
    k = np.arange(40)
    
    sns.set_context('paper')
    sns.set_style('ticks')
    mystyle.set(14)
    
    markersize = 8
    plt.plot(k, bd1.pmf(k), 'o-b', ms=markersize)
    # plt.hold() was removed in matplotlib 3.0; new plots overlay by default
    plt.plot(k, bd2.pmf(k), 'd-r', ms=markersize)
    plt.plot(k, bd3.pmf(k), 's-g', ms=markersize)
    plt.title('Binomial distribution')
    plt.legend(['p=0.5 and n=20', 'p=0.7 and n=20', 'p=0.5 and n=40'])
    plt.xlabel('X')
    plt.ylabel('P(X)')
    sns.despine()
    
    mystyle.printout_plain('Binomial_distribution_pmf.png')
    
    plt.show()
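The three stats.binom objects above are frozen distributions, so individual pmf values can be checked directly, e.g.:

from scipy import stats

bd = stats.binom(20, 0.5)
print(bd.pmf(10))  # P(X = 10) for n=20, p=0.5 -> about 0.176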
Example #11
def main():
    # the *_Module names are aliases for seaborn, pandas and matplotlib.pyplot,
    # assumed to be imported elsewhere
    seaborn_Seaborn_Module.set_style("dark")
    housing_2013 = pandas_Pandas_Module.read_csv("../Hud_2013.csv")
    cols = ['AGE1', 'FMR', 'TOTSAL']
    filtered_housing_2013 = housing_2013[cols]
    filtered_housing_2013.hist(column='FMR', bins=20)
    matplotlib_pyplot_Pyplot_Module.show()
Example #12
def make_plotdir():
    "make plot directory on file system"
    sns.set_style("darkgrid")
    plotdir = get_plotdir()
    if not os.access(plotdir, os.F_OK):
        os.mkdir(plotdir)
    return plotdir
Example #13
def generate_plot(csv_file_name, plot_file_name, x, y, hue, y_title, xticklabels_rotation=90):
    sns.set(font_scale=1.5)

    sns.set_style("white", {"legend.frameon": True})

    df = pd.read_csv(csv_file_name)

    ax = sns.barplot(data=df, x=x, y=y, hue=hue, palette=sns.color_palette("Paired"))
    ax.set_xlabel('')
    ax.set_ylabel(y_title)

    labels = ax.get_xticklabels()
    ax.set_xticklabels(labels, rotation=xticklabels_rotation)

    fig = ax.get_figure()

    if hue:
        legend = ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
        legend.set_title('')  # set_label() has no visible effect here; set_title('') clears the legend title

        fig.savefig(plot_file_name, bbox_extra_artists=(legend,), bbox_inches='tight')
        fig.savefig(plot_file_name + '.jpg', bbox_extra_artists=(legend,), bbox_inches='tight')
    else:
        fig.tight_layout()

        fig.savefig(plot_file_name)
        fig.savefig(plot_file_name + '.jpg')

    plt.clf()
    plt.close('all')
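A hedged usage sketch for generate_plot (the CSV contents and column names are illustrative):

import pandas as pd

pd.DataFrame({'method': ['a', 'a', 'b', 'b'],
              'score':  [0.91, 0.87, 0.78, 0.74],
              'run':    ['r1', 'r2', 'r1', 'r2']}).to_csv('scores.csv', index=False)
generate_plot('scores.csv', 'scores', x='method', y='score', hue='run',
              y_title='Score')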
Example #14
def showResults(challenger_data, model):
    ''' Show the original data, and the resulting logit-fit'''
    
    # First plot the original data
    plt.figure()
    sns.set_context('poster')
    sns.set_style('whitegrid')
    np.set_printoptions(precision=3, suppress=True)
    
    plt.scatter(challenger_data[:, 0], challenger_data[:, 1], s=75, color="k",
                alpha=0.5)
    plt.yticks([0, 1])
    plt.ylabel("Damage Incident?")
    plt.xlabel("Outside temperature (Fahrenheit)")
    plt.title("Defects of the Space Shuttle O-Rings vs temperature")
    plt.xlim(50, 85)
    
    # Plot the fit
    x = np.arange(50, 85)
    alpha = model.params[0]
    beta = model.params[1]
    y = logistic(x, beta, alpha)
    
    # plt.hold() was removed in matplotlib 3.0; new plots overlay by default
    plt.plot(x, y, 'r')
    outFile = 'ChallengerPlain.png'
    C2_8_mystyle.printout_plain(outFile, outDir=r'..\Images')  # raw string avoids the '\I' escape warning
    plt.show()
Example #15
File: pdf.py Project: ninoc/pyigm
    def hist_plot(self, bokeh=False):
        """ Simple histogram plot of the PDF

        Parameters
        ----------
        bokeh : bool, optional
          Generate a bokeh plot?

        Returns
        -------

        """
        if not bokeh:
            from matplotlib import pyplot as plt
            # imports
            try:
                import seaborn as sns; sns.set_style("white")
            except ImportError:
                pass
            # Giddy up
            plt.clf()
            plt.bar(self.x-self.dx/2., self.pdf, width=self.dx)
            plt.xlabel("x")
            plt.ylabel("PDF(x)")
            plt.show()
            plt.close()
        else:
            from bokeh.io import show
            from bokeh.plotting import figure
            p = figure(plot_width=400, plot_height=400, title='x PDF')
            p.quad(top=self.pdf, bottom=0, left=self.x-self.dx/2.,
                   right=self.x+self.dx/2.)
            p.xaxis.axis_label = 'x'
            # Show
            show(p)
Example #16
    def plot_mfi(self, outputfile='embeddings.pdf', nb_clusters=8, weights='NA'):
        # collect embeddings for mfi:
        X = np.asarray([self.w2v_model[w] for w in self.mfi \
                            if w in self.w2v_model], dtype='float32')
        # dimension reduction:
        tsne = TSNE(n_components=2)
        coor = tsne.fit_transform(X) # unsparsify

        plt.clf()
        sns.set_style('dark')
        # sns.plt was removed in seaborn 0.9; use matplotlib's pyplot directly
        plt.rcParams['axes.linewidth'] = 0.4
        fig, ax1 = plt.subplots()

        labels = self.mfi
        # first plot slices:
        x1, x2 = coor[:,0], coor[:,1]
        ax1.scatter(x1, x2, 100, edgecolors='none', facecolors='none')
        # clustering on top (add some colouring):
        clustering = AgglomerativeClustering(linkage='ward',
                            affinity='euclidean', n_clusters=nb_clusters)
        clustering.fit(coor)
        # add names:
        for x, y, name, cluster_label in zip(x1, x2, labels, clustering.labels_):
            ax1.text(x, y, name, ha='center', va="center",
                     color=plt.cm.nipy_spectral(cluster_label / 10.),  # 'spectral' was removed in matplotlib 2.2
                     fontdict={'family': 'Arial', 'size': 8})
        # control aesthetics:
        ax1.set_xlabel('')
        ax1.set_ylabel('')
        ax1.set_xticklabels([])
        ax1.set_xticks([])
        ax1.set_yticklabels([])
        ax1.set_yticks([])
        plt.savefig(outputfile)  # sns.plt was removed in seaborn 0.9; bbox_inches=0 was effectively a no-op
Example #17
def features_pca_classified(fscaled, labels_true, labels_predict, axes=None,
                            algorithm="pca"):
    if algorithm == 'pca':
        pc = PCA(n_components=2)
        fscaled_trans = pc.fit(fscaled).transform(fscaled)
    elif algorithm == "tsne":
        fscaled_trans = TSNE(n_components=2).fit_transform(fscaled)
    else:
        raise AlgorithmUnrecognizedException("Not recognizing method of "+
                                             "dimensionality reduction.")

    sns.set_style("whitegrid")
    plt.rc("font", size=24, family="serif", serif="Computer Sans")
    plt.rc("axes", titlesize=20, labelsize=20)
    plt.rc("text", usetex=True)
    plt.rc('xtick', labelsize=20)
    plt.rc('ytick', labelsize=20)

    # make a Figure object
    if axes is None:
        fig, axes = plt.subplots(1,2,figsize=(16,6), sharey=True)

    ax1, ax2 = axes[0], axes[1]

    ax1 = plotting.scatter(fscaled_trans, labels_true, ax=ax1)

    # second panel: physical labels:

    ax2 = plotting.scatter(fscaled_trans, labels_predict, ax=ax2)

    plt.tight_layout()

    return ax1, ax2
Example #18
def plot_swcrel(data, xlabel, ylabel):

    month_lab = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', \
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    sns.set_style("ticks")
    plt.rcParams.update({'mathtext.default': 'regular'})
    kws = dict(s=20, linewidth=.5, edgecolor="none", alpha=0.3)

    wue_plot = sns.FacetGrid(data, hue="Month", height=5)  # 'size' was renamed 'height' in seaborn 0.9
    wue_plot.map(plt.scatter, xlabel, ylabel, **kws)

    ymax = np.ceil(data[ylabel].mean() + 3*data[ylabel].std())
    xmax = np.max(data[xlabel])
    xmin = np.min(data[xlabel])

    x_ticks = np.arange(0, 0.4, 0.05)
    for wax in wue_plot.axes.ravel():
        wax.xaxis.set_ticks(x_ticks)
        wax.xaxis.set_ticklabels(['%1.2f' %x for x in x_ticks], \
                                 rotation=45, ha="right", fontsize=10)

    wue_plot.set(xlim=(xmin, xmax), ylim=(0, ymax))

    leg = plt.legend(loc='right', labels=month_lab, ncol=1, bbox_to_anchor=(1.3, 0.5), \
                     borderpad=2)
    leg.get_frame().set_edgecolor('black')

    wue_plot.fig.subplots_adjust(right=0.8, wspace=.08, hspace=0.15, top=0.9, bottom=0.25)

    return wue_plot
Example #19
def main():
    runResults = []
    # Traverse files, extract matrix, architecture and params
    for f in [f for f in os.listdir(".") if os.path.isfile(f)]:
        if f.startswith("run_Spmv"):
            runResults.append(RunResult(f))

    df = pd.DataFrame([[r.prj, r.matrix, r.gflops_est] for r in runResults])
    grouped = df.groupby(0)
    groups = []
    names = []
    for name, group in grouped:
        group.set_index(1, inplace=True)
        # group.sort_index(inplace=True)
        groups.append(group[2])
        names.append(name)

    new_df = pd.concat(groups, axis=1)
    new_df.columns = names

    sns.set_style("white")
    sns.set_palette(sns.color_palette("cubehelix", 13))
    bar = new_df.plot(kind="bar")
    sns.despine()
    fig = bar.get_figure()
    fig.set_size_inches(15, 15)
    fig.tight_layout()
    fig.savefig("est_gflops.pdf")
	def PlotFrequencyTuningCurves(self, stResponseProb, measure, unit=[], filePath=[]):
		""" Plots measure for multiple frequencies, with a trace for each tone intensity.        
		:param stResponseProb: DataFrame results of Bayesian response analysis for multiple tone stimulus intensities
		:type stResponseProb: pandas DataFrame 
		:param measure: Bayesian response analysis measure ['resProb', 'vocalResMag', 'vocalResMag_MLE', 'effectSize', 'effectSize_MLE', 'spontRate', 'spontRateSTD', 'responseLatency', 'responseLatencySTD', 'responseDuration']
		:type measure: int [0-9]
		:param unit: Unique identifier for cell
		:type unit: str
		:param filePath: Path to directory where results will be saved
		:type filePath: str
		:returns: Handle to plot
		"""		
		measureName = ['resProb', 'vocalResMag', 'vocalResMag_MLE', 'effectSize', 'effectSize_MLE', 'spontRate', 'spontRateSTD', 'responseLatency', 'responseLatencySTD', 'responseDuration']
		tuningData = stResponseProb
# 		sns.set_palette(sns.color_palette("bright", 8))
		attn = stResponseProb.keys()[0]
		firstFreq = stResponseProb[attn].index.tolist()[1]
		sns.set_style("white")
		sns.set_style("ticks")
		ax = stResponseProb.loc[:,firstFreq:,measure].fillna(0).plot(figsize=(6,4))
		sns.despine()
		plt.grid(False)
		plt.title(unit, fontsize=14)
		plt.xlabel('Frequency (kHz)', fontsize=12)
		plt.ylabel(measureName[measure], fontsize=12)
		plt.tick_params(axis='both', which='major', labelsize=14)
		if len(filePath)>0:
			plt.savefig(self.dirPath + filePath + 'freqTuning_'+measureName[measure]+'_'+unit+'.pdf')        
			plt.close()
		else: plt.show()
		return ax
Example #21
	def PlotBBNResponseCurve(self, bbnResponseProb, measure, unit=[], filePath=[], attn=False):
		""" Plots measure for multiple frequencies and intensities an a contour plot.        
		:param stResponseProb: DataFrames results of Bayesian response analysis for multiple tone stimulus intensities
		:type stResponseProb: pandas DataFrame 
		:param measure: Bayesian response analysis measure ['resProb', 'vocalResMag', 'vocalResMag_MLE', 'effectSize', 'effectSize_MLE', 'spontRate', 'spontRateSTD', 'responseLatency', 'responseLatencySTD', 'responseDuration']
		:type measure: integer [0-9]
		:param unit: Unique identifier for cell
		:type unit: str
		:param filePath: Path to directory where results will be saved
		:type filePath: str
		:returns: Handle to plot
		"""		
		measureName = ['resProb', 'vocalResMag', 'vocalResMag_MLE', 'effectSize', 'effectSize_MLE', 'spontRate', 'spontRateSTD', 'responseLatency', 'responseLatencySTD', 'responseDuration']
		tuningData = bbnResponseProb
		sns.set_palette(sns.color_palette("bright", 8))
		sns.set_context(rc={"figure.figsize": (5, 3)})
		sns.set_style("white")
		sns.set_style("ticks")
		if attn: ax = bbnResponseProb.loc[::-1,measure].fillna(0).plot(figsize=(6,4))
		else: ax = bbnResponseProb.loc[:,measure].fillna(0).plot(figsize=(6,4))
		sns.despine()
		plt.grid(False)
		plt.title(unit, fontsize=14)
		plt.xlabel('SPL (dB)', fontsize=12)
		plt.ylabel(measureName[measure], fontsize=12)
		plt.ylim(0.5,1.0)
# 		plt.gca().invert_xaxis()
		if len(filePath)>0:
			plt.savefig(self.dirPath + filePath + 'bbn_'+measureName[measure]+'_'+unit+'.pdf')        
			plt.close()
		else: plt.show()
		return ax
Example #22
File: main.py Project: Arnukk/TDS
def plot_proximity_heatmap(product_space_orig, proximity_matr):
    """
    Given the proximity matrix and the product space matrix, produces the heatmap (simply based on sorting)
    @param proximity_matr:
    @param product_space_orig:
    @return:
    """
    x = sorted(product_space_orig)
    y = sorted(product_space_orig)
    intensity = [0]*len(product_space_orig)
    i = 0
    for product in x:
        intensity[i] = [0]*len(product_space_orig)
        j = 0
        for product2 in y:
            if product in proximity_matr and product2 in proximity_matr[product]:
                intensity[i][j] = proximity_matr[product][product2]
            elif product2 in proximity_matr and product in proximity_matr[product2]:
                intensity[i][j] = proximity_matr[product2][product]
            elif product == product2:
                intensity[i][j] = 1
            else:
                pass
            j += 1
        i += 1

    intensity = np.array(intensity)
    f, ax = plt.subplots(figsize=(5, 4))
    sns.set_style("ticks", {'axes.edgecolor': '.0', 'axes.facecolor': 'black'})
    fd = sns.heatmap(intensity, xticklabels=False, yticklabels=False, cmap="RdYlBu_r", square=True)

    f.text(0.865, 0.5, r"Proximity $\phi$", ha='right', va='center', rotation='vertical', fontsize=13)
    f.tight_layout()
    plt.savefig('data/proximityheat.pdf')
Example #23
    def multi_plot_multi_model_metrics(self):
        index = list(range(len(self.model_list)))
        bw = 0.35
        score_list = ['Accuracy', 'Precision', 'Recall', 'F1', 'ROCAUC']

        plt.figure(figsize=(18,5))

        for j, scoring in enumerate(score_list):
            ax = plt.subplot(151 + j)
            ax.spines['right'].set_visible(False)
            ax.spines['top'].set_visible(False)
            sns.set_style("whitegrid")

            plt.bar(index, self.score_dict[scoring], bw,
                    align = 'center',
                    #color = colors[(i*2)],
                    alpha = 0.6,
                    label = self.index_func)

            plt.title(scoring, fontsize=15, fontweight='bold')
            plt.xticks(index, self.index_func, rotation='vertical')
            plt.ylim(0.0, 1.1)
            if j == 0:
                plt.ylabel('Score',fontsize=20, fontweight='bold')
            #if j == 4:
            #    plt.legend()
            plt.grid(False)
Example #24
def compare_spectra():
    import mywfc3.stgrism as st
    import unicorn
    
    ### Fancy colors
    import seaborn as sns
    import matplotlib.pyplot as plt
    cmap = sns.cubehelix_palette(as_cmap=True, light=0.95, start=0.5, hue=0.4, rot=-0.7, reverse=True)
    cmap.name = 'sns_rot'
    plt.register_cmap(cmap=cmap)
    sns.set_style("ticks", {"ytick.major.size":3, "xtick.major.size":3})
    plt.set_cmap('sns_rot')
    #plt.gray()
    
    fig = st.compare_methods(x0=787, y0=712, v=np.array([-1.5,4])*0.6, NX=180, NY=40, direct_off=100, final=True, mask_lim = 0.02)
    #fig.tight_layout()
    unicorn.plotting.savefig(fig, '/tmp/compare_model_star.pdf', dpi=300)

    fig = st.compare_methods(x0=485, y0=332, v=np.array([-1.5,4])*0.2, NX=180, NY=40, direct_off=100, final=True, mask_lim = 0.1)
    unicorn.plotting.savefig(fig, '/tmp/compare_model_galaxy.pdf', dpi=300)

    fig = st.compare_methods(x0=286, y0=408, v=np.array([-1.5,4])*0.08, NX=180, NY=40, direct_off=100, final=True, mask_lim = 0.1)
    unicorn.plotting.savefig(fig, '/tmp/compare_model_galaxy2.pdf', dpi=300)

    fig = st.compare_methods(x0=922, y0=564, v=np.array([-1.5,4])*0.2, NX=180, NY=40, direct_off=100, final=True, mask_lim = 0.15)
    unicorn.plotting.savefig(fig, '/tmp/compare_model_galaxy3.pdf', dpi=300)
Example #25
def printKeyTypeDNAwithLabels(keyGroups,keydataDF,labelsDF,outputFile="",printIt=False):
    sns.set_style("white")
    labelDict = {'fontsize': 16, 'weight' : 'roman'}
    fig,ax = plt.subplots(figsize=(18,10))
    for g in keyGroups.groups:
        colours = [c for c in keyGroups.get_group(g)['key colour']]
        x = [i for i in keyGroups.get_group(g)['keycode'].index]
        y = [k for k in keyGroups.get_group(g)['keycode']]
        #ax.scatter(x,y,s=100,marker='|',c=colours,linewidths=1,alpha=0.8,label=g)
        ax.scatter(x,y,s=30,marker='o',c=colours,linewidths=0,alpha=0.5,label=g)
    colours = sns.color_palette("GnBu_d",len(labelsDF))    
    for n,(d,l) in enumerate(zip(labelsDF.index,labelsDF['label'])):
        ax.plot([d,d],[0,225],color=colours[n],linewidth=3,alpha=0.5,label=l)
    box = ax.get_position()
    ax.set_position([box.x0,box.y0+box.height*0.7,box.width,box.height*0.3])
    ax.set_xlim(keydataDF.index[0],keydataDF.index[-1])
    ax.legend(loc='upper center',bbox_to_anchor=(0.5,-0.4))
    ax.set_ylabel("keycode",fontdict=labelDict)
    ax.set_xlabel("clock",fontdict=labelDict)
    plt.show()
    if printIt:
        fig.savefig(outputFile,format='png',dpi=256)
    plt.close(fig)
    plt.clf()
    return    
Example #26
def stripplot_to_pdf(data, save_path, x=None, y=None, hue=None,
                     style='whitegrid', fontsize=2, rows=1, cols=1,
                     figsize=(4, 4), **kwargs):
    """ Data plotted as stripplot using seaborn and saved in a pdf
    given in save_path

    Parameters
    ----------
    data : pd.DataFrame or path to csv file
        single or list of data to plot into pdf.

    save_path : str
        Path to save the pdf plot.

    """
    if isinstance(data, str):  # 'basestring' existed only under Python 2
        data = pd.read_csv(data)

    if isinstance(data, (list, tuple)):
        cols = len(data)

    if not isinstance(data, (list, tuple)):
        data = [data, ]

    sns.set_style(style)
    sns.set(font_scale=fontsize)

    with PdfPages(save_path) as pdf:
        fig, axes = plt.subplots(nrows=rows, ncols=cols, figsize=figsize,
                                 squeeze=False, sharey=True)
        axes = axes.reshape(-1)  # squeeze=False keeps an array even when rows == cols == 1
        for ax, d in zip(axes, data):
            sns.stripplot(x=x, y=y, hue=hue, data=d, ax=ax, **kwargs)
        pdf.savefig(fig)
        plt.close()
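A hedged usage sketch for stripplot_to_pdf (assuming the usual module-level imports, including PdfPages from matplotlib.backends.backend_pdf; the data is synthetic):

import pandas as pd

df = pd.DataFrame({'group': ['a'] * 10 + ['b'] * 10,
                   'value': list(range(20))})
stripplot_to_pdf(df, 'strips.pdf', x='group', y='value')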
Example #27
def plotrfACC():
    #data = json.loads(open('rf_accs.json').read())
    #data = json.loads(open('rf_accs_top3.json').read())
    data = json.loads(open('rf_accs_nowindow.json').read())
    nLetter = 3 #14
    data["texts/ADHD_various_half/"] = [data["texts/ADHD_various_half/"][i] for i in [1,2,3]]

    sns.set_style("dark")

    #f, (ax1, ax2) = plt.subplots(1, 2)
    f, ax1 = plt.subplots()
    bar1 = ax1.bar(range(nLetter),data["texts/ADHD_various_half/"])
    ax1.set_title('RF accs for half SAX')
    plt.sca(ax1)
    plt.xticks(np.arange(nLetter) + .4, range(3,nLetter+3))
    plt.xlabel('# of bins (letters)/word')
    ax1.set_ylim([0.6,0.9])

    #bar2 = ax2.bar(range(nLetter),data["texts/ADHD_various_full/"])
    #ax2.set_title('RF accs for full SAX')
    #plt.sca(ax2)
    #plt.xticks(np.arange(nLetter) + .4, range(2,nLetter+2))
    #plt.xlabel('# of bins (letters)/word')
    #ax2.set_ylim([0.6,0.9])

    plt.show()
Example #28
    def single_plot_multi_model_metrics(self):
        default_index = list(range(len(self.model_list)))
        bw = 0.15
        score_list = ['Accuracy', 'Precision', 'Recall', 'F1', 'ROCAUC']

        plt.figure(figsize=(18,5))

        for j,scoring in enumerate(score_list):
            ax = plt.subplot(111)
            ax.spines['right'].set_visible(False)
            ax.spines['top'].set_visible(False)
            sns.set_style("whitegrid")

            index = [i+bw*j for i in default_index]
            plt.bar(index, self.score_dict[scoring], bw,
                    align = 'center',
                    color = self.colors[(3+j)],
                    alpha = 0.6,
                    label = scoring)

            plt.title('Scores for Different Models', fontsize=15, fontweight='bold')
            tick_location = [i for i in default_index]
            plt.xticks(tick_location, self.index_func, rotation=60)
            plt.ylim(0.0, 1.1)
            if j == 0:
                plt.ylabel('Score',fontsize=20, fontweight='bold')
            if j == 4:
                plt.legend(loc='best')
            plt.grid(False)
Example #29
def build_wordmap(w2v_mat):

    pca = PCA(n_components=2)
    pca.fit(w2v_mat.T)
    w2v_pca = pca.transform(w2v_mat.T)

    km = KMeans(n_clusters=6)
    labels = km.fit_predict(w2v_mat.T)  # fixed typo: was 'w2vt_mat'

    colors = (255 * ScalarMappable(cmap='Paired').to_rgba(np.unique(labels))[:, :3]).astype(int)
    hex_colors = ['#%02x%02x%02x' % (r, g, b) for r, g, b in colors]  # %x formatting needs ints

    sns.set_style('dark')
    fig, ax = plt.subplots(1,1, figsize=(1.5,1.5))
    ax.axis('off')

    # ax = fig.add_subplot(111)

    for i in range(w2v_pca.shape[0]):  # fixed typo: was 'w2vt_pca'
        plt.text(w2v_pca[i, 0], w2v_pca[i, 1], str(vocab[i]),  # 'vocab': module-level word list
                 fontdict={'color': hex_colors[labels[i]], 'size': 12})

    return ax
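A hedged usage sketch for build_wordmap (vocab is assumed to be a module-level list with one word per column of w2v_mat; PCA, KMeans and ScalarMappable come from sklearn/matplotlib):

import numpy as np

vocab = ['alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta']
w2v_mat = np.random.randn(50, len(vocab))  # one 50-dim vector per word, words as columns
ax = build_wordmap(w2v_mat)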
Example #30
def draw_chart(chart_name, measure, axis, val_ordinate, train_ordinate, test_ordinate, dst_folder):
    plt.style.use('seaborn-v0_8')  # the 'seaborn' style was renamed in matplotlib 3.6
    sns.set(font_scale=1.2)
    sns.set_style({'font.family': 'serif'})
    fig, ax = plt.subplots(figsize=(8, 8))
    ttl = ax.title
    ttl.set_position([.5, 1.05])
    plt.tick_params(axis='both', which='major', labelsize=8)
    plt.tick_params(axis='both', which='minor', labelsize=8)

    ax.set_title(' '.join(chart_name.replace('_test_es_50_lr_1e-05_l2_0_0_mc_3_hsize_250','').split('_')[1:]))
    plt.yticks(np.arange(0, 1.1, 0.1))
    ax.set_ylim(0, 1)
    plt.xticks(np.arange(0, 11, 1))
    ax.set_xlim(0, 10)
    plt.grid(True)
    plt.xlabel('epochs')
    plt.ylabel(measure)
    if val_ordinate is not None:
        ax.plot(axis, val_ordinate, color=sns.xkcd_rgb["pale red"], marker='.', label='validation')  # plotting t, a separately
    if train_ordinate is not None:
        ax.plot(axis, train_ordinate,color=sns.xkcd_rgb["medium green"],  marker='.', label='train')  # plotting t, b separately
    if test_ordinate is not None:
        ax.plot(axis, test_ordinate, color=sns.xkcd_rgb["denim blue"], marker='.', label='test')  # plotting t, c separately
    ax.legend()
    plt.savefig(os.path.join(dst_folder,'{}_{}.pdf'.format('_'.join(chart_name.split('_')[1:]).replace('_test_es_50_lr_1e-05_l2_0_0_mc_3_hsize_250',''),measure)),dpi=300,bbox_inches='tight')
Example #31
File: figs.py Project: ninickl/bolt
def query_speed_fig(fake_data=False, fname='query_speed', with_matmuls=True,
                    camera_ready=False):
    # experiment params: fixed N = 100k, D = 256, Q = 1024;
    # layout: rows = 8B, 16B, 32B; bar graph in each row
    #   alternative: plot in each row vs batch size
    # algos: Bolt; PQ; OPQ; PairQ; Matmul, batch={1, 16, 64, 256}

    sb.set_context("talk")
    # if camera_ready:  # white style overwrites our fonts
    #     matplotlib.rcParams['font.family'] = CAMERA_READY_FONT
    set_palette(ncolors=8)
    # fig, axes = plt.subplots(3, 1, figsize=(6, 8))
    fig, axes = plt.subplots(3, 1, figsize=(6, 8), dpi=300)

    if fake_data:  # for debugging
        ALGOS = ['Bolt', 'PQ', 'OPQ', 'PairQ',
                 # 'Matmul Batch 1', 'Matmul Batch 16', 'Matmul Batch 64', 'Matmul Batch 256']
                 # 'Matmul Batch1', 'Matmul Batch16', 'Matmul Batch64', 'Matmul Batch256']
                 'Matmul 1', 'Matmul 16', 'Matmul 64', 'Matmul 256']
        algo2offset = {'Bolt': 100, 'PQ': 50, 'OPQ': 30, 'PairQ': 25,
                       # 'Matmul Batch 1': 1, 'Matmul Batch 16': 16,
                       # 'Matmul Batch 64': 64, 'Matmul Batch 256': 256}
                       # 'Matmul Batch1': 1, 'Matmul Batch16': 16,
                       # 'Matmul Batch64': 64, 'Matmul Batch256': 256}
                       'Matmul 1': 1, 'Matmul 16': 16, 'Matmul 64': 64,
                       'Matmul 256': 256}

        for i, nbytes in enumerate([8, 16, 32]):
            bytes_str = '{}B'.format(nbytes)
            dicts = []
            for algo in ALGOS:
                dps = np.random.randn(10) + 256 / nbytes
                dps += algo2offset[algo] / nbytes
                dicts += [{'algo': algo, 'nbytes': bytes_str, 'y': y} for y in dps]

            df = pd.DataFrame.from_records(dicts)
    else:
        # ALGOS = ['Bolt', 'PQ', 'OPQ', 'PairQ', 'Matmul 1', # 'Matmul 16',
        #          'Matmul 64', 'Matmul 256', 'Matmul 1024']

        if with_matmuls:
            ALGOS = ['Bolt', 'Binary Embedding', 'PQ', 'OPQ',
                    'Matmul 1', 'Matmul 256', 'Matmul 1024']
        else:
            ALGOS = ['Bolt', 'Binary Embedding', 'PQ', 'OPQ']
        df = results.query_speed_results()
        df['y'] = df['y'] / 1e9  # convert to billions

    print "df cols: ", df.columns
    df.rename(columns={'algo': ' '}, inplace=True)  # hide from legend

    # ax = sb.barplot(x='x', y='y', hue=' ', ci=95, data=df, ax=axes[i])
    for i, nbytes in enumerate([8, 16, 32]):
        bytes_str = '{}B'.format(nbytes)
        data = df[df['nbytes'] == nbytes]
        ax = sb.barplot(x='nbytes', y='y', hue=' ', hue_order=ALGOS, ci=95,
                        # data=data, ax=axes[i])
                        # data=data, ax=axes[i], errwidth=10)
                        data=data, ax=axes[i], capsize=.0004)
                        # data=data, ax=axes[i], capsize=.0004, errwidth=6)

    # ------------------------ clean up / format axes

    for ax in axes[:-1]:
        # remove x labels except for bottom axis
        plt.setp(ax.get_xticklabels(), visible=False)
        ax.get_xaxis().set_visible(False)

    end = .5 * (len(ALGOS) / float((len(ALGOS) + 2)))
    start = -end
    tick_positions = np.linspace(start + .02, end - .05, len(ALGOS))
    if camera_ready:
        tick_positions[0] += .02
        tick_positions[2] += .02
        tick_positions[3] += .01

    for ax in axes:
        ax.set_xlim([start - .02, end + .02])
        if camera_ready:
            # ax.set_ylabel('Billions of\nDistances/s', y=.4,
            # ax.set_ylabel('Billions of\nDistances/s', y=.5,
            ax.set_ylabel('Billion Distances/s', y=.49,  # .5 = centered ?
                          family=CAMERA_READY_FONT)
        else:
            ax.set_ylabel('Billions of Distances/s')
        ax.legend_.remove()
        if not fake_data:
            ax.set_ylim(0, 2.5)

    # add byte counts on the right
    fmt_str = "{}B Encodings"
    sb.set_style("white")  # adds border (spines) we have to remove
    for i, ax in enumerate(axes):
        ax2 = ax.twinx()
        sb.despine(ax=ax2, top=True, left=True, bottom=True, right=True)
        ax2.get_xaxis().set_visible(False)
        # ax2.get_yaxis().set_visible(False)  # nope, removes ylabel
        plt.setp(ax2.get_xticklabels(), visible=False)
        plt.setp(ax2.get_yticklabels(), visible=False)
        ax2.yaxis.set_label_position('right')
        if camera_ready:
            # ax2.set_ylabel(fmt_str.format((2 ** i) * 8), y=.39,
            ax2.set_ylabel(fmt_str.format((2 ** i) * 8),
                           labelpad=10, fontsize=14, family=CAMERA_READY_FONT)
        else:
            ax2.set_ylabel(fmt_str.format((2 ** i) * 8), labelpad=10, fontsize=15)

    # ------------------------ have bottom / top axes print title, x info

    if camera_ready:
        # axes[0].set_title('Distance Computations per Second', x=.39, y=1.02)
        # axes[0].set_title('Distance Computations per Second', x=.42, y=1.02,
        #                   family=CAMERA_READY_FONT)
        axes[0].set_title('Distance Computations per Second', y=1.02,
                          family=CAMERA_READY_FONT, fontsize=15)
    else:
        axes[0].set_title('Distance Computations per Second', y=1.02)

    # axes[-1].set_xticks(tick_positions)
    for ax in axes:
        axes[-1].set_xticks(tick_positions)
        ax.set_xlim(-.4, .4)  # no idea why this makes the bars fit right...
    xlabels = ["\n".join(name.split(' ')) for name in ALGOS]
    if not camera_ready:
        for i, lbl in enumerate(xlabels):
            if '\n' in lbl:
                # shift label up by adding another line
                xlabels[i] = xlabels[i] + '\n'
    # xlabels = ["\nBatch".join(name.split(' Batch')) for name in ALGOS]
    # xlabels = ALGOS
    axes[-1].set_xticklabels(xlabels, rotation=70)
    if camera_ready:
        # axes[-1].tick_params(axis='x', which='major', pad=15)
        # axes[-1].tick_params(axis='x', which='major', pad=13)
        axes[-1].tick_params(axis='x', which='major', pad=4)
        # axes[-1].set_xticklabels(xlabels, rotation=70, y=-.02)
    # else:
    # axes[-1].set_xticklabels(xlabels, rotation=70)
    # if camera_ready:
    #     axes[-1].set_xlabel("", labelpad=10)
    # else:
    axes[-1].set_xlabel("", labelpad=-20)
    # plt.setp(axes[-1].get_xlabel(), visible=False)  # doesn't work

    # ------------------------ show / save plot

    # plt.tight_layout()
    plt.tight_layout()
    if camera_ready:
        plt.subplots_adjust(hspace=.18)
    # save_fig(fname)
    # MPL conversion to pdf is selectively braindead for just this plot; it
    # lays things out horribly in a way that doesn't match the results
    # of show() at all. Just export as high-density png as a workaround
    # plt.savefig(os.path.join(SAVE_DIR, fname + '.png'),
    #             dpi=300, bbox_inches='tight')
    save_fig_png(fname)
Example #32
    date_min = series['Timestamp'][0] if len(series['Timestamp']) > 0 else 0
    date_max = series['Timestamp'][len(series)-1] if len(series['Timestamp']) > 0 else 0
    data = {'Timestamp': [], 'Transactions': [], 'USD': []}
    for d in pd.date_range(start=date_min, end=date_max):
        t = d.strftime('%Y-%m-%d')
        for i in range(len(series)):
            if t == series['Timestamp'][i]:
                data['Timestamp'].append(t)
                data['Transactions'].append(row)  # 'row' comes from the enclosing function, truncated above
                data['USD'].append(np.pi*8*magnitude(series['USD'][i]))
    data['Timestamp'] = [datetime.strptime(d, "%Y-%m-%d") for d in data['Timestamp']]
    return data

matplotlib.use('Agg')

seaborn.set_style("whitegrid", {'axes.grid': False})
flatui = ["#3498db"]
seaborn.set_palette(flatui)

series1 = load_data('reentrancy_timelime.csv', 4)
series2 = load_data('parity_wallet_hacks_timelime.csv', 3)
series3 = load_data('integer_overflow_timelime.csv', 1)
series4 = load_data('unhandled_exception_timelime.csv', 0)
series5 = load_data('short_address_timelime.csv', 2)

fig, ax = plt.subplots()

ax.scatter('Timestamp', 'Transactions', data=series1, marker='.', s=5, linewidths=1)
ax.scatter('Timestamp', 'Transactions', data=series1, linewidths=1, s=series1['USD'], alpha=0.3)

ax.scatter('Timestamp', 'Transactions', data=series2, marker='.', s=5, linewidths=1)
Example #33
def evolve_model(end_time, double_star, stars):
    time = 0 | units.yr
    dt = 0.05 * end_time / 1000.

    converter = nbody_system.nbody_to_si(double_star.mass,
                                         double_star.semimajor_axis)

    gravity = Hermite(converter)
    gravity.particles.add_particle(stars)
    to_stars = gravity.particles.new_channel_to(stars)
    from_stars = stars.new_channel_to(gravity.particles)

    period = get_period(double_star)
    print("Period =", period.as_string_in(units.yr))
    print("Mass loss timestep =", dt)
    print("Steps per period: = {:1.2f}".format(period / dt))

    a_an = [] | units.au
    e_an = []
    atemp = double_star.semimajor_axis
    etemp = double_star.eccentricity

    ###### COMMON ENVELOPE STUFF ###############
    final_a = 40 | units.RSun

    mu = double_star.mass * constants.G
    Eps0 = mu / (2 * double_star.semimajor_axis)
    Eps1 = mu / (2 * final_a)

    # Eps_ce should come from alpha lambda model, but we just fix the final semimajor axis here for simplicity
    Eps_ce = Eps1 - Eps0
    print("Eps_ce/Eps0", Eps_ce / Eps0)

    Tce = 1000 | units.yr
    Kce = K_from_eps(Eps0, Eps_ce, Tce, mu)
    print("Kce", Kce)
    Avisc = -Kce * Tce
    print("Avisc", Avisc.as_string_in(units.RSun**2))
    Rvisc = Avisc.sqrt() / (4 * constants.pi)
    print("Rvisc", Rvisc.as_string_in(units.RSun))

    vorb = (mu / double_star.semimajor_axis).sqrt()

    ###### END COMMON ENVELOPE STUFF ###############

    collision = False
    a = [] | units.au
    e = []
    m = [] | units.MSun
    t = [] | units.yr
    while time < end_time:
        time += dt
        if not collision:
            gravity.evolve_model(time)
            to_stars.copy()
            kick_stars_comenv2(stars, dt, Kce, Avisc)
            from_stars.copy()

            orbital_elements = orbital_elements_from_binary(stars,
                                                            G=constants.G)

            collision = check_collisions(stars)

        if atemp.number > 0:
            dadt = dadt_comenv_k0(atemp, etemp, Kce / Avisc)
            dedt = dedt_comenv_k0(atemp, etemp, Kce / Avisc)

            atemp = atemp + dadt * dt
            etemp = etemp + dedt * dt

        if collision and atemp.number < 0: break

        a_an.append(atemp)
        e_an.append(etemp)
        a.append(orbital_elements[2])
        e.append(orbital_elements[3])
        m.append(stars.mass.sum())
        t.append(time)
        print("time=",
              time.in_(units.yr),
              "a=",
              a[-1].in_(units.RSun),
              "e=",
              e[-1],
              "m=",
              stars.mass.in_(units.MSun),
              end="\r")

    gravity.stop()
    from matplotlib import pyplot
    import seaborn as sns
    sns.set(font_scale=1.33)
    sns.set_style("ticks")

    fig, axis = pyplot.subplots(nrows=2, sharex=True)
    axis[0].plot(t.value_in(units.yr),
                 a.value_in(units.RSun),
                 label="nbody k=0")
    axis[0].plot(t.value_in(units.yr),
                 a_an.value_in(units.RSun),
                 label="analytic")
    axis[0].set_ylabel("semimajor axis [$R_\odot$]")
    axis[0].legend()

    axis[1].plot(t.value_in(units.yr), e)
    axis[1].plot(t.value_in(units.yr), e_an)
    axis[1].set_ylabel("eccentricity")

    axis[1].set_xlabel("time [yr]")
    axis[0].set_xlabel("time [yr]")

    pyplot.tight_layout()
    pyplot.subplots_adjust(hspace=0.0)
    pyplot.savefig("comenv2.png")
    pyplot.show()
Example #34
File: figs.py Project: ninickl/bolt
def encoding_fig(fake_data=False, camera_ready=False):
    sb.set_style('darkgrid')
    # sb.set_context("talk", rc={"figure.figsize": (6, 6)})
    sb.set_context("talk", rc={"figure.figsize": (7, 7)})
    # sb.set_context("talk", rc={"figure.figsize": (8, 8)})
    # sb.set_context("talk", rc={"figure.figsize": (9, 9)})

    # fig, axes = plt.subplots(3, 1)
    fig, axes = plt.subplots(3, 2)

    # ALGOS = ['Bolt', 'PQ', 'OPQ', 'PairQ']
    ALGOS = ['Bolt', 'PQ', 'OPQ']
    algo2offset = {'Bolt': 100, 'PQ': 50, 'OPQ': 30, 'PairQ': 25}
    lengths = [64, 128, 256, 512, 1024]
    # results_for_algos_lengths =

    # sb.set_palette("Set1", n_colors=len(ALGOS))
    set_palette(ncolors=len(ALGOS))

    if fake_data:
        data = np.random.randn(1, len(lengths), len(algo2offset))
        for i, algo in enumerate(ALGOS):
            data[:, :, i] += algo2offset[algo]
        data /= np.arange(1, len(lengths) + 1).reshape((1, -1, 1))  # start at 1 to avoid divide-by-zero

        # ------------------------ data encoding

        # 8B encodings
        ax = axes[0, 0]
        # sb.tsplot(data=data, condition=condition, time=lengths, ax=ax)
        sb.tsplot(data=data, condition=None, time=lengths, ax=ax)
        # ax.set_title(prefix + ' Encoding Speed, 8B codes')
        ax.set_title('Data Encoding Speed', y=1.02)

        # 16B encodings
        data /= 2
        ax = axes[1, 0]
        sb.tsplot(data=data, condition=None, time=lengths, ax=ax)

        # 32B encodings
        data /= 2
        ax = axes[2, 0]
        sb.tsplot(data=data, condition=None, time=lengths, ax=ax)

        # ------------------------ query encoding
        data *= 8
        data += np.random.randn(*data.shape) * 5

        # 8B encodings
        ax = axes[0, 1]
        sb.tsplot(data=data, condition=None, time=lengths, ax=ax)
        # ax.set_title(prefix + ' Encoding Speed')
        ax.set_title('Query Encoding Speed', y=1.03, fontsize=16)

        # 16B encodings
        data /= 2
        ax = axes[1, 1]
        sb.tsplot(data=data, condition=None, time=lengths, ax=ax)

        # 32B encodings
        data /= 2
        ax = axes[2, 1]
        sb.tsplot(data=data, condition=ALGOS, time=lengths, ax=ax)

    else:  # real data
        NBYTES_LIST = [8, 16, 32]

        df = results.encode_results()
        df_x = df[df['task'] == 'encode_x']
        df_q = df[df['task'] == 'encode_q']
        dfs = [df_x, df_q]

        # print df_x
        # return

        # dfs = [results.encode_data_results(), results.encode_lut_results()]
        ax_cols = [axes[:, 0], axes[:, 1]]

        for df, ax_col in zip(dfs, ax_cols):  # for each col in subplots
            for b, nbytes in enumerate(NBYTES_LIST):  # for each row in subplots
                ax = ax_col[b]
                plot_df = df.loc[df['nbytes'] == nbytes]
                plot_df = plot_df.loc[plot_df['algo'].isin(ALGOS)]
                sb.tsplot(value='y', condition='algo', unit='trial', time='D',
                          data=plot_df, ax=ax, ci=95, n_boot=500)
                    # data=plot_df, ax=ax, legend=False, ci=95, n_boot=500)

    # ------------------------ legend

    ax = axes.ravel()[-1]
    leg_lines, leg_labels = ax.get_legend_handles_labels()
    # ax.legend_.remove()
    # leg_lines, leg_labels = leg_lines[:len(ALGOS)], leg_labels[:len(ALGOS)]

    plt.figlegend(leg_lines, leg_labels, loc='lower center',
                  ncol=len(ALGOS), labelspacing=0)

    # ------------------------ postproc + save plot

    for ax in axes.ravel():
        ax.set_yscale("log")
        ax.legend_.remove()
        ax.set_ylim(5e3, 2e7)

    if camera_ready:
        # axes[0, 0].set_title('Data Encoding Speed', x=.45, y=1.03, fontsize=16)
        # axes[0, 1].set_title('Query Encoding Speed', x=.45, y=1.03, fontsize=16)
        axes[0, 0].set_title('Data Encoding Speed', x=.49, y=1.03, fontsize=18)
        axes[0, 1].set_title('Query Encoding Speed', x=.5, y=1.03, fontsize=18)
    else:
        axes[0, 0].set_title('Data Encoding Speed', y=1.03, fontsize=16)
        axes[0, 1].set_title('Query Encoding Speed', y=1.03, fontsize=16)
    # for ax in axes[0, :].ravel():
        # ax.set_title('Vector Length')

    for ax in axes[:-1, :].ravel():
        # ax.xaxis.set_visible(False)
        plt.setp(ax.get_xticklabels(), visible=False)
        ax.set_xlabel('', labelpad=-10)
    for ax in axes[-1, :].ravel():
        # ax.set_xlabel('Vector Length')
        ax.set_xlabel('Vector Length', labelpad=7)
    for ax in axes[:, 0]:
        if camera_ready:
            # ax.set_ylabel('Vectors Encoded / s   ', fontsize=12)
            ax.set_ylabel('Vectors Encoded / s', fontsize=13)
        else:
            ax.set_ylabel('Vectors Encoded / s')

    # only bottom row gets xlabels
    for ax in axes[:-1, :].ravel():
        # plt.setp(ax.get_xticklabels(), visible=False)
        ax.set_xlabel('', labelpad=-10)

    # show byte counts on the right
    fmt_str = "{}B Encodings"
    # if camera_ready:
    #     fmt_str += '  '
    for i, ax in enumerate(axes[:, 1].ravel()):
        ax.yaxis.set_label_position('right')
        ax.set_ylabel(fmt_str.format((2 ** i) * 8), labelpad=10, fontsize=15)

    plt.tight_layout()
    plt.subplots_adjust(bottom=.15)
    if camera_ready:
        save_fig_png('encoding_speed')  # bypass mpl truetype pdf ineptitude
    else:
        save_fig('encoding_speed')
Example #35
File: figs.py Project: ninickl/bolt
def distortion_fig(fake_data=False, l2=True, suptitle=None,
                   fname='l2_distortion', camera_ready=False):
    # experiment params:
    #   datasets = Sift1M, Convnet1M, LabelMe22k, MNIST
    #   bytes = [8, 16, 32]
    # layout: [ndatasets x nums_bytes] (ie, [4x3])
    #   each subplot a barplot showing corr with err bars

    DATASETS = ['Sift1M', 'Convnet1M', 'LabelMe', 'MNIST']
    ALGOS = ['Bolt', 'PQ', 'OPQ', 'PairQ']
    NBYTES_LIST = [8, 16, 32]

    figsize = (6, 8)
    sb.set_style('darkgrid')
    sb.set_context("talk", rc={'xtick.major.pad': 3})
    set_palette(ncolors=len(ALGOS))
    # fig, axes = plt.subplots(4, 3)
    # fig, axes = plt.subplots(4, 1, figsize=figsize)
    fig, axes = plt.subplots(4, 1, figsize=figsize, dpi=300)
    axes = axes.reshape((4, 1))

    if suptitle is None:
        suptitle = 'Quality of Approximate Distances'

    # fake_data = data is None
    if fake_data:
        algo2offset = {'Bolt': .4, 'PQ': .3, 'OPQ': .45, 'PairQ': .5}
        nfake_corrs = 10

        dicts = []
        for dataset in DATASETS:
            for nbytes in NBYTES_LIST:
                for algo in ALGOS:
                    if fake_data:
                        corrs = np.random.rand(nfake_corrs) / 2.
                        corrs += algo2offset[algo]
                        corrs *= .9 + .1 * nbytes / 32.
                    params = {'algo': algo, 'dataset': dataset,
                              'nbytes': '{}B'.format(nbytes)}
                    dicts += [dict(params, **{'corr': c}) for c in corrs]

        # data = pd.DataFrame.from_records(dicts, index=[0])
        data = pd.DataFrame.from_records(dicts)
        # print data
        # return

        # ------------------------ plot the data

        for d, dataset in enumerate(DATASETS):
            # df_dataset = data.loc[data['dataset'] == dataset]
            df = data.loc[data['dataset'] == dataset]
            df.rename(columns={'algo': ' '}, inplace=True)  # hide from legend

            ax = axes.ravel()[d]
            sb.barplot(x='nbytes', y='corr', hue=' ', data=df, ax=ax)

    else:
        DATASETS = ['Sift1M', 'Convnet1M', 'LabelMe', 'MNIST']
        # ALGOS = ['Bolt', 'PQ', 'OPQ', 'PairQ']
        # DATASETS = ['Convnet1M', 'MNIST']
        # ALGOS = ['PQ', 'OPQ']
        # ALGOS = ['PQ4', 'PQ', 'OPQ']
        ALGOS = ['Bolt No Quantize', 'PQ', 'OPQ']
        for d, dset in enumerate(DATASETS):
            if l2:
                path = os.path.join('../results/correlation_l2/', dset, 'all_results.csv')
            else:
                path = os.path.join('../results/correlation_dotprods/', dset, 'all_results.csv')

            df = pd.read_csv(path)

            print "path: ", path

            pq4 = (df['_algo'] == 'PQ') & (df['_code_bits'] == 4)
            df.loc[pq4, '_algo'] = 'Bolt No Quantize'
            bolt_rot = (df['_algo'] == 'Bolt') & (df['opq_iters'] > 0)
            df = df.loc[~bolt_rot]

            # print df.loc[df['_algo'] == 'PQ4']
            # print df.loc[df['_algo'] == 'PQ4']
            # return

            df.rename(columns={'_algo': ' '}, inplace=True)

            # df['nbytes'] = df['_code_bits'] * df['_ncodebooks'] / 8
            all_nbytes = (df['_code_bits'] * df['_ncodebooks'] / 8).values
            df['nbytes'] = ["{}B".format(b) for b in all_nbytes.astype(np.int)]

            ax = axes.ravel()[d]
            # sb.barplot(x='nbytes', y='corr', hue=' ', data=df, ax=ax)
            sb.barplot(x='nbytes', y='corr', hue=' ', data=df, ax=ax, capsize=.0025)

            ax.set_title(dset)

    # ------------------------ legend

    ax = axes.ravel()[-1]
    leg_lines, leg_labels = ax.get_legend_handles_labels()
    plt.figlegend(leg_lines, leg_labels, loc='lower center',
                  ncol=2, labelspacing=0)

    # ------------------------ axis cleanup / formatting

    # configure all axes
    for i, ax in enumerate(axes.ravel()):
        # title = "{}".format(DATASETS[i]) # TODO uncomment
        # ax.set_title(title, y=1.01) # TODO uncomment
        # ax.set_ylim([0, 1])
        ax.set_ylim([.5, 1])
        # ax.set_ylim([.75, 1])
        ax.set_xlabel('', labelpad=-10)
        if l2:
            ax.set_ylabel('Correlation With\nTrue Distance')
        else:
            if camera_ready:
                # ax.set_ylabel('Correlation With\nTrue Dot Product', y=.46, fontsize=13)
                ax.set_ylabel('Correlation With\nTrue Dot Product', fontsize=13)
            else:
                ax.set_ylabel('Correlation With\nTrue Dot Product')
        if ax.legend_:
            ax.legend_.remove()

    # ------------------------ show / save plot

    # plt.tight_layout()  # for fig size 6x9
    plt.tight_layout(h_pad=.8)
    # if camera_ready:
    #     plt.suptitle(suptitle, fontsize=17)
    # else:
    #     plt.suptitle(suptitle, fontsize=16)
    plt.suptitle(suptitle, fontsize=16)
    # plt.subplots_adjust(top=.92, bottom=.08)  # for fig size 6x9
    # plt.subplots_adjust(top=.90, bottom=.08)
    plt.subplots_adjust(top=.90, bottom=.1)
    if camera_ready:
        save_fig_png(fname)  # bypass mpl truetype pdf ineptitude
    else:
        save_fig(fname)
Example #36
File: figs.py Project: ninickl/bolt
def recall_r_fig(fake_data=False, suptitle=None, l2=True, fname='l2_recall',
                 camera_ready=False):
    # experiment params:
    #   datasets = Sift1M, Convnet1M, LabelMe22k, MNIST
    #   bytes = [8, 16, 32]
    #   R = 1, 10, 100, 1000

    DATASETS = ['Sift1M', 'Convnet', 'LabelMe', 'MNIST']
    ALGOS = ['Bolt', 'PQ', 'OPQ', 'PairQ']
    NBYTES_LIST = [8, 16, 32]
    # Rs = [1, 10, 100, 1000]
    Rs = [1, 5, 10, 50, 100, 500, 1000]

    sb.set_style('darkgrid')
    sb.set_context("talk")
    set_palette(ncolors=len(ALGOS))
    fig, axes = plt.subplots(4, 3, figsize=(6, 9))

    if suptitle is None:
        suptitle = 'Nearest Neighbor Recall'

    if fake_data:
        algo2offset = {'Bolt': -.1, 'PQ': -.2, 'OPQ': 0, 'PairQ': .1}
        data = np.random.rand(1, len(Rs), len(algo2offset))
        data = np.sort(data, axis=1)  # ensure fake recalls are monotonic
        for i, algo in enumerate(ALGOS):
            recall = data[:, :, i] + algo2offset[algo]
            data[:, :, i] = np.clip(recall, 0., 1.)

        line_styles_for_nbytes = {8: '-', 16: '-', 32: '-'}

        # plot the data
        for d, dataset in enumerate(DATASETS):
            axes_row = axes[d]
            for b, nbytes in enumerate(NBYTES_LIST):
                ax = axes_row[b]
                if fake_data:  # TODO handle real data
                    data_tmp = data * (.5 + nbytes / 64.)  # slightly less
                assert np.max(data_tmp) <= 1.
                for algo in ALGOS:
                    x = Rs
                    sb.tsplot(data=data_tmp, condition=ALGOS, time=x, ax=ax, n_boot=100,
                              ls=line_styles_for_nbytes[nbytes])

    else:  # real data
        DATASETS = ['Sift1M', 'Convnet1M', 'LabelMe', 'MNIST']
        # ALGOS = ['PQ', 'OPQ']
        ALGOS = ['Bolt', 'Bolt No Quantize', 'PQ', 'OPQ']
        for d, dset in enumerate(DATASETS):
            if l2:
                path = os.path.join('../results/recall_at_r/', dset, 'summary.csv')
            else:
                path = os.path.join('../results/recall_at_r_mips/', dset, 'summary.csv')

            df = pd.read_csv(path)
            pq4 = (df['_algo'] == 'PQ') & (df['_code_bits'] == 4)
            df.loc[pq4, '_algo'] = 'Bolt No Quantize'

            # rm results with bolt rotations
            bolt_rot = (df['_algo'] == 'Bolt') & (df['opq_iters'] > 0)
            df = df.loc[~bolt_rot]

            df.rename(columns={'_algo': 'algo'}, inplace=True)
            all_nbytes = (df['_code_bits'] * df['_ncodebooks'] / 8).values
            df['nbytes'] = all_nbytes.astype(int)  # np.int was removed in numpy 1.24

            for b, nbytes in enumerate(NBYTES_LIST):
                ax = axes[d, b]
                data = df.loc[df['nbytes'] == nbytes]
                for algo in ALGOS:
                    df_row = data.loc[data['algo'] == algo]  # should be 1 row
                    if len(df_row) != 1:
                        print(df_row)
                        print("dset = ", dset)
                        print("algo = ", algo)
                        assert len(df_row) == 1
                    assert len(df_row) == 1

                    x = np.array(Rs)
                    y = [df_row['recall@{}'.format(r)].values[0] for r in x]
                    if camera_ready:
                        x = np.log10(x)
                    # print "recall plot: using X values: ", x  # TODO rm
                    ax.plot(x, y, label=algo)
                    ax.legend()

    # ------------------------ legend

    ax = axes.ravel()[-1]
    leg_lines, leg_labels = ax.get_legend_handles_labels()
    # for some reason, each algo appears 3x, so just take first
    leg_lines, leg_labels = leg_lines[:len(ALGOS)], leg_labels[:len(ALGOS)]

    plt.figlegend(leg_lines, leg_labels, loc='lower center',
                  ncol=len(ALGOS), labelspacing=0)

    # ------------------------ axis cleanup / formatting

    # configure all axes
    for i, ax_row in enumerate(axes):
        for j, ax in enumerate(ax_row):
            title = "{}, {}B".format(DATASETS[i], NBYTES_LIST[j])
            if camera_ready:
                # x_pos = .44 if j == 0 else .45
                # ax.set_title(title, x=x_pos, y=1.01, fontsize=15)
                # ax.set_title(title, x=.45, y=1.01, fontsize=15)
                # x_pos = .49 if j == 0 else .48
                # ax.set_title(title, x=.49, y=1.01, fontsize=15)
                ax.set_title(title, y=1.01, fontsize=15)
            else:
                ax.set_title(title, y=1.01)
            ax.set_ylim([0, 1])
            if not camera_ready:
                ax.set_xscale("log")

            # remove all legends except the very last one
            if (i != len(axes) - 1 or j != len(ax_row) - 1) and ax.legend_:
                ax.legend_.remove()

    # remove x labels except for bottom axis
    for ax in axes[:-1, :].ravel():
        plt.setp(ax.get_xticklabels(), visible=False)
        # ax.get_xaxis().set_visible(False)

    if axes.shape[1] > 1:
        # hide y axis for axes not in left col
        for i, ax in enumerate(axes[:, 1:].ravel()):
            # pass
            # ax.get_yaxis().set_visible(False)
            ax.get_yaxis().set_ticklabels([])

        # ylabel left col
        for i, ax in enumerate(axes[:, 0].ravel()):
            ax.set_ylabel("Recall@R")

        # xlabel bottom rows
        if camera_ready:
            for i, ax in enumerate(axes.ravel()):
                ax.set_xticks([0, 1, 2, 3])
            for i, ax in enumerate(axes[-1, :].ravel()):
                ax.set_xticklabels(['0', '1', '2', '3'])
        else:
            for i, ax in enumerate(axes[-1, :].ravel()):
                # no idea why we need the dummy tick at the beginning
                ax.set_xticklabels(['', '0', '1', '2', ''])
            axes[-1, -1].set_xticklabels(['', '0', '1', '2', '3'])

        axes[-1, 1].set_xlabel("Log10(R)")

    # ------------------------ show / save plot

    # plt.tight_layout(h_pad=.02, w_pad=.02)
    plt.tight_layout(w_pad=.02)
    # plt.subplots_adjust(top=.88, bottom=.21, hspace=.4)
    # if camera_ready:
    #     plt.suptitle(suptitle, fontsize=18)
    # else:
    #     plt.suptitle(suptitle, fontsize=16)
    plt.suptitle(suptitle, fontsize=16)
    plt.subplots_adjust(top=.91, bottom=.11)
    if camera_ready:
        save_fig_png(fname)  # mpl saving as pdf stupid; just bypass it
    else:
        save_fig(fname)
Example #37
0
File: figs.py Project: ninickl/bolt
def matmul_fig(fake_data=False, fname='matmul', camera_ready=False):
    # two line graphs
    # lines in both top and bottom = bolt {8,16,32}B, matmul
    # just use square mats with power-of-two side lengths, since that's the best case for matmuls
    # in the top one, one mat is already encoded and Bolt just has to do queries
    # in the bottom one, Bolt has to encode one of the mats as data before the queries

    sb.set_style('darkgrid')
    sb.set_context("talk")
    # sb.set_palette("Set1", n_colors=len(ALGOS))
    pal = set_palette(ncolors=8)
    fig, axes = plt.subplots(2, 1, figsize=(6, 6))
    # axes = axes.reshape((2, 1))

    if fake_data:  # for debugging / prototyping the fig
        SIZES = np.array([64, 128, 256, 512, 1024, 2048, 4096], dtype=np.float32)
        matmul_times = (SIZES ** 2.5).reshape((-1, 1))  # strassen-ish scaling
        bolt_times = ((SIZES ** 3) / 100 + 400).reshape((-1, 1))

        # pretend we had 5 trials; each trial gets a col, so rows are lengths
        matmul_times = np.tile(matmul_times, (1, 5))
        bolt_times = np.tile(bolt_times, (1, 5))
        matmul_times += np.random.randn(*matmul_times.shape) * SIZES.T.reshape((-1, 1)) / 10.
        bolt_times += np.random.randn(*bolt_times.shape) * SIZES.T.reshape((-1, 1)) / 10.

        matmul_times /= 1e9
        bolt8_times = bolt_times / 2e9
        bolt16_times = bolt_times / 1e9
        bolt32_times = bolt_times / .5e9

        dicts = []
        ALGOS = ['Bolt 8B', 'Bolt 16B', 'Bolt 32B', 'Floats (BLAS)']
        algo_times = [bolt8_times, bolt16_times, bolt32_times, matmul_times]
        for all_times, algo in zip(algo_times, ALGOS):
            for sz, times_for_sz in zip(SIZES, all_times):
                dicts += [{'algo': algo, 'trial': i, 'size': sz, 'y': t}
                          for i, t in enumerate(times_for_sz)]

        df = pd.DataFrame.from_records(dicts)
        df_enc = df
        df_no_enc = df

        sb.tsplot(time='size', value='y', condition='algo', unit='trial',
                  data=df_no_enc, ax=axes[0], n_boot=100)
        sb.tsplot(time='size', value='y', condition='algo', unit='trial',
                  data=df_enc, ax=axes[1], n_boot=100)

    else:
        # ALGOS = ['Bolt 8B', 'Bolt 16B', 'Bolt 32B', 'Floats']
        # ALGOS = ['Bolt 32B', 'Bolt 32B + Encode', 'Floats']
        # ALGOS = ['Bolt 8B', 'Bolt 32B', 'Bolt 32B + Encode', 'Floats']
        ALGOS = ['Bolt 8B', 'Bolt 8B + Encode', 'Bolt 32B', 'Bolt 32B + Encode', 'Floats']
        # df = results.matmul_results_square()

        def clean_df(df):
            df = df.loc[df['algo'].isin(ALGOS)]
            non_encode_algos = ['Bolt 8B', 'Bolt 16B', 'Bolt 32B']
            # rm_idxs = (df['algo'] == 'Bolt 32B') * (df['enc'] == 1)
            rm_idxs = (df['algo'].isin(non_encode_algos)) & (df['enc'] == 1)
            df = df.loc[~rm_idxs]

            df.loc[df['algo'] == 'Floats', 'algo'] = 'Floats (BLAS)'
            return df

        df = results.matmul_results(which='square')
        df = clean_df(df)

        colors = {
            'Bolt 8B': pal[0], 'Bolt 8B + Encode': pal[0],
            # 'Bolt 16B': pal[2], 'Bolt 16B + Encode': pal[2],
            'Bolt 32B': pal[1], 'Bolt 32B + Encode': pal[1],
            'Floats (BLAS)': 'k'
        }
        df_no_enc = df.loc[df['enc'] != 1]
        sb.tsplot(time='size', value='y', condition='algo', unit='trial',
                  data=df_no_enc, ax=axes[0], n_boot=100, color=colors, linestyle='solid')
        df_enc = df.loc[df['enc'] == 1]
        sb.tsplot(time='size', value='y', condition='algo', unit='trial',
                  data=df_enc, ax=axes[0], n_boot=100, color=colors, linestyle='dotted', lw=4)

        df = results.matmul_results(which='tall')
        df = clean_df(df)

        # print df
        # return

        # sb.tsplot(time='size', value='y', condition='algo', unit='trial',
        #           data=df, ax=axes[1], n_boot=100, color=colors)
        df_no_enc = df.loc[df['enc'] != 1]
        sb.tsplot(time='size', value='y', condition='algo', unit='trial',
                  data=df_no_enc, ax=axes[1], n_boot=100, color=colors, linestyle='solid')
        df_enc = df.loc[df['enc'] == 1]
        sb.tsplot(time='size', value='y', condition='algo', unit='trial',
                  data=df_enc, ax=axes[1], n_boot=100, color=colors, linestyle='dotted', lw=4)


        # axes[1].set_ylim(1, 1e3)

    # without encoding at the top; with encoding on the bottom
    # sb.tsplot(time='size', value='y', condition='algo', unit='trial',
    # sb.tsplot(time='size', value='y', condition='algo', unit='trial',
    #     data=df_no_enc, ax=axes[0], n_boot=100)
    # sb.tsplot(time='size', value='y', condition='algo', unit='trial',
    #     data=df_enc, ax=axes[1], n_boot=100)

    # ------------------------ legend

    ax = axes.ravel()[-1]
    leg_lines, leg_labels = ax.get_legend_handles_labels()
    # for some reason, each algo appears 3x, so just take first
    leg_lines, leg_labels = leg_lines[:len(ALGOS)], leg_labels[:len(ALGOS)]

    plt.figlegend(leg_lines, leg_labels, loc='lower center',
                  ncol=len(ALGOS) // 2, labelspacing=0)

    # ------------------------ axis cleanup / formatting

    # axes[0].set_title('Matrix Multiply Time, One Matrix Encoded', y=1.03, fontsize=16)
    # axes[1].set_title('Matrix Multiply Time, Neither Matrix Encoded', y=1.03, fontsize=16)
    axes[0].set_title('Square Matrix Multiply Time', y=1.03, fontsize=16)
    axes[1].set_title('Tall Matrix Multiply Time', y=1.03, fontsize=16)

    for ax in axes.ravel():
        ax.legend_.remove()
        ax.set_xscale('log', basex=2)
        ax.set_yscale('log', basey=10)
        if not camera_ready:
            ax.set_ylabel('Wall Time (s)')
    if camera_ready:
        axes[0].set_ylabel('Wall Time (s)')
        axes[1].set_ylabel('Wall Time (s)', labelpad=10)
    # for ax in axes[:-1].ravel():
    # #     plt.setp(ax.get_xticklabels(), visible=False)
    #     ax.set_xlabel('', labelpad=-10)

    # axes[0].set_xlabel('Matrix Side Length, L', labelpad=-1)
    axes[0].set_xlabel('Matrix Side Length')
    axes[1].set_xlabel('Matrix Side Length')

    # ------------------------ show / save plot

    # plt.tight_layout(h_pad=1.4)
    plt.tight_layout(h_pad=1.2)
    plt.subplots_adjust(bottom=.23)
    if camera_ready:
        save_fig_png('matmul_speed')  # bypass mpl truetype pdf ineptitude
    else:
        save_fig('matmul_speed')
import numpy as np
from scipy.io import mmread
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import glob
import re
import os
import sys
from sklearn.metrics.pairwise import cosine_similarity
sns.set(style='darkgrid')
sns.set_style(style='whitegrid')

# make modules one directory up importable
current_dir = os.path.dirname(os.path.abspath("__file__"))
sys.path.append( str(current_dir) + '/../' )
from setting_param import ratio_test
from setting_param import ratio_valid
from setting_param import all_node_num
from setting_param import MakeSample_repeat3_attribute_prediction_new_InputDir

EXIST_TABLE = np.load(MakeSample_repeat3_attribute_prediction_new_InputDir + '/exist_table.npy')

from setting_param import Evaluation_repeat3_attribute_prediction_new_utilize_new_attribute_link_Baseline_InputDir as Baseline_InputDir
from setting_param import Evaluation_repeat3_attribute_prediction_new_utilize_new_attribute_link_LSTM_InputDir as LSTM_InputDir
from setting_param import Evaluation_repeat3_attribute_prediction_new_utilize_new_attribute_link_STGGNN_InputDir as STGGNN_InputDir
from setting_param import Evaluation_repeat3_attribute_prediction_new_utilize_new_attribute_link_EGCNh_InputDir as EGCNh_InputDir
from setting_param import Evaluation_repeat3_attribute_prediction_new_utilize_new_attribute_link_STGCN_InputDir as STGCN_InputDir
from setting_param import Evaluation_repeat3_attribute_prediction_new_utilize_new_attribute_link_EGCNo_InputDir as EGCNo_InputDir
from setting_param import Evaluation_repeat3_attribute_prediction_new_utilize_new_attribute_link_GCN_InputDir as GCN_InputDir
from setting_param import Evaluation_repeat3_attribute_prediction_new_utilize_new_attribute_link_DynGEM_InputDir as DynGEM_InputDir
Example #39
0
"""
Code for generating plots.
"""
import pandas
import seaborn as sns
import matplotlib
import msprime
import os
import matplotlib.patches as mpatches
from matplotlib import pyplot as plt
import numpy as np
# Force matplotlib to not use any Xwindows backend.
matplotlib.use('Agg')

sns.set_style("darkgrid")


def plot_stairway_Ne_estimate(infile, outfile):
    """
    figure of N(t) for single run of stairwayplot
    """
    nt = pandas.read_csv(infile, sep="\t", skiprows=5)
    nt = nt[nt['year'] > 10]
    f, ax = plt.subplots(figsize=(7, 7))
    ax.set(xscale="log", yscale="log")
    ax.plot(nt['year'], nt['Ne_median'], c="red")
    ax.plot(nt['year'], nt['Ne_2.5%'], c='grey')
    ax.plot(nt['year'], nt['Ne_97.5%'], c='grey')
    f.savefig(outfile, bbox_inches='tight')

Example #40
0
import multiprocessing
from gensim.models import Word2Vec
from time import time

import logging  # Setting up logging to monitor gensim
logging.basicConfig(filename="Logger(1).log",
                    format="%(levelname)s - %(asctime)s: %(message)s",
                    datefmt='%H:%M:%S',
                    level=logging.INFO)

import numpy as np
import matplotlib.pyplot as plt

import seaborn as sns
sns.set_style('darkgrid')

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import pandas as pd

# Read sentences from file
logging.info("Start load sentences from file")
fname = "simpsons_trainingset.txt"
sentences = []
with open(fname, 'r', encoding='utf-8') as f:
    while True:
        sent = f.readline()
        if len(sent) == 0:
            break
        sent = sent[:-1].split()
        sentences.append(sent)
Example #41
0
def process_data(file: str='./params.json') -> Tuple[pd.DataFrame, List[Any], Dict[str, pd.DataFrame]]:
    """Runs imaging analysis based on inputs from a parameter file"""
    sns.set_style('darkgrid')
    ############## PART 1 Preprocess data ##########################
    ################ Loading params ####################
    print(f"LOADING PARAMETERS FROM {file}")
    params = json.load(open(file, 'r'))
    mode = params.get("mode")
    dpaths = params.get("dpaths")
    offset_events = params.get("offset_events")
    signal_channel = params.get("signal_channel")
    reference_channel = params.get("reference_channel")
    deltaf_options = params.get("deltaf_options")
    z_score_before_alignment = params.get("z_score_before_alignment")
    analysis_blocks = params.get("analysis_blocks")
    path_to_ttl_event_params = params.get("path_to_ttl_event_params")
    path_to_social_excel = params.get("path_to_social_excel")
    trunc_start = params.get("trunc_start", 0)
    trunc_end = params.get("trunc_end", 10)
    ####################### PREPROCESSING DATA ###############################
    print(f'\n\n\n\nRUNNING IN MODE: {mode} \n\n\n')
    for dpath_ind, dpath in enumerate(dpaths):
        # Reads data from Tdt folder
        PrintNoNewLine('\nCannot find processed pkl object, reading TDT folder instead...')
        block = ReadNeoTdt(path=dpath, return_block=True)
        seglist = block.segments
        print('Done!')

        # Truncates first/last seconds of recording
        PrintNoNewLine('Truncating signals and events...')
        seglist = TruncateSegments(seglist, start=trunc_start, end=trunc_end, clip_same=True)
        print('Done!')


        # Iterates through each segment in seglist. Right now, there is only one segment
        for segment in seglist:
            segment_name = segment.name
            # Extracts the sampling rate from the signal channel
            try:
                sampling_rate = [x for x in segment.analogsignals if x.name == signal_channel][0].sampling_rate
            except IndexError:
                raise ValueError('Could not find your channels. Make sure you have the right names!')
            # Appends an analog signal object that is delta F/F. The name of the channel is
            # specified by deltaf_ch_name above. It is calculated using the function
            # NormalizeSignal in signal_processing.py. As of right now it:
            # 1) Lowpass filters signal and reference (default cutoff = 40 Hz, order = 5)
            # 2) Calculates deltaf/f for signal and reference (default is (f - median(f)) / median(f))
            # 3) Detrends deltaf/f using a savgol filter (default window_length = 3001, poly order = 1)
            # 4) Subtracts reference from signal
            # NormalizeSignal has a ton of options; you can pass in parameters using
            # the deltaf_options dictionary above. For example, if you want it to be mean centered
            # and not run the savgol_filter, set deltaf_options = {'mode': 'mean', 'detrend': False}
            PrintNoNewLine('\nCalculating delta_f/f...')
            all_signals = ProcessSignalData(seg=segment, sig_ch=signal_channel, ref_ch=reference_channel,
                                name='DeltaF_F', fs=sampling_rate, highcut=40.0, **deltaf_options)
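            # A comment-only sketch of those four steps (NormalizeSignal itself
            # is not shown in this file), assuming 1-D numpy arrays sig/ref and
            # scipy.signal's butter/filtfilt/savgol_filter:
            #   b, a = butter(5, 40.0, btype='low', fs=float(sampling_rate))
            #   sig_f, ref_f = filtfilt(b, a, sig), filtfilt(b, a, ref)
            #   sig_df = (sig_f - np.median(sig_f)) / np.median(sig_f)
            #   ref_df = (ref_f - np.median(ref_f)) / np.median(ref_f)
            #   detrend = lambda x: x - savgol_filter(x, 3001, 1)
            #   deltaf = detrend(sig_df) - detrend(ref_df)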
            # Appends an Event object that has all event timestamps and the proper label
            # (determined by the evtframe loaded earlier). Uses a tolerance (in seconds)
            # to determine if events co-occur. For example, if tolerance is 1 second
            # and ch1 fires an event, ch2 fires an event 0.5 seconds later, and ch3 fires
            # an event 3 seconds later, the output array will be [1, 1, 0] and will
            # match the label in evtframe (e.g. 'omission')
            print('Done!')
            if mode == 'TTL':
                # Loading event labeling/combo parameters
                path_to_event_params = path_to_ttl_event_params[dpath_ind]
            elif mode == 'manual':
                # Generates a json for reading excel file events
                path_to_event_params = 'imaging_analysis/manual_event_params.json'
                GenerateManualEventParamsJson(path_to_social_excel[dpath_ind], event_col='Bout type', 
                    name=path_to_event_params)
            # This loads our event params json
            start, end, epochs, evtframe, typeframe = LoadEventParams(dpath=path_to_event_params, 
                mode=mode)
            # Appends processed event_param.json info to segment object
            AppendDataframesToSegment(segment, [evtframe, typeframe], 
                ['eventframe', 'resultsframe'])
            # Processing events
            PrintNoNewLine('\nProcessing event times and labels...')
            if mode == 'manual':
                manualframe = path_to_social_excel[dpath_ind]
            else:
                manualframe = None
            ProcessEvents(seg=segment, tolerance=.1, evtframe=evtframe, 
                name='Events', mode=mode, manualframe=manualframe, 
                event_col='Bout type', start_col='Bout start', end_col='Bout end', offset_events=offset_events[dpath_ind])
            print('Done!')
            # Takes processed events and segments them by trial number. Trial start
            # is determined by events in the list 'start' from LoadEventParams. This
            # can be set in the event_params.json. Additionally, the result of the 
            # trial is set by matching the epoch type to the typeframe dataframe 
            # (also from LoadEventParams). Example of epochs are 'correct', 'omission',
            # etc. 
            # The result of this process is a dataframe with each event and their
            # timestamp in chronological order, with the trial number and trial outcome
            # appended to each event/timestamp.
            PrintNoNewLine('\nProcessing trials...')
            trials = ProcessTrials(seg=segment, name='Events', 
                startoftrial=start, epochs=epochs, typedf=typeframe, 
                appendmultiple=False)
            print('Done!')
            # With processed trials, we comb through each epoch ('correct', 'omission'
            # etc.) and find start/end times for each trial. Start time is determined
            # by the earliest 'start' event in a trial. Stop time is determined by
            # 1) the earliest 'end' event in a trial, 2) or the 'last' event in a trial
            # or the 3) 'next' event in the following trial.
            PrintNoNewLine('\nCalculating epoch times and durations...')
            GroupTrialsByEpoch(seg=segment, startoftrial=start, endoftrial=end, 
                endeventmissing='last')
            print('Done!')
            segment.processed = True


        ################### ALIGN DATA ##########################################
        # for segment in seglist:
            for block in analysis_blocks:
                # Extract analysis block params
                epoch_name = block['epoch_name']
                event = block['event']
                prewindow = block['prewindow']
                postwindow = block['postwindow']
                downsample = block['downsample']
                z_score_window = block['z_score_window']
                quantification = block['quantification']
                baseline_window = block['baseline_window']
                response_window = block['response_window']
                save_file_as = block['save_file_as']
                heatmap_range = block['plot_paramaters']['heatmap_range']
                smoothing_window = block['plot_paramaters']['smoothing_window']

                lookup = {}
                for channel in ['Filtered_signal', 'Filtered_reference', 'Detrended', 'Detrended_reference', 'DeltaF_F_or_Z_score']:
                    print(('\nAnalyzing "{}" trials centered around "{}". Channel: "{}" \n'.format(epoch_name, event, channel)))

                    dict_name = "{}_{}".format(epoch_name, channel)
                    lookup[channel] = dict_name 
                    PrintNoNewLine('Centering trials and analyzing...')
                    AlignEventsAndSignals(seg=segment, epoch_name=epoch_name, analog_ch_name=channel, 
                        event_ch_name='Events', event=event, event_type='label', 
                        prewindow=prewindow, postwindow=postwindow, window_type='event', 
                        clip=False, name=dict_name, to_csv=False, dpath=dpath)
                    print('Done!')



        ######################## PROCESS SIGNALS (IF NECESSARY); PLOT; STATS ######
                # Load data

                signal = segment.analyzed[lookup['Filtered_signal']]['all_traces']
                reference = segment.analyzed[lookup['Filtered_reference']]['all_traces']

                # Down sample data
                if downsample > 0:
                    signal = Downsample(signal, downsample, index_col='index')
                    reference = Downsample(reference, downsample, index_col='index')

                # # Scale signal if it is too weak (want std to be at least 1)
                # if (np.abs(signal.mean().std()) < 1.) or (np.abs(reference.mean().std()) < 1.):
                #     scale_factor = 10**(np.ceil(np.log10(1/(signal.mean().std()))))

                #     signal = signal * scale_factor
                #     reference = reference * scale_factor

                # Get plotting ready
                figure = plt.figure(figsize=(12, 12))
                figure.subplots_adjust(hspace=1.3)
                ax1 = plt.subplot2grid((6, 2), (0, 0), rowspan=2)
                ax2 = plt.subplot2grid((6, 2), (2, 0), rowspan=2)
                ax3 = plt.subplot2grid((6, 2), (4, 0), rowspan=2)
                ax4 = plt.subplot2grid((6, 2), (0, 1), rowspan=3)
                ax5 = plt.subplot2grid((6, 2), (3, 1), rowspan=3)
                # fig, axs = plt.subplots(2, 2, sharex=False, sharey=False)
                # fig.set_size_inches(12, 12)

            ############################### PLOT AVERAGE EVOKED RESPONSE ######################
                PrintNoNewLine('Calculating average filtered responses for {} trials...'.format(epoch_name))
                signal_mean = signal.mean(axis=1)
                reference_mean = reference.mean(axis=1)

                signal_sem = signal.sem(axis=1)
                reference_sem = reference.sem(axis=1)

                signal_dc = signal_mean.mean()
                reference_dc = reference_mean.mean()

                signal_avg_response = signal_mean - signal_dc 
                reference_avg_response = reference_mean - reference_dc

                if smoothing_window is not None:
                    signal_avg_response = SmoothSignalWithPeriod(x=signal_avg_response, 
                        sampling_rate=float(sampling_rate)/downsample, 
                        ms_bin=smoothing_window, window='flat')
                    reference_avg_response = SmoothSignalWithPeriod(x=reference_avg_response, 
                        sampling_rate=float(sampling_rate)/downsample, 
                        ms_bin=smoothing_window, window='flat')
                    signal_sem = SmoothSignalWithPeriod(x=signal_sem, 
                        sampling_rate=float(sampling_rate)/downsample, 
                        ms_bin=smoothing_window, window='flat')
                    reference_sem = SmoothSignalWithPeriod(x=reference_sem, 
                        sampling_rate=float(sampling_rate)/downsample, 
                        ms_bin=smoothing_window, window='flat')

                # # Scale signal if it is too weak (want std to be at least 1)
                # if (np.abs(signal_avg_response.std()) < 1.) or (np.abs(reference_avg_response.std()) < 1.):
                #     scale_factor = 10**(np.ceil(np.log10(1/(signal_avg_response).std())))

                #     signal_avg_response = signal_avg_response * scale_factor
                #     signal_se = signal_se * scale_factor
                #     reference_avg_response = reference_avg_response * scale_factor
                #     reference_se = reference_se * scale_factor

                # Plotting signal
                # current axis
                #curr_ax = axs[0, 0]
                curr_ax = ax1
                curr_ax.plot(signal_avg_response.index, signal_avg_response.values, color='b', linewidth=2)
                curr_ax.fill_between(signal_avg_response.index, (signal_avg_response - signal_sem).values, 
                    (signal_avg_response + signal_sem).values, color='b', alpha=0.05)

                # Plotting reference
                curr_ax.plot(reference_avg_response.index, reference_avg_response.values, color='g', linewidth=2)
                curr_ax.fill_between(reference_avg_response.index, (reference_avg_response - reference_sem).values, 
                    (reference_avg_response + reference_sem).values, color='g', alpha=0.05)

                # Plot event onset
                curr_ax.axvline(0, color='black', linestyle='--')
                curr_ax.set_ylabel('Voltage (V)')
                curr_ax.set_xlabel('Time (s)')
                curr_ax.legend(['465 nm', '405 nm', event])
                curr_ax.set_title('Average Lowpass Signal $\pm$ SEM: {} Trials'.format(signal.shape[1]))
                print('Done!')
            ############################# Calculate detrended signal #################################
                if z_score_before_alignment:
                    detrended_signal = segment.analyzed[lookup['Detrended']]['all_traces']

                    # Adding detrended reference
                    detrended_ref = segment.analyzed[lookup['Detrended_reference']]['all_traces']
                    detrended_ref_mean = detrended_ref.mean(axis=1)

                    detrended_ref_sem = detrended_ref.sem(axis=1)

                    if smoothing_window is not None:
                        detrended_ref_mean = SmoothSignalWithPeriod(x=detrended_ref_mean, 
                            sampling_rate=float(sampling_rate)/downsample, 
                            ms_bin=smoothing_window, window='flat')
                        detrended_ref_sem = SmoothSignalWithPeriod(x=detrended_ref_sem, 
                            sampling_rate=float(sampling_rate)/downsample, 
                            ms_bin=smoothing_window, window='flat')
                else:
                    # Detrending
                    PrintNoNewLine('Detrending signal...')
                    fits = np.array([np.polyfit(reference.values[:, i],signal.values[:, i],1) for i in range(signal.shape[1])])
                    Y_fit_all = np.array([np.polyval(fits[i], reference.values[:,i]) for i in np.arange(reference.values.shape[1])]).T
                    Y_df_all = signal.values - Y_fit_all
                    detrended_signal = pd.DataFrame(Y_df_all, index=signal.index)

            ################# PLOT DETRENDED SIGNAL ###################################

                detrended_signal_mean = detrended_signal.mean(axis=1)

                detrended_signal_sem = detrended_signal.sem(axis=1)

                if smoothing_window is not None:
                    detrended_signal_mean = SmoothSignalWithPeriod(x=detrended_signal_mean, 
                        sampling_rate=float(sampling_rate)/downsample, 
                        ms_bin=smoothing_window, window='flat')
                    detrended_signal_sem = SmoothSignalWithPeriod(x=detrended_signal_sem, 
                        sampling_rate=float(sampling_rate)/downsample, 
                        ms_bin=smoothing_window, window='flat')

                # Plotting signal
                # current axis
                curr_ax = ax2
                # # curr_ax = axs[1, 0]
                #curr_ax = plt.axes()
                if z_score_before_alignment:
                    pass
                else:
                    zscore_start = detrended_signal[z_score_window[0]:z_score_window[1]].index[0]
                    zscore_end = detrended_signal[z_score_window[0]:z_score_window[1]].index[-1]
                    zscore_height = detrended_signal[z_score_window[0]:z_score_window[1]].mean(axis=1).min()
                    if zscore_height < 0:
                        zscore_height = zscore_height * 1.3 
                    else:
                        zscore_height = zscore_height * 0.7

                    curr_ax.plot([zscore_start, zscore_end], [zscore_height, zscore_height], color='.1', linewidth=3)

                # Plot detrended signal
                curr_ax.plot(detrended_signal_mean.index, detrended_signal_mean.values, color='b', linewidth=2)
                curr_ax.fill_between(detrended_signal_mean.index, (detrended_signal_mean - detrended_signal_sem).values, 
                    (detrended_signal_mean + detrended_signal_sem).values, color='b', alpha=0.05)

                # Plot detrended reference if necessary
                if z_score_before_alignment:
                    curr_ax.plot(detrended_ref_mean.index, detrended_ref_mean.values, color='g', linewidth=2)
                    curr_ax.fill_between(detrended_ref_mean.index, (detrended_ref_mean - detrended_ref_sem).values, 
                        (detrended_ref_mean + detrended_ref_sem).values, color='g', alpha=0.05)

                # Plot event onset
                if z_score_before_alignment:
                    curr_ax.legend(['465 nm', '405 nm'])
                else:
                    curr_ax.legend(['z-score window'])
                curr_ax.axvline(0, color='black', linestyle='--')
                curr_ax.set_ylabel('Voltage (V) or DeltaF/F %')
                curr_ax.set_xlabel('Time (s)')
                curr_ax.set_title('Average Detrended Signal $\pm$ SEM')

                print('Done!')
            
            # ########### Calculate z-scores ###############################################
                if z_score_before_alignment:
                    zscores = segment.analyzed[lookup['DeltaF_F_or_Z_score']]['all_traces']
                else:
                    PrintNoNewLine('Calculating Z-Scores for %s trials...' % event)
                    # calculate z_scores
                    zscores = ZScoreCalculator(detrended_signal, baseline_start=z_score_window[0], 
                        baseline_end=z_score_window[1])
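                    # ZScoreCalculator is not shown here; presumably it applies
                    # the usual baseline standardization per trial:
                    #   z(t) = (x(t) - mean(x[baseline])) / std(x[baseline])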
                    print('Done!')


            ############################ Make rasters #######################################
                PrintNoNewLine('Making heatmap for %s trials...' % event)
                # index that is closest to event onset
                # curr_ax = axs[0, 1]
                curr_ax = ax4
                # curr_ax = plt.axes()
                # Plot nearest point to time zero
                zero = np.concatenate([np.where(zscores.index == np.abs(zscores.index).min())[0], 
                    np.where(zscores.index == -1*np.abs(zscores.index).min())[0]]).min()
                for_hm = zscores.T.copy()
                # for_hm.index = for_hm.index + 1
                for_hm.columns = np.round(for_hm.columns, 1)
                try:
                    sns.heatmap(for_hm.iloc[::-1], center=0, robust=True, ax=curr_ax, cmap='bwr',
                        xticklabels=int(for_hm.shape[1]*.15), yticklabels=int(for_hm.shape[0]*.15), 
                        vmin=heatmap_range[0], vmax=heatmap_range[1])
                except Exception:  # e.g. when the computed y tick spacing rounds to 0
                    sns.heatmap(for_hm.iloc[::-1], center=0, robust=True, ax=curr_ax, cmap='bwr', 
                        xticklabels=int(for_hm.shape[1]*.15), vmin=heatmap_range[0], vmax=heatmap_range[1])
                curr_ax.axvline(zero, linestyle='--', color='black', linewidth=2)
                curr_ax.set_ylabel('Trial');
                curr_ax.set_xlabel('Time (s)');
                if z_score_before_alignment:
                    curr_ax.set_title('Z-Score or DeltaF/F Heat Map');
                else:
                    curr_ax.set_title('Z-Score Heat Map \n Baseline Window: {} to {} Seconds'.format(z_score_window[0], z_score_window[1]));
                print('Done!')
            ########################## Plot Z-score waveform ##########################
                PrintNoNewLine('Plotting Z-Score waveforms...')
                zscores_mean = zscores.mean(axis=1)

                zscores_sem = zscores.sem(axis=1)

                if smoothing_window is not None:
                    zscores_mean = SmoothSignalWithPeriod(x=zscores_mean, 
                        sampling_rate=float(sampling_rate)/downsample, 
                        ms_bin=smoothing_window, window='flat')
                    zscores_sem = SmoothSignalWithPeriod(x=zscores_sem, 
                        sampling_rate=float(sampling_rate)/downsample, 
                        ms_bin=smoothing_window, window='flat')
                # Plotting signal
                # current axis
                # curr_ax = axs[1, 1]
                curr_ax = ax3
                #curr_ax = plt.axes()
                # Plot baseline and response
                baseline_start = zscores[baseline_window[0]:baseline_window[1]].index[0]
                baseline_end = zscores[baseline_window[0]:baseline_window[1]].index[-1]
                response_start = zscores[response_window[0]:response_window[1]].index[0]
                response_end = zscores[response_window[0]:response_window[1]].index[-1]
                baseline_height = zscores[baseline_window[0]:baseline_window[1]].mean(axis=1).min() - 0.5
                response_height = zscores[response_window[0]:response_window[1]].mean(axis=1).max() + .5

                curr_ax.plot([baseline_start, baseline_end], [baseline_height, baseline_height], color='.6', linewidth=3)
                curr_ax.plot([response_start, response_end], [response_height, response_height], color='r', linewidth=3)

                curr_ax.plot(zscores_mean.index, zscores_mean.values, color='b', linewidth=2)
                curr_ax.fill_between(zscores_mean.index, (zscores_mean - zscores_sem).values, 
                    (zscores_mean + zscores_sem).values, color='b', alpha=0.05)

                # Plot event onset
                curr_ax.axvline(0, color='black', linestyle='--')

                curr_ax.set_xlabel('Time (s)')
                curr_ax.legend(['baseline window', 'response window'])
                if z_score_before_alignment:
                    curr_ax.set_title('465 nm Average Z-Score or DeltaF/F Signal $\pm$ SEM')
                    curr_ax.set_ylabel('Z-Score or DeltaF/F %')
                else:
                    curr_ax.set_title('465 nm Average Z-Score Signal $\pm$ SEM')
                    curr_ax.set_ylabel('Z-Score')
                print('Done!')
            ##################### Quantification #################################
                PrintNoNewLine('Performing statistical testing on baseline vs response periods...')
                if quantification is not None:
                    # Generating summary statistics
                    if quantification == 'AUC':
                        base = np.trapz(zscores[baseline_window[0]:baseline_window[1]], axis=0)
                        resp = np.trapz(zscores[response_window[0]:response_window[1]], axis=0)
                        ylabel = 'AUC'
                    elif quantification == 'mean':
                        base = np.mean(zscores[baseline_window[0]:baseline_window[1]], axis=0)
                        resp = np.mean(zscores[response_window[0]:response_window[1]], axis=0)
                        ylabel = 'Z-Score or DeltaF/F'
                    elif quantification == 'median':
                        base = np.median(zscores[baseline_window[0]:baseline_window[1]], axis=0)
                        resp = np.median(zscores[response_window[0]:response_window[1]], axis=0)
                        ylabel = 'Z-Score or DeltaF/F'

                    if isinstance(base, pd.core.series.Series):
                        base = base.values
                        resp = resp.values

                    # standard error of the mean = std / sqrt(n)
                    base_sem = np.std(base)/np.sqrt(base.shape[0])
                    resp_sem = np.std(resp)/np.sqrt(resp.shape[0])

                    # Testing for normality (D'Agostino's K-squared test) (N > 8)
                    if base.shape[0] > 8:
                        normal_alpha = 0.05
                        base_normal = stats.normaltest(base)
                        resp_normal = stats.normaltest(resp)
                    else:
                        # too few samples for a normality test; fall back to the
                        # non-parametric branch below
                        normal_alpha = 0.05
                        base_normal = [1, 0]
                        resp_normal = [1, 0]

                    difference_alpha = 0.05
                    if (base_normal[1] < normal_alpha) or (resp_normal[1] < normal_alpha):
                        # at least one sample deviates from normality
                        test = 'Wilcoxon Signed-Rank Test'
                        stats_results = stats.wilcoxon(base, resp)
                    else:
                        test = 'Paired Sample T-Test'
                        stats_results = stats.ttest_rel(base, resp)

                    if stats_results[1] <= difference_alpha:
                        sig = '**'
                    else:
                        sig = 'ns'

                    #curr_ax = plt.axes() 
                    curr_ax = ax5
                    ind = np.arange(2)
                    labels = ['baseline', 'response']
                    bar_kwargs = {'width': 0.7,'color': ['.6', 'r'],'linewidth':2,'zorder':5}
                    err_kwargs = {'zorder':0,'fmt': 'none','linewidth':2,'ecolor':'k'}
                    curr_ax.bar(ind, [base.mean(), resp.mean()], tick_label=labels, **bar_kwargs)
                    curr_ax.errorbar(ind, [base.mean(), resp.mean()], yerr=[base_sem, resp_sem],capsize=5, **err_kwargs)
                    x1, x2 = 0, 1
                    y = np.max([base.mean(), resp.mean()]) + np.max([base_sem, resp_sem])*1.3
                    h = y * 1.5
                    col = 'k'
                    curr_ax.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
                    curr_ax.text((x1+x2)*.5, y+h, sig, ha='center', va='bottom', color=col)
                    curr_ax.set_ylabel(ylabel)
                    curr_ax.set_title('Baseline vs. Response Changes in Z-Score or DeltaF/F Signal \n {} of {}s'.format(test, quantification))

                    print('Done!')
            ################# Save Stuff ##################################
                PrintNoNewLine('Saving everything...')
                save_path = os.path.join(dpath, segment_name, save_file_as)
                figure.savefig(save_path + '.png', format='png')
                figure.savefig(save_path + '.pdf', format='pdf')
                plt.close()
                print('Done!')

                # Trial z-scores
                # Fix columns
                zscores.columns = np.arange(1, zscores.shape[1] + 1)
                zscores.columns.name = 'trial'
                # Fix rows 
                zscores.index.name = 'time'
                zscores.to_csv(save_path + '_zscores_or_deltaf_aligned.csv')
                Downsample(zscores, downsample, index_col='time').to_csv(save_path + '_zscores_or_deltaf_aligned_downsampled.csv')
                if quantification is not None:
                    # Trial point estimates
                    point_estimates = pd.DataFrame({'baseline': base, 'response': resp}, 
                        index=np.arange(1, base.shape[0]+1))
                    point_estimates.index.name = 'trial'
                    point_estimates.to_csv(save_path + '_point_estimates.csv')
                # Save meta data
                metadata = {
                    'baseline_window': baseline_window,
                    'response_window': response_window, 
                    'quantification': quantification,
                    'original_sampling_rate': float(sampling_rate),
                    'downsampled_sampling_rate': float(sampling_rate)/downsample
                }
                with open(save_path + '_metadata.json', 'w') as fp:
                    json.dump(metadata, fp)
                # Save smoothed data
                smoothed_zscore = pd.concat([zscores_mean, zscores_sem], axis=1)
                smoothed_zscore.columns = ['mean', 'sem']
                smoothed_zscore.to_csv(save_path + '_smoothed_zscores_or_deltaf.csv')
                Downsample(smoothed_zscore, downsample, index_col='time').to_csv(save_path + '_smoothed_zscores_or_deltaf_downsampled.csv')

        print(('Finished processing datapath: %s' % dpath))
    return trials, seglist, all_signals
# 1. What is the change in price of the stock over time?
# 2. What is the daily return of the stock on average?
# 3. What is the moving average of the various stocks?
# 4.1. What is the correlation between different stocks' closing prices?
# 4.2. What is the correlation between different stocks' daily returns?
# 5. How much value is at risk if we invest in a particular stock?
# 6. How can we attempt to predict future stock behavior?
# %%
import pandas as pd
from pandas import Series, DataFrame
import numpy as np
# %%
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline
# %%
from pandas_datareader import DataReader
# %%
from datetime import datetime
# %%
from __future__ import division
# %%
# A list of big tech companies: Apple, Google, Microsoft, and Amazon

tech_list = ['AAPL', 'GOOG', 'MSFT', 'AMZN']
# %%
end = datetime.now()

start = datetime(end.year-1, end.month, end.day)
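# %%
# A hedged sketch for questions 1-3 above (the 'yahoo' source name and the
# 'Adj Close' column are assumptions about what DataReader returns):
AAPL = DataReader('AAPL', 'yahoo', start, end)
AAPL['Daily Return'] = AAPL['Adj Close'].pct_change()  # 2. daily return
for ma in [10, 20, 50]:  # 3. moving averages
    AAPL['MA for {} days'.format(ma)] = AAPL['Adj Close'].rolling(window=ma).mean()
AAPL['Adj Close'].plot(legend=True, figsize=(10, 4))  # 1. price over time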
Example #43
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 23 17:06:01 2019

@author: qianqianwang
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("whitegrid")
import warnings
warnings.filterwarnings('ignore')
from scipy import stats
from scipy.stats import norm, skew
import time

# read data
df_train = pd.read_csv('df37_window1_train.csv')
df_test = pd.read_csv('df37_window1_test.csv')

from collections import Counter


# Outlier detection
def detect_outliers(df, n, features):
    """
    Takes a dataframe df of features and returns a list of the indices
    corresponding to the observations containing more than n outliers according
    to the Tukey method.
    """
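    # A hedged sketch of the lost body (the original was cut off in this
    # extract): flag rows with more than n features outside 1.5 * IQR.
    outlier_indices = []
    for col in features:
        Q1, Q3 = np.percentile(df[col], 25), np.percentile(df[col], 75)
        step = 1.5 * (Q3 - Q1)
        outlier_indices.extend(df[(df[col] < Q1 - step) | (df[col] > Q3 + step)].index)
    counts = Counter(outlier_indices)
    return [idx for idx, cnt in counts.items() if cnt > n]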

# (tail of a separate correlation helper, corr_season, used below)
            if np.any(np.isnan(check_nan)):
                continue
            c, p = stats.pearsonr(var, data[:, mm, nn])
            corr.data[mm, nn] = c
            sig = (p < (1 - ci))  # significant at the given confidence level
            corr.mask[mm, nn] = ~sig
    return corr

corr_DJF = corr_season(t2m_ave, sst_xr_DJF)
corr_SON = corr_season(t2m_ave, sst_xr_SON)
corr_JJA = corr_season(t2m_ave, sst_xr_JJA)
corr_MAM = corr_season(t2m_ave, sst_xr_MAM)


# plot, mind the projection which matters
sns.set_style('white', {'font.family': 'Arial'})
lon_formatter = LongitudeFormatter(zero_direction_label=True)
lat_formatter = LatitudeFormatter()

lonlbl = [u'160°E',u'10°W',u'40°W',u'70°W',u'100°W']
latlbl = [u'10°S',u'5°S',u'0°',u'5°N',u'10°N']
fig = plt.figure(figsize=(12,9),dpi=300)
ax0 = fig.add_subplot(221, projection=ccrs.PlateCarree(central_longitude = 180))
plt.subplots_adjust(wspace=0.5, hspace=0.2)  # adjust the spacing between subplots
cs1 = ax0.contour(lon_sst, lat_sst, sst_xr_DJF.mean(axis = 0), np.linspace(-0.02,0.03,6), colors='k',transform=ccrs.PlateCarree())
cs2 = ax0.contourf(lon_sst, lat_sst, corr_DJF.data, np.linspace(-0.5,0.5,11),
                   cmap=plt.cm.RdBu_r, extend='both',transform=ccrs.PlateCarree())
ax0.contourf(lon_sst, lat_sst, corr_DJF.mask.astype('int'), [-0.5,0.5], hatches=['.','none'],
             colors='none', zorder=10,transform=ccrs.PlateCarree())
ax0.clabel(cs1, inline=1, fontsize=8)
ax0.set_extent([120, 280, -60, 60],crs=ccrs.PlateCarree())
Example #45
0
                            dtype=float,
                            usecols=[1],
                            skiprows=3)
P_k11_evo_opt = np.loadtxt('Knigge11/knigge11_evo_track_opt.dat',
                           dtype=float,
                           usecols=[2],
                           skiprows=3)
zeta_k11_evo_opt = np.loadtxt('Knigge11/knigge11_evo_track_opt.dat',
                              dtype=float,
                              usecols=[7],
                              skiprows=3)

#produce plot

seaborn.set(style='ticks')
seaborn.set_style({"xtick.direction": "in", "ytick.direction": "in"})

#plt.rcParams['xtick.major.pad']='15'
#plt.rcParams['ytick.major.pad']='10'

#plt.axis([1,1.7001,0,0.15001])
#plt.axis([0.02,0.2,0.08,0.3])
#plt.axis([1.0,3.001,0.02,0.301])
#plt.axis([1.1,1.6,0.03,0.11])

#plt.axis([0.01,0.2,0.08,0.260])
#plt.tick_params(top='on',right='on')

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.set_xscale('log')
                      False,
                      save_file=scratch_dir / "CHECKM8_RAND_APPROX.png")
        data.append({
            "Strategy": str(scheduler_lp_rand.solve_strategy.value),
            "Name": "CHECKM8_RAND_APPROX",
            "CPU": scheduler_lp_rand.schedule_aux_data.cpu,
            "Activation RAM": scheduler_lp_rand.schedule_aux_data.activation_ram,
        })

    # Plot solution memory usage vs cpu scatter plot
    sns.set()
    sns.set_style("white")

    plt.figure(figsize=(4, 4))
    plt.xlabel("Activation memory usage (GB)")
    plt.ylabel("GPU time (ms)")

    color, marker, markersize = SolveStrategy.get_plot_params(
        scheduler_result_all.solve_strategy)
    plt.axhline(y=scheduler_result_all.schedule_aux_data.cpu / 1000,
                color=color,
                linestyle="--",
                label="Checkpoint all (ideal)")

    if args.model_name in LINEAR_MODELS:
        color, marker, markersize = SolveStrategy.get_plot_params(
            scheduler_result_sqrtn.solve_strategy)
Example #47
0
import os
import seaborn as sns
import pickle as pkl
pal = sns.color_palette('Blues')
sns.set_context("paper", font_scale=1.5)
sns.set_style("ticks")
import numpy as np
import matplotlib.pyplot as plt
import pdb
from scipy import stats
import matplotlib.cm as cm

# In[173]:

dic = pkl.load(
    open('/users/global/cornkle/C_paper/wavelet/saves/bulk_40big_zR.p',
         'rb'))  #MSG_TRMM_temp_pcp_300px2004-2013_new.p', 'rb'))
dic2 = pkl.load(
    open('/users/global/cornkle/C_paper/wavelet/saves/bulk_40big_size_zR.p',
         'rb'))

# In[174]:

_p = np.array(dic['pmax'])  # 98th perc per MCS
_t = np.array(dic['tmin'])  #mean T
_clat = np.array(dic['clat'])
_area = np.array(dic['area']) * 25
_isfin = np.array(dic['isfin'])
_po30 = np.array(dic['po30'])
_perc = np.array(dic['pperc'])
_pp = np.array(dic['p'])
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from pydotplus import graph_from_dot_data
'''
Graphviz is not a Python tool. The Python packages on PyPI just provide a
convenient way of using Graphviz from Python code. You still have to install
the Graphviz executables, which are not Python packages and thus are not
shipped with these packages. You can install them with a general-purpose
package manager such as Homebrew:

    brew install graphviz

Otherwise pydot fails with:

    pydot.InvocationException: GraphViz's executables not found

'''
from sklearn.tree import export_graphviz

sns.set_style("dark")
colors = ["#800000", "#45ada8", "#2a363b", "#fecea8", "#99b898", "#e5fcc2"]
sns.set_palette(sns.color_palette(colors))

breast_data = pd.read_csv('./data/data.csv')
#breast_data = breast_data.drop(['ID','Unnamed: 32'],axis=1)

#drop diagnosis, create X and Y
y = breast_data['diagnosis']
x_ = breast_data.drop('diagnosis', axis=1)
x = x_.drop('id', axis=1)

#replace M and B with 1s and 0s
y = y.replace(['M', 'B'], [1, 0])
columns = x.columns
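
# A minimal usage sketch for the graphviz imports above (the fitted decision
# tree `clf` is a hypothetical placeholder, not defined in this fragment):
# dot_data = export_graphviz(clf, out_file=None, feature_names=columns, filled=True)
# graph_from_dot_data(dot_data).write_png('tree.png')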

#%% Play with scanpys PCA
sc.tl.pca(adata_pre, n_comps=50, zero_center=True, svd_solver='auto', random_state=0, return_info=False, use_highly_variable=None, dtype='float32', copy=False, chunked=False, chunk_size=None)
#%%
classvecser= adata_pre.obs['survivor']
classvec = pd.DataFrame(classvecser)

PCs=adata_pre.obsm['X_pca']
PCdf = pd.DataFrame(PCs)
classvec.reset_index(drop=True, inplace=True)
PCdf.reset_index(drop=True, inplace=True)

PC_df=pd.concat([classvec['survivor'],PCdf], axis =1)
#%%
sns.set_style('white')
from matplotlib.pyplot import plot, show, draw, figure, cm
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(6,6))


ax=sns.scatterplot(PC_df[0], PC_df[1], hue= PC_df['survivor'])
ax.set(xlabel ='PC1', ylabel ='PC2') 

ax1=sns.scatterplot(PC_df[1], PC_df[2], hue= PC_df['survivor'])
ax1.set(xlabel ='PC2', ylabel ='PC3') 

ax2=sns.scatterplot(PC_df[2], PC_df[3], hue= PC_df['survivor'])
ax2.set(xlabel ='PC3', ylabel ='PC4') 

ax3=sns.scatterplot(PC_df[0], PC_df[2], hue= PC_df['survivor'])
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from scipy.stats import norm
from scipy import stats
from scipy.stats import skew

import matplotlib as mpl
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['font.serif'] = ['SimHei']
mpl.rcParams['font.size'] = 10
sns.set_style("darkgrid",{"font.sans-serif":['simhei', 'Arial']})

NROWS = 200000

def cal_car_state(fix_record):
    """Score a car's condition (0-10) from its Chinese repair-record text."""
    state = 0
    if isinstance(fix_record, str):
        a = fix_record.count('喷漆修复')    # spray-paint repair
        b = fix_record.count('覆盖件更换')  # body-panel replacement
        c = fix_record.count('钣金修复')    # sheet-metal repair
        d = fix_record.count('有色差')      # visible color mismatch
        if a + b + c + d > 0:  # guard against dividing by zero
            state = (a * 0.2 + b * 0.3 + c * 0.4 + d * 0.1) / (a + b + c + d)
    return (1 - state) * 10
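
# e.g. cal_car_state('喷漆修复, 钣金修复') counts one paint repair and one
# sheet-metal repair: state = (0.2 + 0.4) / 2 = 0.3, so the score is 7.0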

current_year = datetime.datetime.now().year
Example #51
0
def plot_score_distribution(config):
    info('plot_score_distribution()')
    cwd_slash = gen_cwd_slash(config)

    thdf = pd.read_csv(cwd_slash('calibrated_threshold.csv'), index_col=0)
    thdf.index = thdf.index.astype(str)

    valid_agg = pd.read_csv(cwd_slash('valid_aggregated_prediction.csv'))
    valid_agg = valid_agg.melt(id_vars='source_img_id',
                               var_name='class_id',
                               value_name='p')
    valid_agg['logit'] = logit(valid_agg['p'])
    valid_grouped = valid_agg.groupby('class_id')

    test_agg = pd.read_csv(cwd_slash('test_aggregated_prediction.csv'))
    test_agg = test_agg.melt(id_vars='source_img_id',
                             var_name='class_id',
                             value_name='p')
    test_agg['logit'] = logit(test_agg['p'])
    test_grouped = test_agg.groupby('class_id')

    mean_std_df = pd.DataFrame(
        {
            'valid_mean': valid_grouped['logit'].mean(),
            'valid_std': valid_grouped['logit'].std(),
            'test_mean': test_grouped['logit'].mean(),
            'test_std': test_grouped['logit'].std(),
        }, )

    thdf = thdf.join(mean_std_df, how='left')
    thdf['th_logit'] = logit(thdf['best_threshold'])
    thdf['z_score'] = (thdf['th_logit'] -
                       thdf['valid_mean']) / thdf['valid_std']
    thdf['th_adjusted_logit'] = thdf['z_score'] * thdf['test_std'] + thdf[
        'test_mean']
    thdf['th_adjusted'] = np.exp(
        thdf['th_adjusted_logit']) / (np.exp(thdf['th_adjusted_logit']) + 1)
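    # i.e. each class threshold is transplanted from the validation logit
    # distribution to the test one by matching z-scores:
    #   z = (logit(th) - mean_valid) / std_valid
    #   th_adjusted = sigmoid(z * std_test + mean_test)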
    print(thdf)

    valid_agg['group'] = 'valid'
    test_agg['group'] = 'test'
    both_agg = pd.concat([valid_agg, test_agg])
    both_agg['class_id'] = [
        f"{int(xx):02d}-{class_labels[int(xx)]}" for xx in both_agg['class_id']
    ]

    plt.figure(figsize=(30, 16))
    sns.set_style("whitegrid")
    sns.stripplot(x='class_id',
                  y='logit',
                  hue='group',
                  data=both_agg,
                  jitter=0.3,
                  alpha=0.3,
                  size=1,
                  dodge=True)
    # sns.violinplot(x='class_id', y='logit', hue='group', data=both_agg, inner=None, color='.8', cut=0, bw=0.001)

    for id_, row in thdf.iterrows():
        plt.plot([int(id_) - 0.4, int(id_)],
                 [logit(row['best_threshold']),
                  logit(row['best_threshold'])])
        plt.plot([int(id_), int(id_) + 0.4], [
            logit(row['best_threshold'] * 0.5),
            logit(row['best_threshold'] * 0.5)
        ])
        plt.plot([int(id_), int(id_) + 0.4],
                 [logit(row['th_adjusted']),
                  logit(row['th_adjusted'])],
                 dashes=[1, 1])

    plt.axhline(0, dashes=[2, 1, 1, 1])

    save_path = cwd_slash('score_distribution.png')

    plt.xticks(rotation=30,
               verticalalignment='top',
               horizontalalignment='right')
    plt.savefig(save_path, dpi=300)
    debug(f"saved to {save_path}")
    <<< Unstable particles are emitted from a source and decay at a
    distance x, a real number that has an exponential probability
    distribution with [parameter] lambda.  Decay events can only be
    observed if they occur in a window extending from x=1 cm to x=20
    cm. N decays are observed at locations { 1.5, 2, 3, 4, 5, 12}
    cm. What is [the distribution of] lambda?
"""

import sys
import numpy as np
import pandas as pd
from scipy.integrate import quad
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style(style='dark')

# Routine for constructing posterior predictive as a mixture of pdfs:
from posterior_predictive_pmf import functional_posterior_predictive_pmf


def expon(X, lambd):
    """
        Note: could equivalently use scipy.stats.expon.
        X and/or lambd may be vectors.
    """
    return (1 / lambd) * np.exp(-X / lambd)
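

# For a decay observed only inside the window [xmin, xmax], the per-event
# likelihood is the window-normalized density (a sketch consistent with the
# problem statement above; expon_integral below presumably computes the
# denominator):
#
#     p(x | lambd) = expon(x, lambd) / quad(expon, xmin, xmax, args=(lambd,))[0]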


def expon_integral(lambd, xmin, xmax):
    """
Example #53
0
Maximilian N. Günther
MIT Kavli Institute for Astrophysics and Space Research, 
Massachusetts Institute of Technology,
77 Massachusetts Avenue,
Cambridge, MA 02109, 
USA
Email: [email protected]
Web: www.mnguenther.com
"""

from __future__ import print_function, division, absolute_import

#::: plotting settings
import seaborn as sns
sns.set(context='paper', style='ticks', palette='deep', font='sans-serif', font_scale=1.5, color_codes=True)
sns.set_style({"xtick.direction": "in","ytick.direction": "in"})
sns.set_context(rc={'lines.markeredgewidth': 1})

#::: modules
import numpy as np
import matplotlib.pyplot as plt
import os
from astropy.io import fits
from collections import OrderedDict

#::: exoworld modules
from exoworlds.lightcurves import expand_flags



Example #54
0
    def initUI(self):
        pg.setConfigOption('background', 'w')
        params = {
                'figure.figsize': [4, 4],
                'figure.dpi': 300,
                'savefig.dpi': 300
           }
        plt.rcParams.update(params)
        
        sns.set()
        sns.set_style("white")
        sns.set_palette("muted")
        sns.set_context("paper")
        
        self.fullSignal=[]
        self.shiftFullSignal=[]
        self.shiftFullSignalNormal=[]

        self.fSig=''
        
        contain=QSplitter(Qt.Horizontal)
      
        buttons = QtWidgets.QVBoxLayout()
        graphics = QSplitter(Qt.Vertical)
        imaFrac = QtWidgets.QHBoxLayout()
        frac = QtWidgets.QVBoxLayout()
        lagBox = QFormLayout()
        results =  QFormLayout()


        self.btnLoadSig = QPushButton('Load Signal')
        self.btnLoadSig.clicked.connect(self.loadSignal)
        self.btnLoadSig.setStyleSheet("background-color:#fbe9e7; font-size: 18px")
        
        self.lblSignal = QLabel('')
        self.lblSignal.setStyleSheet("font-size: 18px")
        
        self.checkTotalSignal = QCheckBox('Signal')
        self.checkTotalSignal.setStyleSheet("font-size: 18px") 

        self.cmbFractal = QComboBox()
        self.cmbFractal.setStyleSheet("background-color:#fbe9e7; font-size: 18px")
        self.cmbFractal.addItem("Triangle") #Elemento 0
        self.cmbFractal.addItem("Square") #Elemento 1
        self.cmbFractal.addItem("Pentagon") #Elemento 2
        self.cmbFractal.addItem("Hexagon") #Elemento 3
        #self.cmbFractal.addItem("Octgon") #Ahora éste es el elemento 4
        
        self.btnDo = QPushButton("Do Fractal")
        self.btnDo.setDisabled(True)
        self.btnDo.setStyleSheet("font-size: 18px")
        self.btnDo.clicked.connect(self.showDialog)
        
        self.btnFracInter = QPushButton("Points-Inter")
        self.btnFracInter.setDisabled(True)
        self.btnFracInter.setStyleSheet("font-size: 18px")
        self.btnFracInter.clicked.connect(self.update)
        
        self.txtLag = QLineEdit('0')
        self.txtLag.setStyleSheet("font-size: 18px")
        self.txtLag.setEnabled(True)
        lblLag = QLabel("LAG")
        lblLag.setStyleSheet("font-size: 18px")
        
        lagBox.addRow(lblLag,  self.txtLag)
        
        self.btnSub = QPushButton("Graph Poincare")
        self.btnSub.setDisabled(True)
        self.btnSub.setStyleSheet("font-size: 18px")
        self.btnSub.clicked.connect(self.poincSub)
        
        self.lblsd1 = QLabel("SD1: ")
        self.lblsd1.setEnabled(True)  
        self.lblsd1.setStyleSheet("font-size: 18px")
        self.txtsd1 = QLineEdit('')
        self.txtsd1.setEnabled(True)
        self.txtsd1.setStyleSheet("font-size: 18px")
        
        self.lblsd2 = QLabel("SD2: ")
        self.lblsd2.setEnabled(True) 
        self.lblsd2.setStyleSheet("font-size: 18px")
        self.txtsd2 = QLineEdit('')
        self.txtsd2.setEnabled(True)
        self.txtsd2.setStyleSheet("font-size: 18px")
        
        self.lblc1 = QLabel("C1: ")
        self.lblc1.setEnabled(True)  
        self.lblc1.setStyleSheet("font-size: 18px")
        self.txtc1 = QLineEdit('')
        self.txtc1.setEnabled(True)
        self.txtc1.setStyleSheet("font-size: 18px")
        
        self.lblc2 = QLabel("C2: ")
        self.lblc2.setEnabled(True) 
        self.lblc2.setStyleSheet("font-size: 18px")
        self.txtc2 = QLineEdit('')
        self.txtc2.setEnabled(True)
        self.txtc2.setStyleSheet("font-size: 18px")
        
        results.addRow(self.lblsd1, self.txtsd1)
        results.addRow(self.lblsd2, self.txtsd2)
        results.addRow(self.lblc1, self.txtc1)
        results.addRow(self.lblc2, self.txtc2)
        
        self.btnSave = QPushButton("Save Current Data")
        self.btnSave.setDisabled(True)
        self.btnSave.setStyleSheet("font-size: 18px")
        self.btnSave.clicked.connect(self.saveFile)
        
        # NOTE: this first GraphicsLayoutWidget (and the four view boxes and
        # scatter items below) is superseded a few lines down, where
        # self.viewBox is re-created with two plots instead.
        self.viewBox=pg.GraphicsLayoutWidget()
        self.interFrac = self.viewBox.addViewBox(row=0, col=0, lockAspect=True)
        self.rafFrac = self.viewBox.addViewBox(row=0, col=1, lockAspect=True)
        self.bothFrac = self.viewBox.addViewBox(row=0, col=2, lockAspect=True)
        self.aleatFrac = self.viewBox.addViewBox(row=0, col=3, lockAspect=True)
        
        self.scaInter=pg.ScatterPlotItem()
        self.scaRaf=pg.ScatterPlotItem()
        self.scaBoth=pg.ScatterPlotItem()
        self.scaAleat=pg.ScatterPlotItem()
        

        self.viewBox = pg.GraphicsLayoutWidget()
        self.interFrac = self.viewBox.addPlot()
        self.interFrac.setYRange(-0.1, 1.1, padding=0)
        self.interFrac.setXRange(-0.1, 1.1, padding=0)

        self.poinc = self.viewBox.addPlot()

        self.scaInter = pg.ScatterPlotItem()
        self.scaPoinc = pg.ScatterPlotItem()

        self.roiInter = pg.PolyLineROI([[0.2, 0.5], [0.8, 0.5], [0.5, 0]],
                                       pen=(6, 9), closed=True)

        imaFrac.addWidget(self.viewBox)
                
        buttons.setSizeConstraint(0)
        buttons.addWidget(self.btnLoadSig)
        buttons.addWidget(self.lblSignal)
        buttons.addWidget(self.checkTotalSignal)
        
        nomFractal = QLabel("Fractal Type")
        nomFractal.setStyleSheet("font-size: 18px")
        buttons.addWidget(nomFractal)
        buttons.addWidget(self.cmbFractal)
        
        buttons.addWidget(self.btnDo)
        buttons.addWidget(self.btnFracInter)
        buttons.addLayout(lagBox)
        buttons.addWidget(self.btnSub)
        buttons.addLayout(results)
        buttons.addWidget(self.btnSave)
        
        frac.addLayout(imaFrac)
        
        self.plot1=pg.PlotWidget()
        fra = QWidget()
        fra.setLayout(frac)

        graphics.addWidget(fra)
        graphics.addWidget(self.plot1)
        bot = QWidget()
        bot.setLayout(buttons)

        contain.addWidget(bot)
        contain.addWidget(graphics)
        self.addWidget(contain)
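
The SD1/SD2 and C1/C2 fields hold Poincaré-plot descriptors of the loaded signal; the computation itself lives in the class's other methods (not shown). As a minimal illustration of the standard SD1/SD2 definitions (not the snippet's own code; `rr` is a hypothetical array of successive intervals):

import numpy as np

def poincare_sd(rr):
    """Standard Poincare descriptors: SD1 (short-term) and SD2 (long-term)."""
    x, y = rr[:-1], rr[1:]               # successive-interval pairs (x_n, x_{n+1})
    sd1 = np.sqrt(np.var(x - y) / 2.0)   # spread perpendicular to the identity line
    sd2 = np.sqrt(np.var(x + y) / 2.0)   # spread along the identity line
    return sd1, sd2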
Example #55
import glob

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import netCDF4 as nc  # provides num2date
import numpy as np
import seaborn as sns

# open_nc and extract_vars_nc are project helpers defined alongside this
# function in the original module.


def plot_sensitivity(nc_dir, var_name, level='none', ylab='None'):
    """Given Wallerfing sensitivity JULES model output plots given variable sensitivity.

    :param nc_dir: Directory location for JULES output.
    :type nc_dir: str
    :param var_name: Name of variable to plot.
    :type var_name: str
    :param level: If variable is 4D, specify the 4th dimension's index.
    :type level: int or str
    :param ylab: Label for Y-axis.
    :type ylab: str
    :return: Figure object to save.
    :rtype: object
    """
    sns.set_context('poster',
                    font_scale=1.2,
                    rc={
                        'lines.linewidth': 1,
                        'lines.markersize': 10
                    })
    # The style must be set before the figure is created for it to take effect.
    sns.set_style('whitegrid')
    palette = sns.color_palette("colorblind", 11)
    fig, ax = plt.subplots(nrows=1, ncols=1)  # figsize=(15, 5))
    for nc_file in glob.glob(nc_dir + 'crp_g_*.3_hourly.nc'):
        dat = open_nc(nc_file)
        lats, lons, var, time = extract_vars_nc(dat,
                                                var_name,
                                                strt_yr=2012,
                                                end_yr=2012,
                                                strt_day=1,
                                                strt_hr=3)
        times = nc.num2date(time[:], time.units)
        plt_var = var[:]
        plt_var[plt_var > 1e18] = np.nan
        depths = [100, 250, 650, 2000]
        if var_name == 'smcl':
            ax.plot(
                times[:],
                plt_var[:, level, 0, 0] / depths[level],
            )
        elif level != 'none':
            ax.plot(
                times[:],
                plt_var[:, level, 0, 0],
            )
        else:
            ax.plot(
                times[:],
                plt_var[:, 0, 0],
            )
    #plt.ylabel('Volumetric soil water content (m3 m-3)')
    plt.xlabel('Date')
    plt.gcf().autofmt_xdate()
    myFmt = mdates.DateFormatter('%B')
    ax.xaxis.set_major_formatter(myFmt)
    if ylab != 'None':
        plt.ylabel(ylab)
    #plt.legend(loc=2)
    #plt.show()
    return fig
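
A hedged usage sketch (the path and variable name are hypothetical; actual JULES output names will differ):

fig = plot_sensitivity('/path/to/jules/output/', 'smcl', level=0,
                       ylab='Volumetric soil water content (m3 m-3)')
fig.savefig('smcl_sensitivity.png', dpi=300, bbox_inches='tight')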
Example #56
__author__ = 'Renan Nominato'
__version__ = '0.0.1'
""""
The main purpose of this script it is verify strategies to set a optimal stoploss level. 
In this way, we tested several values of STD
"""

# TODO: include pivots and other types of stop-loss estimation

import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import math
sns.set_style('dark')


# Generate STD and mean of a dataseries
def get_std_mean(dtf, window: int):
    # Create a window specified by the user
    dft_std = dtf['Close'].rolling(window).std()
    dft_mean = dtf['Close'].ewm(span=window).mean()
    #dft_mean = dtf['Close'].rolling(window).mean()
    return [
        dft_std.fillna(value=dft_std.mean()),
        dft_mean.fillna(value=dft_mean.mean())
    ]
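
A minimal sketch of how the rolling STD and mean can be turned into candidate stop-loss levels (an illustration of the idea stated above, not code from the original script; the `ks` multiples are arbitrary):

def stoploss_levels(dtf, window, ks=(1.0, 1.5, 2.0)):
    """For each STD multiple k, a long-position stop-loss band at mean - k * std."""
    std, mean = get_std_mean(dtf, window)
    return {k: mean - k * std for k in ks}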


#df = pd.read_pickle('XRB-BTC_T.pkl')
df = pd.read_csv('BTCUSDT08_04_sec.csv')
Example #57
import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns

import ROOT
from ROOT import (RooArgList, RooCBShape, RooChebychev, RooFFTConvPdf, RooFit,
                  RooGaussian, RooRealVar)

# root_plot is a project helper (defined elsewhere) that draws a RooFit result
# and returns the residual/pull frame.


def PeakFit_likelihood(Likelihood_cut: pd.DataFrame,
                       mass_energy: pd.DataFrame,
                       cutval,
                       plots=True,
                       constant_mean=True,
                       constant_width=True,
                       classifier_name='Likelihood',
                       CB=True,
                       Gauss=False,
                       bkg_comb=True,
                       bkg_exp=False,
                       bkg_cheb=False):
    print('Starting fit...')
    matplotlib.use('Agg')
    # Check if we have mass in MeV or GeV
    if np.mean(mass_energy) > 1000:
        normalization_mass = 1000
    else:
        normalization_mass = 1
    sns.set_style("whitegrid")  # White background on plot
    prediction = Likelihood_cut  # rename to prediction
    # Set range
    mZmin = 60.0
    mZmax = 130.0
    # Number of bins
    NbinsZmass = 100

    #Initiate the mass variable
    m_ee = ROOT.RooRealVar("m_ee", "Invariant mass (GeV/c^{2})", mZmin, mZmax)
    m_ee.setRange("MC_mZfit_range", mZmin, mZmax)

    # =============================================================================
    #    fit signal
    # =============================================================================

    # Make a mask in the signal range. Prediction is 0 or 1, so above 0.5 is signal
    mask_mass = (mass_energy / normalization_mass > mZmin) & (
        mass_energy / normalization_mass < mZmax) & (prediction > 0.5)
    Z_mass_signal = np.array(mass_energy[mask_mass] / normalization_mass)
    #Make np.array

    # Initiate 1D histogram
    h_mZ_all = ROOT.TH1D("h_mZ_all", "Histogram of Z mass", NbinsZmass, mZmin,
                         mZmax)

    for isample in range(Z_mass_signal.shape[0]):
        score = Z_mass_signal[isample]
        h_mZ_all.Fill(score)

    # Constructs histogram with m_ee as argument from the 1d histogram h_mZ_all
    mc_Zee_mZ = ROOT.RooDataHist("mc_Zee_mZ", "Dataset with Zee m_ee",
                                 RooArgList(m_ee), h_mZ_all)

    # Define variables for the fits.
    # BW: Breit-Wigner. CB: Crystal-Ball
    meanBW = ROOT.RooRealVar("meanBW", "meanBW", 91.1876, 60.0, 120.0)
    #91.1876
    meanBW.setConstant(True)
    # this is a theoretical constant

    sigmaBW = ROOT.RooRealVar("sigmaBW", "sigmaBW", 2.4952, 2.0, 20.0)
    #2.4952
    sigmaBW.setConstant(True)
    # this is a theoretical constant
    # if constant_mean:

    func_BW = ROOT.RooBreitWigner("func_BW", "Breit-Wigner", m_ee, meanBW,
                                  sigmaBW)
    # Make the function from the constants

    # Crystal ball
    if CB:
        meanCB = RooRealVar("meanCB", "meanCB", -0.0716, -10.0, 10.0)
        # meanCB.setConstant(True) #if commented out, it can float between the minimum and maximum
        sigmaCB = RooRealVar("sigmaCB", "sigmaCB", 0.193, 0, 15)
        # sigmaCB.setConstant(True)
        alphaCB = RooRealVar("alphaCB", "alphaCB", 1.58, 0.0, 10.0)
        # alphaCB.setConstant(True)
        nCB = RooRealVar("nCB", "nCB", 0.886, -10, 50.0)
        # nCB.setConstant(True)
        func_sig_CB = RooCBShape("func_CB", "Crystal Ball", m_ee, meanCB,
                                 sigmaCB, alphaCB, nCB)
        # Define Crystal-Ball function
    # Gaussian
    elif Gauss:  # Use Gaussian if True in function call
        meanGA = RooRealVar("meanGA", "meanGA", 10.0, -10.0, 10.0)
        sigmaGA = RooRealVar("sigmaGA", "sigmaGA", 3.0, 0.01, 10.0)
        if constant_width:
            sigmaGA.setConstant(True)

        nGA = RooRealVar("nGA", "nGA", 1.5, 0.0, 20.0)
        func_GA = RooGaussian("func_GA", "Gaussian", m_ee, meanGA, sigmaGA)
        #, nGA);

    if CB:  # Convolute Breit-Wigner and Crystal-Ball
        print("Convoluting a Crystal-Ball and Breit-Wigner for signal")
        func_BWxCB_unextended = RooFFTConvPdf("func_BWxCB_unextended",
                                              "Breit-Wigner (X) Crystal Ball",
                                              m_ee, func_BW, func_sig_CB)

    elif Gauss:  # Convolute Breit-Wigner and Gauss
        print("Convoluting a Gauss and Breit-Wigner for signal")
        func_BWxCB_unextended = RooFFTConvPdf("func_BWxCB_unextended",
                                              "Breit-Wigner (X) Gaussian",
                                              m_ee, func_BW, func_GA)

    else:  # only Breit-Wigner fit on the signal
        print("Fitting only with Breit-Wigner for signal")
        func_BWxCB_unextended = func_BW

    m_ee.setRange("MC_mZfit_range", 85, 97)
    # Set the fit range for the signal

    nsig = RooRealVar("ntotal", "ntotal", 1000, 0, 10e6)
    # Define the variable for the number of signal
    func_BWxCB = ROOT.RooExtendPdf("signal_func_Zee", "signal_func_Zee",
                                   func_BWxCB_unextended, nsig)
    # Adding the nsig term to the pdf

    func_BWxCB.fitTo(mc_Zee_mZ, RooFit.Range("MC_mZfit_range"))
    # Fit the signal

    if plots:  # Plots the signal using the function "root_plot" defined above
        mc_Zee_signal = root_plot(m_ee=m_ee,
                                  distribution=mc_Zee_mZ,
                                  fit=func_BWxCB,
                                  mZmin=mZmin,
                                  mZmax=mZmax,
                                  title=f'signal for cut {cutval}')
# =============================================================================
#    background
# =============================================================================

    nbkg = RooRealVar("nbkg", "nbkg", 1000, 0, 10e6)
    # Define the variable for the number of background

    #if True:
    m_ee.setRange("MC_mZfit_range", mZmin, mZmax)
    # Set range for fit as defined in the beginning
    c_bkg_mZ = ROOT.TCanvas("c_bkg_mZ", "", 0, 0, 1000, 500)
    # Make the canvas for plotting

    Z_mass_background = np.array(mass_energy[mask_mass] / normalization_mass)
    # NOTE: this reuses mask_mass from the signal selection (prediction > 0.5);
    # a dedicated background selection would presumably invert that cut.
    h_mZWenu_all = ROOT.TH1D("h_mZ_all", "Histogram of Z mass", NbinsZmass,
                             mZmin, mZmax)
    # Initiate 1D histogram

    for isample in range(Z_mass_background.shape[0]):
        score = Z_mass_background[isample]
        h_mZWenu_all.Fill(score)

    # Create the lin + exponential fit
    lam = RooRealVar("lambda", "Exponent", -0.04, -5.0, 0.0)
    func_expo = ROOT.RooExponential("func_expo", "Exponential PDF", m_ee, lam)

    #coef_pol1 =  RooRealVar("coef_pol1", "Slope of background", 0.0, -10.0, 10.0);
    #func_pol1 = ROOT.RooPolynomial("func_pol1", "Linear PDF", m_ee, RooArgList(coef_pol1));

    # Create Chebychev polynomial
    a0 = RooRealVar("a0", "a0", -0.4, -5.0, 5.0)
    a1 = RooRealVar("a1", "a1", -0.03, -5.0, 5.0)
    a2 = RooRealVar("a2", "a2", 0.02, -5.0, 5.0)
    a3 = RooRealVar("a3", "a3", 0.02, -5.0, 5.0)

    # Polynomials with different order
    func_Cpol1 = RooChebychev("func_Cpol1",
                              "Chebychev polynomial of 1st order", m_ee,
                              RooArgList(a0, a1))
    func_Cpol2 = RooChebychev("func_Cpol2",
                              "Chebychev polynomial of 2nd order", m_ee,
                              RooArgList(a0, a1, a2))
    func_Cpol3 = RooChebychev("func_Cpol3",
                              "Chebychev polynomial of 3rd order", m_ee,
                              RooArgList(a0, a1, a2, a3))
    f_exp_mZ = RooRealVar("N_lin_mZ", "Linear fraction", 0.50, 0, 1)

    m_ee.setRange("low", 60, 70)
    m_ee.setRange("high", 110, 130)

    # Adding exponential and Chebychev if comb:
    if bkg_comb:
        func_ExpLin_mZ_unextended = ROOT.RooAddPdf(
            "func_ExpLin_mZ_unextended", "Exponential and Linear PDF",
            RooArgList(func_Cpol3, func_expo), RooArgList(f_exp_mZ))
    elif bkg_exp:
        func_ExpLin_mZ_unextended = func_expo
    elif bkg_cheb:
        func_ExpLin_mZ_unextended = func_Cpol3
    else:
        print("No background fit called. Exiting")
        return None

    func_ExpLin_mZ = ROOT.RooExtendPdf("func_ExpLin_mZ", "func_ExpLin_mZ",
                                       func_ExpLin_mZ_unextended, nbkg)
    # Adding the nbkg term to the pdf
    # Constructs histogram with m_ee as argument from the 1d histogram h_mZ_all
    mc_Wenu_mZ = ROOT.RooDataHist("mc_Zee_mZ", "Dataset with Zee m_ee",
                                  RooArgList(m_ee), h_mZWenu_all)
    func_ExpLin_mZ.fitTo(mc_Wenu_mZ, RooFit.Range("MC_mZfit_range"))
    #ROOT.RooFit.Range("low,high")); # Fits background

    #Plotting background
    residue = root_plot(m_ee=m_ee,
                        distribution=mc_Wenu_mZ,
                        fit=func_ExpLin_mZ,
                        mZmin=mZmin,
                        mZmax=mZmax,
                        title=f'Background for cut {cutval}')
    #
    # =============================================================================
    #    Combining signal and background
    # =============================================================================

    m_ee.setRange("MC_mZfit_range", mZmin, mZmax)

    Z_mass = np.array(mass_energy[mask_mass] / normalization_mass)
    h_mZWenu = ROOT.TH1D("h_mZ_all", "Histogram of Z mass", NbinsZmass, mZmin,
                         mZmax)

    for isample in range(Z_mass.shape[0]):
        score = Z_mass[isample]
        h_mZWenu.Fill(score)

    # Constructs histogram with m_ee as argument from the 1d histogram h_mZ_all
    mc_ZeeWenu_mZ = ROOT.RooDataHist("mc_Zee_mZ", "Dataset with Zee m_ee",
                                     RooArgList(m_ee), h_mZWenu)

    ## Fits the data and returns the fraction of background
    f_bkg_mZ = RooRealVar("f_bkg_mZ", "Signal fraction",
                          nbkg.getVal() / nsig.getVal(), 0.0, 1)

    ## Combining the signal and background fits
    func_SigBkg_mZ_unextended = ROOT.RooAddPdf(
        "func_SigBkg_mZ", "Signal and Background PDF",
        RooArgList(func_ExpLin_mZ_unextended, func_BWxCB_unextended),
        RooArgList(f_bkg_mZ))
    # func_SigBkg_mZ_unextended = func_BWxCB_unextended;#ROOT.RooAddPdf("func_SigBkg_mZ", "Signal and Background PDF", RooArgList(func_BWxCB_unextended, func_BWxCB_unextended), RooArgList(f_bkg_mZ));
    ntotal = RooRealVar("ntotal", "ntotal", 10000, 0, 10e6)
    func_SigBkg_mZ = ROOT.RooExtendPdf("func_ExpLin_mZ", "func_ExpLin_mZ",
                                       func_SigBkg_mZ_unextended, ntotal)

    func_SigBkg_mZ.fitTo(mc_ZeeWenu_mZ)
    # Fits the full data set

    if plots:
        mc_ZeeWenu_mZ_resid = root_plot(m_ee=m_ee,
                                        distribution=mc_ZeeWenu_mZ,
                                        fit=func_SigBkg_mZ,
                                        mZmin=mZmin,
                                        mZmax=mZmax,
                                        title=f'Bkg+Sig for cut {cutval}')

    # Baseline ntotal = 41231 (Data)
    # fraction 0.9333
    # Baseline ntotal = 74747 (MC)
    # fraction 0.4427
    # Malte script len(Z_mass)
    bkg = len(Z_mass) * f_bkg_mZ.getVal()
    sig = len(Z_mass) * (1 - f_bkg_mZ.getVal())
    # print(f_bkg_mZ.getVal())

    #### DATA ####
    BL_sig = 71075 * (
        1 - 0.4049
    )  # BL = baseline, the number is the fraction of bkg in baseline
    BL_bkg = 71075 * 0.4049  # BL = baseline

    bkg_ratio = bkg / BL_bkg
    sig_ratio = sig / BL_sig

    max_residue = max(abs(mc_ZeeWenu_mZ_resid.getYAxisMax()),
                      abs(mc_ZeeWenu_mZ_resid.getYAxisMin()))
    # print(max_residue)
    # print(bkg_ratio)
    # print(sig_ratio)

    if (bkg_ratio < 1.009) & (sig_ratio < 1.009) & (abs(
            mc_ZeeWenu_mZ_resid.getYAxisMin()) < 4.5) & (abs(
                mc_ZeeWenu_mZ_resid.getYAxisMax()) < 4.5):
        # input('....')

        return BL_sig, BL_bkg, sig_ratio, bkg_ratio
    else:
        return 0, 0, 0, 0
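
A hedged usage sketch (the dataframe and its column names are hypothetical; the function expects per-event classifier scores and the matching invariant masses):

scores = df['likelihood_score']   # hypothetical classifier output (signal ~ 1)
masses = df['invMass']            # hypothetical invariant-mass column (MeV or GeV)
BL_sig, BL_bkg, sig_ratio, bkg_ratio = PeakFit_likelihood(
    scores, masses, cutval=0.5, plots=True)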
Example #58
# Uses the same imports and project helpers (open_nc, extract_vars_nc) as
# plot_sensitivity above.
def plot_climatology(nc_file, var_name, level='none', ylab='None'):
    """Given Wallerfing climatological JULES model output plots given variable climatology.

    :param nc_file: File location for JULES output.
    :type nc_file: str
    :param var_name: Name of variable to plot.
    :type var_name: str
    :param level: If variable is 4D, specify the 4th dimension's index.
    :type level: int or str
    :param ylab: Label for Y-axis.
    :type ylab: str
    :return: Figure object to save.
    :rtype: object
    """
    sns.set_context('poster',
                    font_scale=1.2,
                    rc={
                        'lines.linewidth': 1,
                        'lines.markersize': 10
                    })
    # The style must be set before the figure is created for it to take effect.
    sns.set_style('whitegrid')
    palette = sns.color_palette("colorblind", 11)
    fig, ax = plt.subplots(nrows=1, ncols=1)  # figsize=(15, 5))
    dat = open_nc(nc_file)
    lats, lons, var, time = extract_vars_nc(dat, var_name)
    times = nc.num2date(time[:], time.units)
    idx = np.where(
        [times[x].year == times[366 * 8].year for x in range(len(times))])[0]
    time_x = times[idx]
    plt_var = var[:]
    plt_var[plt_var > 1e18] = np.nan
    depths = [100, 250, 650, 2000]
    #depths = [150, 350, 650, 2000]
    labels = ['0 - 0.1m', '0.1 - 0.35m', '0.35 - 1m', '1 - 3m']
    #if level in [0,1,2,3]:
    #    ax.plot(times[0:365], plt_var[0:365, level]/depths[level], label='wfdei', color=palette[0])
    #else:
    for yr in range(times[0].year, times[-1].year):
        idx = np.where([times[x].year == yr for x in range(len(times))])[0]
        #  print(len(idx))
        if var_name == 'smcl':
            ax.plot(
                time_x[0:364 * 8],
                plt_var[idx[0]:idx[364 * 8], level, 0, 0] / depths[level],
            )
        elif level != 'none':
            ax.plot(
                time_x[0:364 * 8],
                plt_var[idx[0]:idx[364 * 8], level, 0, 0],
            )
        else:
            ax.plot(
                time_x[0:364 * 8],
                plt_var[idx[0]:idx[364 * 8], 0, 0],
            )
    #plt.ylabel('Volumetric soil water content (m3 m-3)')
    plt.xlabel('Date')
    plt.gcf().autofmt_xdate()
    myFmt = mdates.DateFormatter('%B')
    ax.xaxis.set_major_formatter(myFmt)
    if ylab != 'None':
        plt.ylabel(ylab)
    #plt.legend(loc=2)
    #plt.show()
    return fig
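
A brief note on the year-overlay trick above (an illustrative restatement, not extra functionality): each year's slice is plotted against the timestamps of one reference year, so the curves stack on a shared January-to-December axis. The 366 * 8 offset presumably steps past the first (possibly partial or leap) year of the 3-hourly record:

ref_year = times[366 * 8].year                          # reference year for the x-axis
idx = np.where([t.year == ref_year for t in times])[0]  # its 3-hourly timestamps
time_x = times[idx]                                     # reused for every year's slice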
Example #59
from scrawalPage import *
import matplotlib.pyplot as plt
import seaborn as sns
Roomlist = Roomlist_download()
print(Roomlist.head())
Roomlist_group = Roomlist.groupby(
    'RoomName').count().reset_index().sort_values(by=['RoomPrice'],
                                                  ascending=False)
print(Roomlist_group)
sns.set_style('whitegrid')
sns.barplot(x='RoomName', y='RoomPrice', data=Roomlist_group[:10])
plt.show()
Example #60
import os

import pandas as pd

# `tt` is assumed to be the temoatools helper module (tt.create_results_dir,
# tt.remove_ext); SingleDB is a companion routine, defined elsewhere, that
# reads one database's time-of-day activity.


def getActivityTOD(folders,
                   dbs,
                   switch='fuel',
                   sector_name='electric',
                   save_data='N',
                   create_plots='N',
                   conversion=277.777778,
                   run_name=''):
    #    inputs:
    #    1) folders         - paths containing dbs (list, or a single string if all are in the same path)
    #    2) dbs             - names of databases (list)
    #    3) switch          - 'fuel' or 'tech', basis of categorization
    #    4) sector_name     - name of temoa sector to be analyzed
    #    5) save_data       - 'Y' or 'N', default is 'N'
    #    6) create_plots    - 'Y' or 'N', default is 'N'
    #    7) conversion      - conversion to GWh, default is 277.778 (from PJ)
    #    8) run_name        - used for saving results in a dedicated folder

    #    outputs:
    #    1) activity
    #    2) plots - optional
    #    3) data  - optional
    # ==============================================================================
    print("Analyzing activity by time of day (TOD)")

    # Save original directory
    wrkdir = os.getcwd()

    # If only a single db and folder provided, change to a list
    if type(dbs) == str and type(folders) == str:
        dbs = [dbs]
        folders = [folders]
    # If a list of folders is provided with one database, only use first folder
    elif type(dbs) == str:
        dbs = [dbs]
        folders = [folders[0]]
    # If only a single folder provided, create a list of the same folder
    elif type(folders) == str:
        fldrs = []
        for db in dbs:
            fldrs.append(folders)
        folders = fldrs

    # Create dataframe to hold each capacity_single series
    activity = pd.DataFrame(dtype='float64')

    # Iterate through each db
    for folder, db in zip(folders, dbs):
        activity_single = SingleDB(folder,
                                   db,
                                   switch=switch,
                                   sector_name=sector_name,
                                   conversion=conversion)
        activity = pd.concat([activity, activity_single])

    # Reset index (remove multi-level indexing, easier to use in Excel)
    activity = activity.reset_index()

    # Directory to hold results
    if save_data == 'Y' or create_plots == 'Y':
        tt.create_results_dir(wrkdir=wrkdir, run_name=run_name)

    # Save results to CSV
    if save_data == 'Y':
        # Create savename based on switch
        if switch == 'fuel':
            savename = 'activityTOD_by_fuel.csv'
        else:
            savename = 'activityTOD_by_tech.csv'
        activity.to_csv(savename)

    if create_plots == 'Y':

        df = activity.reset_index()

        import matplotlib.pyplot as plt
        import seaborn as sns

        for database in df.database.unique():
            # new figure
            plt.figure()
            # set aesthetics
            sns.set_style(
                "white", {
                    "font.family": "serif",
                    "font.serif": ["Times", "Palatino", "serif"]
                })
            sns.set_context("talk")

            # select relevant database
            df2 = df[(df.database == database)]
            # plot
            sns.relplot(x='tod',
                        y='value',
                        hue='fuelOrTech',
                        row='year',
                        col='season',
                        data=df2,
                        kind='line')

            # save
            if switch == 'fuel':
                savename = 'yearlyActivityTOD_byFuel' + tt.remove_ext(
                    database) + '.pdf'
            else:
                savename = 'yearlyActivityTOD_byTech' + tt.remove_ext(
                    database) + '.pdf'
            plt.savefig(savename, dpi=resolution)  # `resolution`: module-level dpi setting (defined elsewhere)
            # close the figure
            plt.close()

    # Return to original directory
    os.chdir(wrkdir)

    return activity
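
A hedged usage sketch (folder and database names are hypothetical):

activity = getActivityTOD(folders='/path/to/databases',
                          dbs=['scenario_A.sqlite', 'scenario_B.sqlite'],
                          switch='fuel',
                          sector_name='electric',
                          save_data='Y',
                          create_plots='Y',
                          run_name='tod_analysis')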