Example #1
0
def draw_zipcenters(zctacodes, zctashapes, dfl, colorcol='', gamma=1.0):
    '''
    Scatter-plot the ZCTA center points selected for `dfl`.

    NOTE (from the original author): this still needs to be vetted.

    Parameters
    ----------
    zctacodes, zctashapes :
        Passed straight through to `zctas_for_dfl`.
    dfl :
        Passed to `zctas_for_dfl`; when `colorcol` is given,
        `dfl[colorcol]` supplies the per-point color values.
    colorcol : str
        Column of `dfl` to color the points by. Empty (the default) draws
        uncolored points and no colorbar.
    gamma : float
        Exponent of the `PowerNorm` used to map values to colors.

    Returns
    -------
    tuple
        `(dfl_zctashapes, dfl_zctacenters)` as returned by `zctas_for_dfl`.
    '''
    dfl_zctashapes, dfl_zctacenters, _ = \
        zctas_for_dfl(zctacodes, zctashapes, dfl)

    # set style:
    sns.set(style="white", color_codes=True, font_scale=1.5)

    lon = dfl_zctacenters['longitude']
    lat = dfl_zctacenters['latitude']

    if colorcol:
        c = dfl[colorcol].copy()
        plt.scatter(lon, lat, c=c,
                    norm=colors.PowerNorm(gamma=gamma), cmap='Reds')
        # A colorbar is only meaningful when the points carry color data.
        plt.colorbar()
    else:
        plt.scatter(lon, lat)

    # return to default (this is a hack..)
    sns.set(style="darkgrid", color_codes=True, font_scale=1.5)

    return dfl_zctashapes, dfl_zctacenters
def plot_by_groups(df, plot_dir, af_key, config):
    """Plot allele frequencies of grouped/paired samples.

    One violin plot per (cohort, group) pair is appended to a multi-page PDF
    in `plot_dir`. Groups listed in `config.group_detailed` are additionally
    saved as PNG files under `<plot_dir>/detailed`.

    Side effect: adds a ``sample_label`` column to `df`.

    Returns the path of the multi-page PDF.
    """
    out_file = os.path.join(plot_dir, "cohort-group-af-comparison.pdf")
    df["sample_label"] = df.apply(lambda row: "%s\n%s" % (row["group_class"], row["sample"]), axis=1)
    sns.despine()
    sns.set(style="white")
    with PdfPages(out_file) as pdf_out:
        for (cohort, group), cur_df in df.groupby(["cohort", "group"]):
            # Reverse-sorted labels define the top-to-bottom order of violins.
            labels = sorted(cur_df["sample_label"].unique(), reverse=True)
            g = sns.violinplot(x=af_key, y="sample_label", data=cur_df,
                               order=labels, inner=None, bw=.1)
            # Show numeric group ids as integers when possible.
            try:
                group = int(group)
            except ValueError:
                pass
            g.set_title("%s: %s" % (cohort, group))
            g = _af_violinplot_shared(g)
            pdf_out.savefig(g.figure)
            if config and (cohort, group) in config.group_detailed:
                out_dir = utils.safe_makedir(os.path.join(plot_dir, "detailed"))
                # Separate name so the PDF path returned below is not clobbered.
                detail_file = os.path.join(out_dir, "group-%s-%s.png" % (cohort, group))
                g.figure.savefig(detail_file)
            plt.clf()
    return out_file
def plot_corr(file, score, stat, ind_var, brain_type):
    """Save a lower-triangle correlation heatmap of a CSV file.

    Parameters
    ----------
    file : path of the CSV file to load.
    score, stat, brain_type : str
        Used to build the output path
        ``<stat>/<score>/heatmap_<score>_<stat>_<brain_type>.png``.
    ind_var : sized
        Its length sets both the width and height of the figure (inches).

    Returns
    -------
    The correlation matrix computed with ``DataFrame.corr()``.
    """
    sns.set(style="white")

    dt = pd.read_csv(file)
    corr = dt.corr()

    # Mask the upper triangle (diagonal included) so each pair shows once.
    # Builtin `bool` is used because the `np.bool` alias no longer exists
    # in current NumPy releases.
    mask = np.zeros_like(corr, dtype=bool)
    mask[np.triu_indices_from(mask)] = True

    f, ax = plt.subplots(figsize=(len(ind_var), len(ind_var)))

    # Custom diverging colormap
    cmap = sns.diverging_palette(220, 10, as_cmap=True)

    sns.heatmap(corr, mask=mask, cmap=cmap, annot=False, ax=ax)
    # Leave room for long tick labels on the left and bottom.
    plt.subplots_adjust(left=0.30, bottom=0.30)
    plt.savefig(os.path.join(stat, score, "heatmap_" + score + "_" + stat + "_" + brain_type + ".png"))
    plt.close()

    return corr
Example #4
0
def draw_zips(zctashapes, xlim=(-91, -82), ylim=(41, 48)):
    """Draw the outlines of the given shapes on a new figure.

    Parameters
    ----------
    zctashapes : sequence
        Shape objects exposing ``points`` (vertex list) and ``parts``
        (start index of every ring within ``points``).
    xlim, ylim : (float, float)
        Axis limits; the defaults are the previously hard-coded window.

    Returns
    -------
    The matplotlib Axes holding the outlines.
    """
    # set style:
    sns.set(style="white", color_codes=True, font_scale=1.5)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    for shape in zctashapes:
        pts = np.array(shape.points)
        # Ring boundaries: every part start plus the end-of-points sentinel.
        bounds = list(shape.parts) + [pts.shape[0]]
        patches = [Polygon(pts[start:stop])
                   for start, stop in zip(bounds[:-1], bounds[1:])]
        # Add each shape's rings once (not once per ring, which stacked
        # duplicate collections for multi-part shapes).
        if patches:
            ax.add_collection(PatchCollection(patches, edgecolor='k', facecolor='w', linewidths=.1))
    ax.set_xlim(*xlim)
    ax.set_ylim(*ylim)

    # return to default (this is a hack..)
    sns.set(style="darkgrid", color_codes=True, font_scale=1.5)

    return ax
Example #5
0
def UseSeaborn(palette='deep'):
    """Switch the global plotting defaults over to seaborn.

    `palette` is forwarded to ``sns.set_palette``; the special value
    ``'xkcd'`` selects ``sns.xkcd_palette(xkcdcolors)`` instead.
    """
    import seaborn as sns

    # Base theme: larger fonts and a framed legend.
    sns.set(style='whitegrid', font_scale=1.5, rc={'legend.frameon': True})
    # Switch to the 'ticks' style (replaces the 'whitegrid' look above) ...
    sns.set_style('ticks')
    # ... and make the ticks point inwards.
    sns.set_style({"xtick.direction": "in", "ytick.direction": "in"})

    chosen_palette = sns.xkcd_palette(xkcdcolors) if palette == 'xkcd' else palette
    sns.set_palette(chosen_palette)

    # Work around the invisible-marker problem by forcing a thin marker edge.
    sns.set_context(rc={'lines.markeredgewidth': 0.1})
Example #6
0
def generate_plot(csv_file_name, plot_file_name, x, y, hue, y_title, xticklabels_rotation=90):
    """Draw a bar plot from a CSV file and save it twice.

    The figure is written to `plot_file_name` and to
    `plot_file_name + '.jpg'`. When `hue` is truthy the legend is placed
    outside the axes and included in a tight bounding box; otherwise
    ``tight_layout`` is applied. All pyplot figures are closed afterwards.
    """
    sns.set(font_scale=1.5)
    sns.set_style("white", {"legend.frameon": True})

    frame = pd.read_csv(csv_file_name)

    axes = sns.barplot(data=frame, x=x, y=y, hue=hue,
                       palette=sns.color_palette("Paired"))
    axes.set_xlabel('')
    axes.set_ylabel(y_title)
    axes.set_xticklabels(axes.get_xticklabels(), rotation=xticklabels_rotation)

    figure = axes.get_figure()

    if hue:
        legend = axes.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
        legend.set_label('')
        save_options = {'bbox_extra_artists': (legend,), 'bbox_inches': 'tight'}
    else:
        figure.tight_layout()
        save_options = {}

    for target in (plot_file_name, plot_file_name + '.jpg'):
        figure.savefig(target, **save_options)

    plt.clf()
    plt.close('all')
def plotTradeVsNews(tickName):
    """Show joint plots of log trading volume against news metrics.

    Data comes from ``getNewsNTradingVol(tickName, "resultsMKII")``. For both
    the ``NewsBuz`` column and the log of the ``NewsVol`` column, a hexbin
    and a KDE joint plot against the log of ``tradingVol`` are drawn, then
    all figures are shown.

    NOTE(review): ``sns.plt``, ``stat_func`` and ``size`` belong to an old
    seaborn API; they are kept as-is to match the seaborn version this file
    was written for.
    """
    path2 = "resultsMKII"
    # The parameter is `tickName`; the body previously read the undefined
    # name `tick_Name`.
    frame = getNewsNTradingVol(tickName, path2)

    news_buz = np.array(frame['NewsBuz'].values)
    trading_vol = np.log(np.asarray(frame['tradingVol'].values, dtype=float))
    news_vol = np.log(np.asarray(frame['NewsVol'].values, dtype=float))

    sns.set(style="ticks")
    y = trading_vol
    for x, x_label in ((news_buz, "News Buz"), (news_vol, "News Volume")):
        hex_grid = sns.jointplot(x, y, kind="hex", stat_func=kendalltau, color="#4CB391")
        hex_grid.set_axis_labels(xlabel=x_label, ylabel="Trading Volume")

        kde_grid = sns.jointplot(x, y, kind="kde", size=7, space=0)
        kde_grid.set_axis_labels(xlabel=x_label, ylabel="Trading Volume")

    sns.plt.show()
Example #8
0
    def load_expression_figure(self, gene_id):
        """Draw the expression bar plot for `gene_id` and show it in the frame.

        Reads the row `gene_id` from ``self.exp_df``, plots ``RPKM`` per
        ``sample`` with a fixed per-sample color map, and replaces any
        previously shown canvas in ``self.figure_frame``.
        """
        # Apply the theme BEFORE creating the figure/axes: face and grid
        # colors are read from rcParams when those objects are created, so
        # setting them afterwards left the first figure unthemed.
        sb.set(rc={"axes.facecolor": window_background, "figure.facecolor": window_background,
                   "grid.color": "#ecf0f1"})

        self.f = Figure(figsize=figure_size, dpi=100)
        a = self.f.add_subplot(111)
        gene_data = self.exp_df.loc[gene_id]

        # One color per treatment group, repeated for each sample in it.
        treat_colors = {"carp1": "#e74c3c", "carp2": "#e74c3c", "carp3": "#e74c3c",
                        "carp4": "#3498db", "carp5": "#3498db", "carp6": "#3498db",
                        "carp7": "#9b59b6", "carp8": "#9b59b6", "carp9": "#9b59b6",
                        "carp11": "#2ecc71", "carp12": "#2ecc71"}

        p = sb.barplot(ax=a, data=gene_data, x="sample", y="RPKM",
                       palette=treat_colors, alpha=0.75)

        p.set(ylabel=self.expression_y_label)
        p.set(xlabel="")

        # Hide the previously shown figure, if any.
        # NOTE(review): grid_forget() only unmaps the old widget; consider
        # destroy() if nothing else references it.
        if hasattr(self, "canvas"):
            self.canvas.get_tk_widget().grid_forget()

        self.canvas = FigureCanvasTkAgg(self.f, master=self.figure_frame)
        self.canvas.get_tk_widget().grid(row=1, column=0, padx=0, pady=0, sticky="ew")
def clust_stability(log2_expdf_gene, iterations=16):
    """Measure how stable the clustered column order is as genes are added.

    For gene counts from 100 up to (excluding) 1000, `plot_PCA` and
    `clust_heatmap` are run and the resulting column order is compared, via
    ``difflib.SequenceMatcher``, with the previous order ("running") and
    with the first order. Both ratios are shown as bar plots, saved as
    ``clustering_stability.pdf`` under the module-level `filename`, and
    returned.

    Parameters
    ----------
    log2_expdf_gene : DataFrame indexed so that ``df[top_pca]`` selects genes.
    iterations : int
        Controls the step between gene counts (``round(1000 / iterations)``).

    Returns
    -------
    list of (running_ratio, ratio_to_first) tuples, one per step after the
    first.
    """
    sns.set(context='poster', font_scale = 1)
    sns.set_palette("RdBu_r")
    stability_ratio = []
    end_num = 1000
    # Guard against a zero step when `iterations` is very large.
    step = max(1, int(round(end_num / float(iterations))))
    iter_list = list(range(100, int(round(end_num)), step))

    first_order = None
    previous_order = None
    for gene_number in iter_list:
        title = str(gene_number)+' genes plot.'
        top_pca = plot_PCA(log2_expdf_gene, num_genes=gene_number, title=title)
        top_pca_by_gene = log2_expdf_gene[top_pca]
        cell_linkage, plotted_df_by_gene, col_order = clust_heatmap(top_pca, top_pca_by_gene, num_to_plot=gene_number, title=title)
        if first_order is None:
            # The first run only establishes the baseline order.
            first_order = col_order
        else:
            sm_running = difflib.SequenceMatcher(None, previous_order, col_order)
            sm_first = difflib.SequenceMatcher(None, first_order, col_order)
            stability_ratio.append((sm_running.ratio(), sm_first.ratio()))
        previous_order = col_order
        plt.close()

    x = iter_list[1:]
    f, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8), sharex=True)
    y1 = [m[0] for m in stability_ratio]
    y2 = [m[1] for m in stability_ratio]
    # Keyword arguments: positional x/y are rejected by current seaborn.
    sns.barplot(x=x, y=y1, palette="RdBu_r", ax=ax1)
    ax1.set_ylabel('Running ratio (new/last)')
    sns.barplot(x=x, y=y2, palette="RdBu_r", ax=ax2)
    ax2.set_ylabel('Ratio to 100')
    plt.savefig(os.path.join(filename,'clustering_stability.pdf'), bbox_inches='tight')
    plt.show()
    plt.close()
    return stability_ratio
Example #10
0
def plot_data(data, value="AverageReturn"):
    """Plot `value` against ``Iteration`` for every ``Condition`` and show it.

    Parameters
    ----------
    data : DataFrame or list of DataFrames
        A list is concatenated into a single frame. The frame must provide
        the columns ``Iteration``, ``Unit``, ``Condition`` and `value`.
    value : str
        Name of the column plotted on the y axis.
    """
    if isinstance(data, list):
        data = pd.concat(data, ignore_index=True)
    sns.set(style="darkgrid", font_scale=1.5)

    if hasattr(sns, "tsplot"):
        sns.tsplot(data=data, time="Iteration", value=value, unit="Unit", condition="Condition")
    else:
        # Newer seaborn releases no longer provide tsplot; lineplot
        # aggregates the repeated units per iteration into a mean line with
        # an uncertainty band, one line per condition.
        sns.lineplot(data=data, x="Iteration", y=value, hue="Condition")

    legend = plt.legend(loc='best')
    # Legend.draggable() was replaced by set_draggable() in newer Matplotlib.
    if hasattr(legend, "set_draggable"):
        legend.set_draggable(True)
    else:
        legend.draggable()
    plt.show()
Example #11
0
File: rdf.py Project: ryokbys/nap
def plot_figures(nspcs,rd,agr):
    """Save RDF plots as PNG files in the current directory.

    ``graph_rdf_total.png`` shows ``agr[0,0,:]`` against `rd`. When `nspcs`
    is not 1, ``graph_rdfs.png`` additionally holds an `nspcs` x `nspcs`
    grid whose upper triangle (diagonal included) shows
    ``agr[i+1,j+1,:]``; the lower-triangle panels are switched off.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns
    sns.set(context='talk',style='ticks')

    distances = rd

    # --- total RDF ---
    plt.figure(figsize=(8,6))
    plt.plot(distances, agr[0,0,:], 'r-', label='Total RDF')
    plt.xlabel('Distance (A)')
    plt.ylabel('RDF')
    plt.legend()
    plt.savefig("graph_rdf_total.png", format='png', dpi=300, bbox_inches='tight')

    if nspcs == 1:
        return

    # --- per-pair RDFs ---
    plt.clf()
    fig, axes = plt.subplots(nspcs,nspcs,figsize=(15,10),sharex=True)
    for row in range(nspcs):
        for col in range(nspcs):
            panel = axes[row,col]
            if col < row:
                # Pair (col, row) is already drawn above the diagonal.
                panel.axis('off')
                continue
            # Species indices into `agr` are 1-based.
            isp, jsp = row + 1, col + 1
            panel.plot(distances, agr[isp,jsp,:], 'r-')
            panel.set_title('{0:d}-{1:d}'.format(isp,jsp))
            if isp == jsp:
                panel.set_xlabel('Distance (A)')
            if isp == 1 and jsp == 1:
                panel.set_ylabel('RDF')
    plt.savefig("graph_rdfs.png", format='png', dpi=300, bbox_inches='tight')
    return
Example #12
0
File: visr.py Project: CoAxLab/radd
def plot_traces_rts(p, all_traces, rts, names=['A', 'B', 'C', 'D'], tb=1000):
    """Plot decision traces with an RT distribution above each panel.

    Parameters
    ----------
    p : mapping
        Must provide ``'tr'`` and ``'a'``; their means are used for the onset
        offset and the text position (presumably onset time and boundary
        height -- confirm against the model code).
    all_traces : sequence
        ``all_traces[i][ii]`` is the trace drawn on panel ``ii`` for run ``i``.
    rts : dict
        Response times per condition; values are scaled by 1e3 and shifted
        by the mean onset before plotting.
    names : sequence of str
        Labels used in the mean-RT annotation of each panel.
    tb : number
        Only used as the time bound when `rts` is empty; otherwise the bound
        is derived from the largest shifted RT plus 50.
    """
    tr = np.mean(p['tr'])*1e3
    # sorted() works on dict keys in both Python 2 and 3.
    rtkeys = sorted(rts)
    rt_dists = [np.asarray(rts[k])*1e3-tr for k in rtkeys]
    if rt_dists:
        tb = np.ceil(np.max([np.max(rti) if len(rti)>0 else 0 for rti in rt_dists]))+50
    sns.set(style='white', font_scale=1.5)
    f, axes = build_multi_axis(p, tb=tb)
    clrs = ['#3572C6',  '#c44e52', '#8172b2', '#83a83b']
    for traces in all_traces:
        for ii, ax in enumerate(axes.flatten()):
            x = np.arange(len(traces[ii]))
            ax.plot(x, traces[ii], color=clrs[ii], alpha=.3, lw=.75)

    # Traces start at sample 0, so the annotation sits 50 units left of it.
    text_x = -50
    mean_bound = np.mean(p['a'])
    for i, ax in enumerate(axes.flatten()):
        divider = make_axes_locatable(ax)
        axx = divider.append_axes("top", size=.7, pad=0.01, sharex=ax)
        for spine in ['top', 'left', 'bottom', 'right']:
            axx.spines[spine].set_visible(False)
        axx.set_xticklabels([])
        axx.set_yticklabels([])
        # Skip panels without (enough) RT data.
        if i >= len(rt_dists) or len(rt_dists[i])<=1:
            continue
        sns.distplot(rt_dists[i], ax=axx, label=rtkeys[i], kde=True, hist=True, color=clrs[i], bins=20)
        text_str = r'$\mu_{%s}=%.fms$'%(names[i], tr+np.mean(rt_dists[i]))
        ax.text(text_x, mean_bound-.1*mean_bound, text_str, fontsize=21)
def draw_chart(chart_name,measure,axis,val_ordinate,train_ordinate,test_ordinate,dst_folder):
    """Plot up to three learning curves and save them as a PDF.

    Parameters
    ----------
    chart_name : str
        Underscore-separated run name. Its first token and a fixed
        hyper-parameter suffix are dropped to build the title and file name.
    measure : str
        Y-axis label; also appended to the output file name.
    axis : sequence
        X values (epochs) shared by all curves.
    val_ordinate, train_ordinate, test_ordinate : sequence or None
        Y values of the validation/train/test curves; ``None`` skips a curve.
    dst_folder : str
        Directory that receives ``<name>_<measure>.pdf``.

    The figure is closed after saving so repeated calls do not accumulate
    open figures.
    """
    suffix = '_test_es_50_lr_1e-05_l2_0_0_mc_3_hsize_250'

    # The style was renamed in newer Matplotlib; try the old name first.
    try:
        plt.style.use('seaborn')
    except (OSError, IOError):
        plt.style.use('seaborn-v0_8')
    sns.set(font_scale=1.2)
    sns.set_style({'font.family': 'serif'})

    fig, ax = plt.subplots(figsize=(8, 8))
    ax.title.set_position([.5, 1.05])
    ax.tick_params(axis='both', which='major', labelsize=8)
    ax.tick_params(axis='both', which='minor', labelsize=8)

    ax.set_title(' '.join(chart_name.replace(suffix, '').split('_')[1:]))
    ax.set_yticks(np.arange(0, 1.1, 0.1))
    ax.set_ylim(0, 1)
    ax.set_xticks(np.arange(0, 11, 1))
    ax.set_xlim(0, 10)
    ax.grid(True)
    ax.set_xlabel('epochs')
    ax.set_ylabel(measure)

    curves = (
        (val_ordinate, "pale red", 'validation'),
        (train_ordinate, "medium green", 'train'),
        (test_ordinate, "denim blue", 'test'),
    )
    for ordinate, color_name, label in curves:
        if ordinate is not None:
            ax.plot(axis, ordinate, color=sns.xkcd_rgb[color_name], marker='.', label=label)
    ax.legend()

    file_stem = '_'.join(chart_name.split('_')[1:]).replace(suffix, '')
    fig.savefig(os.path.join(dst_folder, '{}_{}.pdf'.format(file_stem, measure)), dpi=300, bbox_inches='tight')
    plt.close(fig)
Example #14
0
File: visr.py Project: CoAxLab/radd
def plot_rt_dists(simdf, axes=None):
    """Plot one RT histogram per target ('A'..'D') on a 2x2 set of axes.

    Parameters
    ----------
    simdf : DataFrame
        Must provide the columns ``choice`` and ``rt``.
    axes : array-like of 4 matplotlib Axes, optional
        Axes to draw on; a new 2x2 figure is created when omitted.
    """
    targets=['A', 'B', 'C', 'D']
    targetColors = dict(zip(targets, ['#3572C6',  '#c44e52', '#8172b2', '#83a83b']))
    sns.set(style='white')
    if axes is None:
        f, axes = plt.subplots(2, 2, figsize=(9, 6), sharex=True)
    # Accept lists as well as arrays, and keep working on the axes we were
    # given (the figure handle only exists when we created it ourselves).
    axes = np.asarray(axes).flatten()
    for ax, target in zip(axes, targets):
        rts = simdf[simdf.choice==target].rt.values
        sns.distplot(rts, kde=False, hist_kws={'alpha':.9}, norm_hist=True, bins=10, ax=ax, color=targetColors[target])
        top = ax.get_ylim()[1]*.75
        ax.text(750, top,  target, color=targetColors[target], fontsize=19)
    x = np.array([0,300,600,900])
    # Left column gets y labels, bottom row gets x labels.
    axes[0].set_ylabel('Probability Mass', fontsize=17)
    axes[2].set_ylabel('Probability Mass', fontsize=17)
    axes[2].set_xlabel('Time (ms)', fontsize=17)
    axes[3].set_xlabel('Time (ms)', fontsize=17)
    for ax in axes:
        ax.set_title('')
        ax.set_xticks(x)
        ax.set_yticklabels('')
        ax.set_xlim(0,900)
    axes[2].set_xticklabels(x, fontsize=12)
    axes[3].set_xticklabels(x, fontsize=12)
    sns.despine()
Example #15
0
File: visr.py Project: CoAxLab/radd
def build_multi_axis(p, nresp=4, tb=800):
    """Create a bare 2x2 grid of axes framed by boundary and onset lines.

    ``p['a']`` gives the upper bound and ``p['tr']`` the onset; when they are
    iterable only their first element is used. `nresp` is accepted but not
    used. Returns ``(figure, flattened_axes)``.
    """
    sns.set(style='white', font_scale=1.5)

    bound, onset = p['a'], p['tr']
    if hasattr(bound, '__iter__'):
        bound, onset = bound[0], onset[0]

    # init figure, axes, properties
    f, axes = plt.subplots(2, 2, figsize=(14, 7), sharex=True, sharey=True, dpi=600)
    f.subplots_adjust(hspace=.1, top=.99, bottom=.05)
    axes = axes.flatten()

    right_edge = tb + 40
    left_edge = onset - 80
    x_limits = (left_edge - 1, right_edge + 1)
    y_limits = (0 - (.01 * bound), bound + (.01 * bound))

    for ax in axes:
        plt.setp(ax, xlim=x_limits, ylim=y_limits)
        # Horizontal frame: upper bound and zero line.
        for level in (bound, 0):
            ax.hlines(y=level, xmin=left_edge, xmax=right_edge, color='k')
        # Vertical markers: time bound (faint blue) and left edge (black).
        ax.vlines(x=tb, ymin=0, ymax=bound, color='#2043B0', lw=1, linestyle='-', alpha=.35)
        ax.vlines(x=left_edge + 2, ymin=0, ymax=bound, color='k')
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_xticks([])
        ax.set_yticks([])
    sns.despine(top=True, right=True, bottom=True, left=True)
    return f, axes
Example #16
0
def plot_target_corr(filter_outs, seq_targets, filter_names, target_names, out_pdf, seq_op='mean'):
    """Save a clustered heatmap of filter-vs-target Spearman correlations.

    Parameters
    ----------
    filter_outs : ndarray
        Reduced over axis 2 (mean or max), leaving a sequence-by-filter
        matrix.
    seq_targets : ndarray
        Indexed as ``seq_targets[:num_seqs, ti]``.
    filter_names : array-like
        One name per filter; filters with zero variance are dropped.
    target_names : sequence
        Column labels; its length sets the number of targets.
    out_pdf : str
        Output path of the figure.
    seq_op : str
        ``'mean'`` reduces with the mean, anything else with the max.
    """
    num_seqs = filter_outs.shape[0]
    num_targets = len(target_names)

    if seq_op == 'mean':
        filter_outs_seq = filter_outs.mean(axis=2)
    else:
        filter_outs_seq = filter_outs.max(axis=2)

    # Keep only filters whose output varies across sequences; a constant
    # column has no defined rank correlation.
    live = filter_outs_seq.std(axis=0) > 0
    filter_outs_seq = filter_outs_seq[:, live]
    # asarray so that plain lists of names can be boolean-indexed too.
    filter_names_live = np.asarray(filter_names)[live]

    filter_target_cors = np.zeros((len(filter_names_live), num_targets))
    for fi in range(len(filter_names_live)):
        for ti in range(num_targets):
            cor, p = spearmanr(filter_outs_seq[:, fi], seq_targets[:num_seqs, ti])
            filter_target_cors[fi, ti] = cor

    cor_df = pd.DataFrame(filter_target_cors, index=filter_names_live, columns=target_names)

    sns.set(font_scale=0.3)
    # clustermap creates (and makes current) its own figure, so no
    # plt.figure() beforehand -- that only left an empty figure open.
    sns.clustermap(cor_df, cmap='BrBG', center=0, figsize=(8,10))
    plt.savefig(out_pdf)
    plt.close()
Example #17
0
def plot_filter_seq_heat(filter_outs, out_pdf, whiten=True, drop_dead=True):
    """Save a clustered heatmap of mean filter outputs for sampled sequences.

    Parameters
    ----------
    filter_outs : ndarray
        Averaged over axis 2, giving a sequence-by-filter matrix.
    out_pdf : str
        Output path of the figure.
    whiten : bool
        Standardize the matrix with ``preprocessing.scale`` first.
    drop_dead : bool
        Drop filters whose (transposed) row has zero standard deviation.
    """
    # compute filter output means per sequence
    filter_seqs = filter_outs.mean(axis=2)

    if whiten:
        filter_seqs = preprocessing.scale(filter_seqs)

    # filters become rows, sequences become columns
    filter_seqs = np.transpose(filter_seqs)

    if drop_dead:
        filter_stds = filter_seqs.std(axis=1)
        filter_seqs = filter_seqs[filter_stds > 0]

    # Pick 500 sequence columns at random.
    # NOTE(review): randint samples WITH replacement, so columns can repeat.
    seqs_i = np.random.randint(0, filter_seqs.shape[1], 500)
    sampled = filter_seqs[:, seqs_i]

    # Clip the color range to the 0.1 / 99.9 percentiles of the sample.
    hmin = np.percentile(sampled, 0.1)
    hmax = np.percentile(sampled, 99.9)

    sns.set(font_scale=0.3)

    # clustermap creates (and makes current) its own figure, so no
    # plt.figure() beforehand -- that only left an empty figure open.
    sns.clustermap(sampled, row_cluster=True, col_cluster=True, linewidths=0, xticklabels=False, vmin=hmin, vmax=hmax)
    plt.savefig(out_pdf)
    plt.close()
Example #18
0
    def plot_violinplot(self, dest_dir=None):
        """Draw a violin plot of ``IDP`` per sample, ordered by median.

        With `dest_dir` the figure is saved there as
        ``coverage_violinplot.png``, closed, and the file path is returned;
        otherwise the Axes is returned.
        """
        sns.set(style='whitegrid')

        fig = plt.figure(figsize=(10, 5))
        ax = fig.add_subplot(1, 1, 1)

        # Samples sorted by ascending median IDP.
        sample_order = (self.intervals.groupby('sample_id')['IDP']
                        .median()
                        .sort_values()
                        .index)

        ax = sns.violinplot(ax=ax, data=self.intervals, x='sample_id',
                            y='IDP', order=sample_order, linewidth=1)

        upper = ax.get_ylim()[1]
        ax.set_ylim(-5, upper)
        ax.set_title('Coverage per Sample', y=1.02)

        # Rotate the tick labels once there are more than ten samples.
        tick_labels = ax.get_xticklabels()
        if len(self.samples) > 10:
            rotation = 90
        else:
            rotation = 0
        ax.set_xticklabels(tick_labels, rotation=rotation)

        self._add_global_IDP_mean_and_median_lines(ax)

        if not dest_dir:
            return ax

        filepath = os.path.join(dest_dir, 'coverage_violinplot.png')
        plt.savefig(filepath, bbox_inches='tight', dpi=150)
        plt.close()
        return filepath
Example #19
0
def stripplot_to_pdf(data, save_path, x=None, y=None, hue=None,
                     style='whitegrid', fontsize=2, rows=1, cols=1,
                     figsize=(4, 4), **kwargs):
    """ Data plotted as stripplot using seaborn and saved in a pdf
    given in save_path

    Parameters
    ----------
    data : pd.DataFrame or path to csv file
        single or list of data to plot into pdf. When a list/tuple is
        given, `cols` is set to its length.

    save_path : str
        Path to save the pdf plot.

    x, y, hue : str, optional
        Column names forwarded to ``sns.stripplot``.

    style : str
        Seaborn style name.

    fontsize : float
        Seaborn ``font_scale``.

    rows, cols : int
        Subplot grid shape.

    figsize : tuple
        Figure size in inches.

    **kwargs
        Forwarded to ``sns.stripplot``.
    """
    # `basestring` only exists on Python 2.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(data, (list, tuple)):
        cols = len(data)
    else:
        data = [data]
    # Load any entries given as CSV paths.
    data = [pd.read_csv(d) if isinstance(d, string_types) else d
            for d in data]

    # One call: a separate sns.set(font_scale=...) after set_style() would
    # reset the style back to seaborn's default.
    sns.set(style=style, font_scale=fontsize)

    with PdfPages(save_path) as pdf:
        # squeeze=False always yields a 2-D array of axes, also for 1x1.
        fig, axes = plt.subplots(nrows=rows, ncols=cols, figsize=figsize,
                                 squeeze=False, sharey=True)
        for ax, d in zip(axes.reshape(-1), data):
            sns.stripplot(x=x, y=y, hue=hue, data=d, ax=ax, **kwargs)
        pdf.savefig(fig)
        plt.close(fig)
Example #20
0
def plot_galaxy_and_stars(galaxy, stars):
    """Plot a KDE of the galaxy particles with the stars drawn on top.

    Only galaxy particles whose x and y lie within +/-60 parsec are used for
    the density estimate. Star markers are sized by the square root of their
    mass relative to the most massive star. The figure is saved as
    ``Fujii_Comparison_Figure``.
    """
    colors = get_distinct(3)
    single_frame('X [pc]', 'Y [pc]')
    xlim = 60
    pyplot.xlim(-xlim, xlim)
    pyplot.ylim(-xlim, xlim)
    ax = pyplot.gca()

    import numpy as np
    import seaborn as sns
    sns.set(color_codes=True)

    # Restrict the particles to the plotted window.
    lower = -xlim | units.parsec
    upper = xlim | units.parsec
    p = galaxy.select(lambda x: x < upper, ["x"])
    p = p.select(lambda x: x > lower, ["x"])
    p = p.select(lambda y: y < upper, ["y"])
    p = p.select(lambda y: y > lower, ["y"])
    x = p.x.value_in(units.parsec)
    y = p.y.value_in(units.parsec)
    # NOTE(review): positional x/y is the older seaborn calling convention.
    sns.kdeplot(x, y, ax=ax)

    m = 100*np.sqrt(stars.mass/stars.mass.max())
    pyplot.scatter(stars.x.value_in(units.parsec), stars.y.value_in(units.parsec), c=colors[0], s=m, lw=0)
    pyplot.savefig("Fujii_Comparison_Figure")
Example #21
0
def style(mod = None):
    """Apply the compact plotting theme (small fonts, thin lines, inward ticks).

    `mod` is accepted but not used.
    """
    sb.set(font_scale=0.8)

    general = {
        'lines.linewidth': 0.3,
        'font.family': ['sans-serif'],
        'font.sans-serif': ['Arial'],
        'text.color': '.0',
        'savefig.dpi': 100,
        'savefig.transparent': True,
    }
    axes_and_grid = {
        'axes.labelcolor': '.0',
        'axes.linewidth': 0.5,
        'axes.edgecolor': '.2',
        'axes.facecolor': 'white',
        'axes.grid': True,
        'grid.linewidth': 0.5,
        'grid.color': '.9',
    }
    # x and y ticks share the same settings.
    ticks = {}
    for which in ('xtick', 'ytick'):
        ticks[which + '.color'] = '.0'
        ticks[which + '.direction'] = 'in'
        ticks[which + '.major.size'] = 3.0
        ticks[which + '.minor.size'] = 1
        ticks[which + '.major.width'] = 0.5
        ticks[which + '.minor.width'] = 0.5
        ticks[which + '.major.pad'] = 3

    overrides = {}
    overrides.update(general)
    overrides.update(axes_and_grid)
    overrides.update(ticks)
    sb.set_style(overrides)
Example #22
0
File: frame.py Project: TAKSIM/camp
    def createSubOverviewPage(self):
        """Build the liability overview page and return its layout.

        Queries the LIABILITY table for entries expiring on or after the
        current system date, grouped by expiry date, and draws two overlaid
        horizontal bar series per date: the 'Total Return' column (labelled
        'Interest') and, on top of it, the 'Amount' column (labelled
        'Principal').
        """
        layout = QtGui.QGridLayout()
        w = QtGui.QWidget()
        sns.set(style="whitegrid")
        f, ax = plt.subplots(figsize=(20, 12))
        canvas = figureCanvas(f)
        canvas.setParent(w)
        q = QtSql.QSqlQuery("""SELECT EXP_DATE, SUM(AMOUNT), SUM(AMOUNT*(1+EXP_RETURN*(datediff(EXP_DATE, SETTLE_DATE)+1)/36500.0)) FROM LIABILITY WHERE EXP_DATE>='%s' GROUP BY EXP_DATE ORDER BY EXP_DATE"""%self.sysdate.date().toPyDate())
        dates, vals = [], []
        while q.next():
            dates.append(q.value(0).toDate().toPyDate().isoformat())
            vals.append((q.value(1).toDouble()[0], q.value(2).toDouble()[0]))
        data = pd.DataFrame(vals, index=dates, columns=['Amount', 'Total Return'])

        # Back layer (pastel): the 'Total Return' column per expiry date.
        sns.set_color_codes("pastel")
        sns.barplot(x='Total Return', y=dates, data=data,
                    label='Interest', color="b", ax=ax)

        # Front layer (muted): the 'Amount' column drawn over the same bars.
        sns.set_color_codes("muted")
        sns.barplot(x='Amount', y=dates, data=data,
                    label="Principal", color="b", ax=ax)

        # Add a legend and informative axis label
        ax.legend(ncol=2, loc="upper right", frameon=True)
        ax.set(ylabel="Maturity Date", title='Liability Overview')
        sns.despine(left=True, bottom=True)

        layout.addWidget(w, 0, 0, 1, 1)
        return layout
Example #23
0
def plot_CellCnn_PR_curves(prec, recall, seq, seq_labels, nclust, plotdir, key):
    """Save precision-recall curves, one per entry of `seq`, as an EPS file.

    Parameters
    ----------
    prec, recall : mapping
        Precision / recall values keyed by the entries of `seq`.
    seq : sequence
        Keys to plot, in order.
    seq_labels : sequence or None
        Legend label per entry of `seq`. Empty/None draws unlabeled curves.
    nclust : mapping or None
        Per-key pair of counts. ``(1, 1)`` gives a solid line; any other
        pair gives a dashed line with the counts appended to the label.
        ``None`` draws unlabeled curves.
    plotdir : str
        Output directory (created if needed via `mkdir_p`).
    key : str
        Prefix of the output file name ``<key>_CellCnn_PRcurve.eps``.
    """
    sns.set(style="white")
    curr_palette = sns.color_palette("Set1", n_colors=len(seq))

    unlabeled = (seq_labels is None or len(seq_labels) == 0
                 or nclust is None)

    # A fresh figure; no plt.clf() first, which would clear (or create)
    # an unrelated current figure.
    f, ax = plt.subplots()
    for i, nblast in enumerate(seq):
        if unlabeled:
            ax.plot(recall[nblast], prec[nblast])
        elif nclust[nblast] == (1,1):
            ax.plot(recall[nblast], prec[nblast], c=curr_palette[i],
                    linestyle='-',
                    label=seq_labels[i])
        else:
            ax.plot(recall[nblast], prec[nblast], c=curr_palette[i],
                    linestyle='--',
                    label=seq_labels[i] + ' (%d/%d)' % nclust[nblast])

    ax.set_xlabel('Recall', fontsize=28)
    ax.set_ylabel('Precision', fontsize=28)
    ax.set_ylim([0.0, 1.05])
    ax.set_xlim([0.0, 1.05])
    # Only request a legend when there are labeled curves to list.
    if not unlabeled:
        ax.legend(loc='center left', prop={'size':20})

    for item in (ax.get_xticklabels() + ax.get_yticklabels()):
        item.set_fontsize(24)
    f.tight_layout()
    sns.despine(fig=f)
    mkdir_p(plotdir)
    f.savefig(os.path.join(plotdir, key+'_CellCnn_PRcurve.eps'),
              format='eps')
    plt.close(f)
Example #24
0
def plot_PBMC_boxplots(x, y, int_ctype, le, params, outdir, stim_name,
                        which_filter='max', format='png'):
    """Save a box plot of cell filter activity, stimulated vs. control.

    The data frames for both conditions come from `organize_plates`
    (``yi=1`` and ``yi=0``). The figure is written to `outdir` as
    ``<stim_name>_boxplot_positive.<format>`` when `which_filter` is
    ``'max'`` and ``<stim_name>_boxplot_negative.<format>`` otherwise.
    """
    palette = sns.color_palette("Set1", n_colors=10)
    df_stim = organize_plates(x, y, int_ctype, le, params, yi=1,
                                stim_name=stim_name, which_filter=which_filter)
    df_unstim = organize_plates(x, y, int_ctype, le, params, yi=0,
                                stim_name=stim_name, which_filter=which_filter)
    df = pd.concat([df_stim, df_unstim])

    plt.figure()
    sns.set(style="ticks")

    cols = ['CD4+', 'CD8+', 'NK cells', 'B cells', 'monocytes', 'dendritic', 'surf-']
    # Fix the category order so the stimulated condition comes first.
    # (Assignment instead of inplace=True, which newer pandas removed.)
    df["condition"] = (df["condition"].astype("category")
                       .cat.set_categories([stim_name, 'control']))
    # sort_values replaces the removed DataFrame.sort; whis=[0, 100] spans
    # the full data range like the removed whis='range'.
    ax = sns.boxplot(x="", y="cell filter activity", hue="condition",
                    data=df.sort_values(["condition"]), order = cols,
                    palette=palette, whis=[0, 100], sym='')
    ax.legend(prop={'size':19})

    for item in ([ax.xaxis.label, ax.yaxis.label] +
            ax.get_xticklabels() + ax.get_yticklabels()):
        item.set_fontsize(20)
    sns.despine(offset=10, trim=True)
    if which_filter =='max':
        fig_path = os.path.join(outdir, stim_name+'_boxplot_positive.'+format)
    else:
        fig_path = os.path.join(outdir, stim_name+'_boxplot_negative.'+format)
    plt.savefig(fig_path, format=format)
    plt.clf()
    plt.close()
Example #25
0
def plot_benchmark_PR_curves(r_cnn, p_cnn, r_outlier, p_outlier, r_mean, p_mean,
                             r_sc, p_sc, nblast, plotdir, key):
    """Save the precision-recall curves of four methods as an EPS file.

    Each ``r_*`` / ``p_*`` pair holds the recall / precision values of one
    method ('CellCnn', 'outlier', 'mean', 'sc'). The figure is written to
    ``<plotdir>/<nblast>_PR_curve.eps``; `plotdir` is created via `mkdir_p`.
    `key` is accepted but not used.
    """
    sns.set(style="white")
    curr_palette = sns.color_palette()

    # (recall, precision, palette index, legend label) per method.
    curves = (
        (r_cnn, p_cnn, 2, 'CellCnn'),
        (r_outlier, p_outlier, 1, 'outlier'),
        (r_mean, p_mean, 0, 'mean'),
        (r_sc, p_sc, 3, 'sc'),
    )

    # A fresh figure; no plt.clf() first, which would clear (or create)
    # an unrelated current figure.
    f, ax = plt.subplots()
    for rec, prec, color_idx, label in curves:
        ax.plot(rec, prec, c=curr_palette[color_idx], label=label)
    ax.set_xlabel('Recall', fontsize=28)
    ax.set_ylabel('Precision', fontsize=28)
    ax.set_ylim([0.0, 1.05])
    ax.set_xlim([0.0, 1.05])
    ax.legend(loc='center left' , prop={'size':24})
    for item in (ax.get_xticklabels() + ax.get_yticklabels()):
        item.set_fontsize(24)
    f.tight_layout()
    sns.despine(fig=f)
    mkdir_p(plotdir)
    f.savefig(os.path.join(plotdir, str(nblast)+'_PR_curve.eps'), format='eps')
    plt.close(f)
Example #26
0
def splot_data(data, mdata, z, label1, label2, sz, grid_size = 100): #, xls, yls, sz):
    """Draw a color mesh with the data points scattered on top of it.

    `mdata[:, 0]`, `mdata[:, 1]` and `z` are reshaped to
    `(grid_size, grid_size)` for the mesh; `data[:, 0]` / `data[:, 1]` give
    the point positions and `data[:, 2]` their color values. `label1` and
    `label2` are the axis labels, `sz` the marker size.
    """
    # Dim palette for the mesh, vivid palette for the points.
    mesh_cmap = ListedColormap(sns.hls_palette(3, l=.4, s=.1))
    point_cmap = ListedColormap(sns.hls_palette(3, l=.9, s=.9))

    # The second call supersedes the first; both are kept as in the original.
    sns.set(style="white")
    sns.set(style="ticks", font_scale=2.0)

    fig, ax = plt.subplots(figsize=(10,6))
    ax.set_aspect('equal')
    ax.set_xlabel(label1)
    ax.set_ylabel(label2)

    # The mesh needs the coordinates and values on a 2-D grid.
    grid_shape = (grid_size, grid_size)
    grid_x = mdata[:, 0].reshape(grid_shape)
    grid_y = mdata[:, 1].reshape(grid_shape)
    grid_z = z.reshape(grid_shape)
    plt.pcolormesh(grid_x, grid_y, grid_z, cmap=mesh_cmap)

    # Points go on top, in bolder colors.
    plt.scatter(data[:, 0], data[:, 1], c=data[:, 2], s=sz, cmap=point_cmap)

    sns.despine(offset=0.25, trim=True)
Example #27
0
def plot_percentage(contacts, parameters='Description', t_detail=1, n_t_cells=100,
    save=False, palette='deep', context='notebook'):
    """Plot final percentage of T cells in contact with DC

    Parameters
    ----------
    contacts : DataFrame
        Needs the columns ``Track_ID``, ``Run``, ``Time`` and `parameters`.
    parameters : str
        Column whose values define the groups (one violin each).
    t_detail : number
        Only contacts with ``Time <= t_detail*60`` are counted.
    n_t_cells : number
        Total used to convert counts into percentages.
    save : bool or str
        ``True`` writes the plotted table to ``raw_violins.csv``; a string
        is used as the CSV path; falsy skips saving.
    palette, context :
        Forwarded to ``sns.set``.
    """
    t_cells_in_contact = contacts.drop_duplicates(['Track_ID', 'Run', parameters])
    contacts_at_t_detail = t_cells_in_contact[t_cells_in_contact['Time'] <= t_detail*60]

    sns.set(style='ticks', palette=palette, context=context)

    total_contacts = contacts_at_t_detail[['Run', parameters]].pivot_table(
        columns=parameters, index='Run', aggfunc=len, fill_value=0)

    normalized_contacts = total_contacts/n_t_cells*100

    # Order the groups by their median count.
    # reindex(columns=...) replaces the removed DataFrame.reindex_axis.
    column_order = sorted(
        total_contacts.columns, key=lambda col: total_contacts[col].median())
    sorted_contacts = normalized_contacts.reindex(columns=column_order)

    ax = sns.violinplot(data=sorted_contacts, cut=0, inner=None, bw=0.75)
    ax.set_xlabel('')
    ax.set_ylabel('% T cells in contact')
    plt.xticks(rotation=45, horizontalalignment='right')

    sns.despine()
    plt.tight_layout()
    plt.show()

    # `== True` (not `is True`) is kept on purpose so that save=1 still
    # selects the default file name, as before.
    if save == True:
        save = 'raw_violins.csv'

    if save:
        sorted_contacts.to_csv(save)
Example #28
0
def plot_roc_curve(y_test, y_pred):
    """
    Draw the ROC curve (FPR on the x-axis, TPR on the y-axis) with the
    area under the curve shown in the legend.

    Parameters
    ----------
    y_test: A pandas.DataFrame
    y_pred: A two dimensional array from get_final_rfc(); column 1 is used
        as the score.

    Returns
    -------
    A matplotlib.Axes instance
    """
    sns.set(style="darkgrid", font_scale=2.0)
    fig, ax = plt.subplots(figsize=(10, 6))

    scores = y_pred[:, 1]
    false_pos_rate, true_pos_rate, _ = roc_curve(y_test, scores)
    area = auc(false_pos_rate, true_pos_rate)
    legend_text = 'Area = ' + str(area)

    ax.plot(false_pos_rate, true_pos_rate, label=legend_text)

    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('ROC Curve')
    ax.legend(loc='best')

    sns.despine(offset=0, trim=True)

    return ax
def statistical_analysis(df):
    """Check correlation of features to spread.

    Draws an annotated heat map of ``df.corr()``, prints the absolute
    correlation of every column with 'result_spread' in ascending order,
    and draws a pair plot of a fixed set of feature columns.

    Parameters
    ----------
    df : DataFrame
        Must contain 'result_spread' and the columns listed in `cols` below.
    """
    # correlation matrix (computed once and reused below)
    corrmat = df.corr()
    plt.subplots(figsize=(12, 9))
    sns.heatmap(corrmat, cbar=True, annot=True, square=True, fmt='.2f')
    plt.yticks(rotation=0)
    plt.xticks(rotation=90)

    corrvec = corrmat['result_spread'].abs()
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print(corrvec.sort_values())

    # scatterplot
    sns.set()
    cols = ['result_spread', 'rush_attempt_diff', 'turn_diff', 'yards_diff',
            'third_diff', 'sack_diff', 'sack_ydiff', 'p_attempt_diff']
    # NOTE(review): newer seaborn releases renamed `size` to `height`;
    # confirm the installed version before changing the keyword.
    sns.pairplot(df[cols], size=2.5)

    # normality_check(df['result_spread'])
    # normality_check(df['rush_attempt_diff'])
    # normality_check(df['turn_diff'])
    # normality_check(df['yards_diff'])
    # normality_check(df['third_diff'])
    # normality_check(df['sack_diff'])
    # normality_check(df['sack_ydiff'])
    # normality_check(df['poss_diff'])
    # normality_check(df['p_attempt_diff'])
    # Author's observation: rush attempt shows light tails but otherwise
    # these main features appear normally distributed.
def main(args):
    """Plot the data loaded from 'koi_minus_fp.fits' and save it as a PDF.

    The values are drawn twice in the same colour: as points with error
    bars and as a stepped outline. The figure is written to
    '../images/plots/fressin-comparison.pdf' relative to this file.
    """
    logger.debug(args)

    sns.set(style='white', context='talk')

    palette = sns.color_palette(n_colors=5)
    main_colour = palette[0]

    data = fressin.Fressin(os.path.join(FRESSIN_DIR, 'koi_minus_fp.fits'))

    figure, ax = plt.subplots()

    # Build the stepped outline: each value is repeated for the left and
    # right edge of its bin; 50 closes the last bin.
    edges = np.append(data.x, 50)
    edge_pairs = list(zip(edges[:-1], edges[1:]))
    value_pairs = list(zip(data.y, data.y))
    step_x = np.array(edge_pairs).flatten()
    step_y = np.array(value_pairs).flatten()

    line_width = 2
    ax.errorbar(data.b, data.y, data.e, ls='None', lw=line_width,
                capsize=0., color=main_colour, marker='.')
    ax.plot(step_x, step_y, color=main_colour, lw=line_width)

    # Logarithmic period axis with ticks at 1, 2 and 5 per decade.
    ax.set_xscale('log')
    ax.xaxis.set_major_locator(plt.LogLocator(subs=[1, 2, 5]))
    ax.xaxis.set_major_formatter(plt.LogFormatter(labelOnlyBase=False))
    ax.set_xlim(0.6, 12)
    ax.set_ylim(0., 0.004)
    ax.set_xlabel(r'Orbital period / days')
    ax.set_ylabel(r'Number of planets per star')
    ax.grid(True)

    figure.tight_layout()
    output_path = os.path.join(
        os.path.dirname(__file__), '..', 'images', 'plots',
        'fressin-comparison.pdf')
    figure.savefig(output_path)
Example #31
0
@author:
Maximilian N. Günther
MIT Kavli Institute for Astrophysics and Space Research, 
Massachusetts Institute of Technology,
77 Massachusetts Avenue,
Cambridge, MA 02109, 
USA
Email: [email protected]
Web: www.mnguenther.com
"""

from __future__ import print_function, division, absolute_import

#::: plotting settings
import seaborn as sns
sns.set(context='paper', style='ticks', palette='deep', font='sans-serif', font_scale=1.5, color_codes=True)
sns.set_style({"xtick.direction": "in","ytick.direction": "in"})
sns.set_context(rc={'lines.markeredgewidth': 1})

#::: modules
import numpy as np
import matplotlib.pyplot as plt
import os
from astropy.io import fits
from collections import OrderedDict

#::: exoworld modules
from exoworlds.lightcurves import expand_flags


Example #32
0
from __future__ import print_function

import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="ticks")
sns.set_context("poster")
plt.rcParams["font.family"] = "Helvetica"
import sys, os
from nested_dict import nested_dict
import pandas as pd
import numpy as np
from pyfasta import Fasta

import util
import argparse


def generate_dbn_react(shape_out=None,
                       species=None,
                       dots=None,
                       savefn_prefix=None,
                       min_len=0):
    """Prepare the inputs for writing '<prefix>.dbn' and '<prefix>.react'.

    shape_out is passed to util.read_icshape_out, species to util.read_fa
    and dots to util.read_dots. savefn_prefix must be a string: the default
    of None fails on the concatenation below. min_len is not used in the
    visible part of the body.

    NOTE(review): the body appears to be cut off after the inputs are read;
    nothing is written or returned here.
    """
    # Output file names derived from the common prefix.
    dbn = savefn_prefix + '.dbn'
    react = savefn_prefix + '.react'

    # Load the three inputs through the project's util helpers.
    shape_dict = util.read_icshape_out(shape_out, pureID=0)
    fa_dict = util.read_fa(fa=None, species=species, pureID=0)
    dot_dict = util.read_dots(dot=dots)
# Standard Scientific Import
from IPython.display import display, HTML, Javascript, set_matplotlib_formats
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib as mpl
from matplotlib import pyplot as plt
from matplotlib.pyplot import plot as plot
import sklearn
from joblib import Parallel, delayed
from numpy import inf, arange, array, linspace, exp, log, power, pi, cos, sin, radians, degrees
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
sns.set()
sns.set_style('whitegrid')
# set_matplotlib_formats('png', 'svg')
set_matplotlib_formats('png', 'pdf')


# Module Imports
from helpers.app_helper import *
from helpers.gmm_helper import *
from helpers.angular_linear import *
from helpers.plot_print_helper import *
from helpers.data_reader import *
from helpers.pre_process import *
from helpers.empirical_helper import *
from lib.lib_loader import *


# Custom Import
Only potential locations that have a frequency in all criteria are considered. 

As a reminder, these were:
- Japanese Restaurant
- Café
- Wine bar
- Clothing Stores

# In[49]:


import seaborn as sns
import matplotlib.pyplot as plt

# One tall figure holding four stacked violin plots, one per venue category,
# each showing the per-neighborhood distribution from manhattan_onehot.
fig = plt.figure(figsize=(50,25))
sns.set(font_scale=1.1)

# Row 1: Japanese Restaurant (x-axis label blanked)
ax = plt.subplot(4,1,1)
sns.violinplot(x="Neighborhood", y="Japanese Restaurant", data=manhattan_onehot, cut=0);
plt.xlabel("")

# Row 2: Café (x-axis label blanked)
ax = plt.subplot(4,1,2)
sns.violinplot(x="Neighborhood", y="Café", data=manhattan_onehot, cut=0);
plt.xlabel("")

# Row 3: Wine Bar (keeps the default x-axis label)
plt.subplot(4,1,3)
sns.violinplot(x="Neighborhood", y="Wine Bar", data=manhattan_onehot, cut=0);

# Row 4: Clothing Store (keeps the default x-axis label)
plt.subplot(4,1,4)
sns.violinplot(x="Neighborhood", y="Clothing Store", data=manhattan_onehot, cut=0);
Example #35
0
    def initUI(self):
        """Build the window: plotting defaults, controls and plot areas.

        Creates every widget and stores the ones needed later as attributes,
        connects the buttons to their handlers (loadSignal, showDialog,
        update, poincSub, saveFile) and arranges everything in a horizontal
        splitter with the controls on the left and the graphics on the right.
        """
        # --- global plotting defaults (pyqtgraph, matplotlib, seaborn) ---
        pg.setConfigOption('background', 'w')
        params = {
                'figure.figsize': [4, 4],
                'figure.dpi': 300,
                'savefig.dpi': 300
           }
        plt.rcParams.update(params)
        
        sns.set()
        sns.set_style("white")
        sns.set_palette("muted")
        sns.set_context("paper")
        
        # --- state: signal buffers start empty, no signal file chosen yet ---
        self.fullSignal=[]
        self.shiftFullSignal=[]
        self.shiftFullSignalNormal=[]

        self.fSig=''
        
        # --- containers and layouts ---
        contain=QSplitter(Qt.Horizontal)
      
        buttons = QtWidgets.QVBoxLayout()
        graphics = QSplitter(Qt.Vertical)
        imaFrac = QtWidgets.QHBoxLayout()
        frac = QtWidgets.QVBoxLayout()
        lagBox = QFormLayout()
        results =  QFormLayout()


        # --- signal loading controls ---
        self.btnLoadSig = QPushButton('Load Signal')
        self.btnLoadSig.clicked.connect(self.loadSignal)
        self.btnLoadSig.setStyleSheet("background-color:#fbe9e7; font-size: 18px")
        
        self.lblSignal = QLabel('')
        self.lblSignal.setStyleSheet("font-size: 18px")
        
        self.checkTotalSignal = QCheckBox('Signal')
        self.checkTotalSignal.setStyleSheet("font-size: 18px") 

        # --- fractal type selector (combo-box index noted per entry) ---
        self.cmbFractal = QComboBox()
        self.cmbFractal.setStyleSheet("background-color:#fbe9e7; font-size: 18px")
        self.cmbFractal.addItem("Triangle") # Item 0
        self.cmbFractal.addItem("Square") # Item 1
        self.cmbFractal.addItem("Pentagon") # Item 2
        self.cmbFractal.addItem("Hexagon") # Item 3
        #self.cmbFractal.addItem("Octgon") # This is now item 4
        
        # --- action buttons; disabled until enabled elsewhere ---
        self.btnDo = QPushButton("Do Fractal")
        self.btnDo.setDisabled(True)
        self.btnDo.setStyleSheet("font-size: 18px")
        self.btnDo.clicked.connect(self.showDialog)
        
        self.btnFracInter = QPushButton("Points-Inter")
        self.btnFracInter.setDisabled(True)
        self.btnFracInter.setStyleSheet("font-size: 18px")
        self.btnFracInter.clicked.connect(self.update)
        
        # --- lag input, defaults to '0' ---
        self.txtLag = QLineEdit('0')
        self.txtLag.setStyleSheet("font-size: 18px")
        self.txtLag.setEnabled(True)
        lblLag = QLabel("LAG")
        lblLag.setStyleSheet("font-size: 18px")
        
        lagBox.addRow(lblLag,  self.txtLag)
        
        self.btnSub = QPushButton("Graph Poincare")
        self.btnSub.setDisabled(True)
        self.btnSub.setStyleSheet("font-size: 18px")
        self.btnSub.clicked.connect(self.poincSub)
        
        # --- result fields: SD1, SD2, C1, C2 (label + line edit each) ---
        self.lblsd1 = QLabel("SD1: ")
        self.lblsd1.setEnabled(True)  
        self.lblsd1.setStyleSheet("font-size: 18px")
        self.txtsd1 = QLineEdit('')
        self.txtsd1.setEnabled(True)
        self.txtsd1.setStyleSheet("font-size: 18px")
        
        self.lblsd2 = QLabel("SD2: ")
        self.lblsd2.setEnabled(True) 
        self.lblsd2.setStyleSheet("font-size: 18px")
        self.txtsd2 = QLineEdit('')
        self.txtsd2.setEnabled(True)
        self.txtsd2.setStyleSheet("font-size: 18px")
        
        self.lblc1 = QLabel("C1: ")
        self.lblc1.setEnabled(True)  
        self.lblc1.setStyleSheet("font-size: 18px")
        self.txtc1 = QLineEdit('')
        self.txtc1.setEnabled(True)
        self.txtc1.setStyleSheet("font-size: 18px")
        
        self.lblc2 = QLabel("C2: ")
        self.lblc2.setEnabled(True) 
        self.lblc2.setStyleSheet("font-size: 18px")
        self.txtc2 = QLineEdit('')
        self.txtc2.setEnabled(True)
        self.txtc2.setStyleSheet("font-size: 18px")
        
        results.addRow(self.lblsd1, self.txtsd1)
        results.addRow(self.lblsd2, self.txtsd2)
        results.addRow(self.lblc1, self.txtc1)
        results.addRow(self.lblc2, self.txtc2)
        
        self.btnSave = QPushButton("Save Current Data")
        self.btnSave.setDisabled(True)
        self.btnSave.setStyleSheet("font-size: 18px")
        self.btnSave.clicked.connect(self.saveFile)
        
        # --- first graphics layout: four view boxes and scatter items ---
        # NOTE(review): self.viewBox, self.interFrac and self.scaInter are
        # reassigned a few lines below, and this first widget is never added
        # to a layout in this method. rafFrac/bothFrac/aleatFrac and
        # scaRaf/scaBoth/scaAleat stay as attributes; confirm whether other
        # methods still use them before removing this section.
        self.viewBox=pg.GraphicsLayoutWidget()
        self.interFrac = self.viewBox.addViewBox(row=0, col=0, lockAspect=True)
        self.rafFrac = self.viewBox.addViewBox(row=0, col=1, lockAspect=True)
        self.bothFrac = self.viewBox.addViewBox(row=0, col=2, lockAspect=True)
        self.aleatFrac = self.viewBox.addViewBox(row=0, col=3, lockAspect=True)
        
        self.scaInter=pg.ScatterPlotItem()
        self.scaRaf=pg.ScatterPlotItem()
        self.scaBoth=pg.ScatterPlotItem()
        self.scaAleat=pg.ScatterPlotItem()
        

        # --- graphics layout actually shown: two plots side by side ---
        self.viewBox=pg.GraphicsLayoutWidget()
        self.interFrac = self.viewBox.addPlot()#ViewBox(row=0, col=0, lockAspect=True)
        # Fixed axis range with a small margin around the unit square.
        self.interFrac.setYRange(-0.1, 1.1, padding=0)
        self.interFrac.setXRange(-0.1, 1.1, padding=0)

        self.poinc = self.viewBox.addPlot()#ViewBox(row=0, col=1, lockAspect=True)
        
        # Scatter items and ROI are only created here; they are not attached
        # to a plot within this method.
        self.scaInter=pg.ScatterPlotItem()
        self.scaPoinc=pg.ScatterPlotItem()

        self.roiInter=pg.PolyLineROI([[0.2, 0.5], [0.8, 0.5], [0.5, 0]], pen=(6,9), closed=True)

        imaFrac.addWidget(self.viewBox)
                
        # --- assemble the control column, top to bottom ---
        buttons.setSizeConstraint(0)
        buttons.addWidget(self.btnLoadSig)
        buttons.addWidget(self.lblSignal)
        buttons.addWidget(self.checkTotalSignal)
        
        nomFractal = QLabel("Fractal Type")
        nomFractal.setStyleSheet("font-size: 18px")
        buttons.addWidget(nomFractal)
        buttons.addWidget(self.cmbFractal)
        
        buttons.addWidget(self.btnDo)
        buttons.addWidget(self.btnFracInter)
        buttons.addLayout(lagBox)
        buttons.addWidget(self.btnSub)
        buttons.addLayout(results)
        buttons.addWidget(self.btnSave)
        
        frac.addLayout(imaFrac)
        
        # --- graphics column: fractal/Poincare plots above, plot1 below ---
        self.plot1=pg.PlotWidget()
        fra = QWidget()
        fra.setLayout(frac)

        graphics.addWidget(fra)
        graphics.addWidget(self.plot1)
        bot = QWidget()
        bot.setLayout(buttons)

        # --- final assembly: controls left, graphics right ---
        contain.addWidget(bot)
        contain.addWidget(graphics)
        self.addWidget(contain)
#lab_vld_name=[inverse_CC[p] for p in list(lab_vld)]
#y_pred_name=[inverse_CC[p] for p in list(y_pred)]

# calculate the confusion matrix
conf_mat = metrics.confusion_matrix(lab_vld, y_pred)

# convert into a pandas dataframe; rows and columns both carry the class
# labels so the heatmap axes are readable
df_cm = pd.DataFrame(conf_mat,
                     index=list(label_list),
                     columns=list(label_list))

# plot the confusion matrix figure
plt.figure(figsize=(15, 15))
sn.set(font_scale=1.4)  # for label size
sn.heatmap(df_cm, annot=True, annot_kws={"size": 16}, cmap="Blues")
plt.xlabel('Predicted Class')
plt.ylabel('Actual Class')
plt.show()

# calculate 10 fold validation accuracy

# deepcopy all training data so the original X_ALL is left untouched
X_ALL_CV = copy.deepcopy(X_ALL)
# normalize each column (each feature vector)
scaler2 = MinMaxScaler()

# fit a normalizer using training data and
# then perform normalization on training data
X_ALL_CV = scaler2.fit_transform(X_ALL_CV)
    def single_feature(self, feauture_count, mean_option):
        """Fit one single-feature linear regression per feature of a pool.

        The pool is selected by the module-level ``features_pool``
        ('ratios', 'balance' or 'income'); any other value does nothing.
        For every feature name in ``self.<pool>[2:]`` a LinearRegression of
        ``self.result[[mean_option]]`` on ``self.<pool>_returns[[feature]]``
        is fitted on the first 80% of the rows (no shuffling), its training
        R^2 is recorded, and a scatter plot with the fitted line is saved to
        ``user_path + '/' + features_pool + feature + '.png'``.

        The best-scoring (ticker, feature, score) row is appended to the
        module-level ``ticker_sample`` / ``feature_sample`` / ``score_sample``
        lists. For 'ratios' and 'balance' the columns of ``max_score`` are
        refreshed from those lists as well.

        Parameters
        ----------
        feauture_count : unused; kept (including its spelling) so existing
            callers keep working.
        mean_option : name of the outcome column in ``self.result``.
        """
        if features_pool not in ('ratios', 'balance', 'income'):
            return

        # The first two entries are skipped; per the original author they
        # are the year and quarter reported.
        features_list = getattr(self, features_pool)[2:]
        pool_returns = getattr(self, features_pool + '_returns')

        # Per-feature results, collected in iteration order.
        feature_list = []
        score_list = []
        ticker_list = []

        for f in features_list:
            features = pool_returns[[f]]
            outcome = self.result[[mean_option]]

            # Chronological 80/20 split; only the training part is scored.
            x_train, _, y_train, _ = train_test_split(
                features, outcome, train_size=0.8, shuffle=False)

            model = LinearRegression()
            model.fit(x_train, y_train)
            score = model.score(x_train, y_train)
            # Predictions on the training inputs give the fitted line.
            predictions = model.predict(x_train)
            coefs = model.coef_
            intercepts = model.intercept_

            sns.set()
            plt.figure(figsize=[10, 10])
            plt.scatter(x_train, y_train, color='darkcyan', alpha=0.4)
            plt.plot(x_train, predictions, color='darkorange')
            plt.title('R^2 of ' + f + ' for ' + user_ticker + "\n" +
                      'y = ' + str(round(coefs[0][0], 5)) + '*x' + ' + ' +
                      str(round(intercepts[0], 3)))
            plt.xlabel('% Δ ' + f + ' YoY')
            plt.ylabel('Maximum Stocks Potential/Quarter')
            plt.savefig(user_path + '/' + features_pool + f + '.png')

            ticker_list.append(user_ticker)
            print(user_ticker)
            feature_list.append(f)
            print(f)
            score_list.append(score)
            print(score)

        # Nothing was fitted: there is no best feature to report.
        if not score_list:
            return

        # Build the result table once, after all features are scored.
        total = pd.DataFrame()
        total['ticker'] = ticker_list
        total['feature'] = feature_list
        total['score'] = score_list

        # Keep the first row that reaches the maximum training score.
        mx_score = total['score'].max()
        list_result = total.loc[total['score'] == mx_score].values.tolist()
        print(list_result)
        ticker_sample.append(list_result[0][0])
        feature_sample.append(list_result[0][1])
        score_sample.append(list_result[0][2])

        # NOTE(review): the original 'income' branch stopped before
        # refreshing max_score (possibly a truncated listing); that
        # behaviour is preserved here. Confirm whether it is intended.
        if features_pool != 'income':
            max_score['ticker'] = ticker_sample
            max_score['feature'] = feature_sample
            max_score['score'] = score_sample
    corrs_03to08[i], p = cc.pearsonr(x03to08, xmg_03to08)
    minr_03to08[i], maxr_03to08[i] = cc.confidenceinterval95(
        corrs_03to08[i], len(x03to08))

    corrs_09to15[i], p = cc.pearsonr(x09to15, xmg_09to15)
    minr_09to15[i], maxr_09to15[i] = cc.confidenceinterval95(
        corrs_09to15[i], len(x09to15))
    # print("%.4f @ %.1f km " % (p, alts[i]))

# contourplot(corrresult)

# Print the entry of alts at which each period's correlation is largest.
print(alts[np.argmax(corrs_02to15)])
print(alts[np.argmax(corrs_03to08)])
print(alts[np.argmax(corrs_09to15)])

# Plot styling: serif font and a three-colour xkcd palette.
sns.set(context="talk", style="white", rc={'font.family': [u'serif']})
colours = ['tangerine', 'blue', 'grass green']
sns.set_palette(sns.xkcd_palette(colours))

fig, ax = plt.subplots(sharey=True, figsize=(8, 5))

# Vertical reference line at zero correlation, spanning y = 20 to 60.
plt.plot([0, 0], [20, 60], 'k')

# 2002-2015: thin black lines for both confidence bounds, a shaded band
# between them, and the correlation profile itself on top.
ax.plot(minr_02to15, alts, maxr_02to15, alts, color='black', linewidth=0.5)
ax.fill_betweenx(alts,
                 minr_02to15,
                 maxr_02to15,
                 facecolor='xkcd:tangerine',
                 alpha=0.2)
plt.plot(corrs_02to15, alts, label="2002-2015")
Example #39
0
import numpy as np; np.random.seed(0)
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
from pylab import *
from scipy.interpolate import griddata

class Plotter():
    def __init__(self, report):
        """
        Store the run histories of `report` and derive the tested features.

        `report.histories` holds the histories of every run, presumably as a
        dictionary that maps each tested feature value to its runs.
        e.g. batch sizes have been examined, so we get a dictionary like this:
        report.histories =  {
                            32 : [ [run1],[run2],[run3],[run4],[run5] ],
                            64 : [ [run1],[run2],[run3],[run4],[run5] ],
                            128: [ [run1],[run2],[run3],[run4],[run5] ],
                            256: [ [run1],[run2],[run3],[run4],[run5] ],
                            512: [ [run1],[run2],[run3],[run4],[run5] ]
                            }
        (the rest of the class assumes this structure)
        """
        # number of runs per feature to test
        self.runs = report.run_num
        self.histories = report.histories
        # self.features is a list of features that are tested, e.g. the
        # batch sizes [32, 64, 128, 256]. _get_features is a method, so it
        # has to be called through self; the bare name raised NameError.
        self.features = self._get_features()


    # extract features from histories dictionary as a set and converts it
    # to a list which is returned
    def _get_features(self):
"""
Plot full psychometric functions as a function of choice history,
and separately for 20/80 and 80/20 blocks
"""

import pandas as pd
import numpy as np
import sys, os, time
import matplotlib.pyplot as plt
import seaborn as sns
import datajoint as dj
from IPython import embed as shell # for debugging
from scipy.special import erf # for psychometric functions

## INITIALIZE A FEW THINGS
# NOTE(review): every sns.set() call re-applies the full theme, using
# seaborn's defaults for arguments that are not passed. Only the last call
# below is therefore in effect, and the font='Arial' choice from the first
# call is overridden. Confirm whether these three lines were meant to be
# alternatives rather than a sequence.
sns.set(style="darkgrid", context="paper", font='Arial')
sns.set(style="darkgrid", context="paper")
sns.set(style="darkgrid", context="paper", font_scale=1.3)

# import wrappers etc
from ibl_pipeline import reference, subject, action, acquisition, data, behavior
from ibl_pipeline.utils import psychofit as psy
from ibl_pipeline.analyses import behavior as behavioral_analyses
from dj_tools import *

figpath  = os.path.join(os.path.expanduser('~'), 'Data/Figures_IBL')

# ================================= #
# GRAB ALL DATA FROM DATAJOINT
# ================================= #
Example #41
0
"""
This program reduces the six Yahoo Finance columns to two (Price and the other features)
in order to analyse the behavior of the linear regression and SVM models.
The 'Adj Close' column is dropped because it is the same as the 'Close' column.
"""

from sklearn.preprocessing import StandardScaler
import pandas as pd
import pandas_datareader.data as web
from matplotlib import style
import matplotlib.pyplot as plt
import math
import numpy as np
import seaborn as sns; sns.set(font_scale = 1.2)


# Load the Yahoo Finance table and index it by date.
df = pd.read_csv('./Data/yahoo.csv')
df = df.set_index('Date')
print (df.head())

# 'Adj Close' is dropped in place; the second print shows the result.
df.drop(["Adj Close"], axis=1, inplace=True)

print (df.head())

#----------- Data preparation section starts here -------------
#df = pd.DataFrame(df)
# separate the label column and store it as `label`

# The label is the column at position 3 of the remaining columns
# (NOTE(review): confirm which column that is in yahoo.csv).
label = df.iloc[:,3]
label = pd.DataFrame(label)
label.to_csv('./Data/PCA_label.csv')
import seaborn as sns
import matplotlib.pyplot as plt

labels = ['ps_1', 'ps_2', 'ps_3']

# Read the 'p50' entry of every synthetic trace and tag it with its label.
frames = []
for label in labels:
    # The context manager closes each pickle file as soon as it is read.
    # NOTE: pickle must only be used on trusted, locally produced files.
    with open('../../../Data/UMB_trace/synthetic/{}.pickle'.format(label),
              'rb') as handle:
        ts = pickle.load(handle)['p50']
    temp = pd.DataFrame(ts, columns=['p50'])
    temp['label'] = label
    frames.append(temp)

# Concatenate once instead of growing the frame inside the loop.
df = pd.concat(frames)

# figure
plt.figure(figsize=(9, 9))
sns.set(font_scale=2)
sns.set_style('white')
g = sns.violinplot(x='label', y='p50', data=df)
# Horizontal reference line at y = -2.5.
plt.axhline(y=-2.5, color='red')
g.set(xticklabels=['$\\psi_{s_{measured}}$ > $\\psi_{s_{true}}$',
                   '$\\psi_{s_{measured}}$ = $\\psi_{s_{true}}$',
                   '$\\psi_{s_{measured}}$ < $\\psi_{s_{true}}$'])
g.set(xlabel='')
g.tick_params(bottom=False)
g.set_ylabel(ylabel='P50 (MPa)')
g.set(ylim=(-5, 0))
fig = g.get_figure()
fig.savefig('../../../Figures/bias_ps.png', bbox_inches='tight')
Example #43
0
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR

import xgboost as xgb

# to evaluate the models
from sklearn.metrics import mean_squared_error

from sklearn.metrics import r2_score

import re

from pandas.tools.plotting import scatter_matrix
import seaborn as sns
sns.set_color_codes()
sns.set(font_scale=1.25)

# to divide train and test set
from sklearn.model_selection import train_test_split

# feature scaling
from sklearn.preprocessing import MinMaxScaler

# feature scaling
from sklearn.preprocessing import StandardScaler

import xgboost as xgb

# to evaluate the models
from sklearn.metrics import roc_auc_score
from sklearn import metrics
"""
Conditional kernel density estimate
===================================

_thumb: .4, .5
"""
import seaborn as sns
sns.set(style="whitegrid")

# Load the diamonds dataset
diamonds = sns.load_dataset("diamonds")

# Plot the distribution of cut ratings (the hue variable), conditional on
# carat; multiple="fill" stacks the densities so they fill the y-axis.
sns.displot(
    data=diamonds,
    x="carat", hue="cut",
    kind="kde", height=6,
    multiple="fill", clip=(0, None),
    palette="ch:rot=-.25,hue=1,light=.75",
)
import numpy as np

from datetime import datetime
import pandas as pd

from scipy import optimize
from scipy import integrate

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

import seaborn as sns


sns.set(style="darkgrid")

mpl.rcParams['figure.figsize'] = (16, 9)
pd.set_option('display.max_rows', 500)

from PIL import Image


def model_sir():
    """Load the population and COVID tables used by the SIR model.

    NOTE(review): only the data loading is visible here; the rest of the
    body appears to be cut off, and nothing is returned.
    """
    df_pop=pd.read_csv('data/processed/world_population.csv',sep=";")

    df_data=pd.read_csv('data/processed/COVID_full_flat_table.csv',sep=';')
    # Drops the first 60 rows because the early data is inconsistent.
    # NOTE(review): the original comment said "first 50 days"; confirm
    # which number is intended.
    df_data=df_data.iloc[60:,:]
    df_data=df_data.drop(['Taiwan*'], axis= 1) # dropping Taiwan as the data is inconsistent

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
sns.set(context='talk', style='white', color_codes=True)
sns.set_palette(['#e41a1c', '#377eb8', '#4daf4a', '#984ea3'])

T = 160  # number of trials for each experimental block

# Mean responses are plotted against trials 1..T below; the legend labels
# name the plotted agents (presumably one column per agent - TODO confirm).
mean_response = np.load('mean_responses_exp.npy')
labels = ['IRI', 'RRI', 'SU-RW', 'DU-RW']

# One row, three columns: ax1 takes the first two columns, ax2 the last.
gs = gridspec.GridSpec(nrows=1, ncols=3)

fig = plt.figure(figsize=(12, 4))
fig.subplots_adjust(wspace=.5)

ax1 = fig.add_subplot(gs[0, :2])
ax2 = fig.add_subplot(gs[0, -1])

ax1.plot(np.arange(1, T + 1), mean_response)
sns.despine(fig=fig, ax=ax1)

ax1.legend(labels, title='agent')
# NOTE(review): the upper x limit repeats the value of T as a literal.
ax1.set_xlim([1, 160])
ax1.set_ylim([0, 1])
# Dashed vertical markers at fixed trial numbers.
ax1.vlines([55, 70, 90, 105, 125], 0, 1, 'k', linestyle='--')
ax1.set_ylabel(r'$Pr($choice = correct$)$')
ax1.set_xlabel('trial')

class_number = np.array([9, 16, 1, 1])
Example #47
0
def pairplotting(df):
    """Show a seaborn pair plot of `df` using the whitegrid notebook theme."""
    # Theme first, so the grid of plots picks it up.
    sns.set(context='notebook', style='whitegrid')
    # Every panel of the grid is 2.5 inches high.
    sns.pairplot(data=df, height=2.5)
    plt.show()
            'BsmtFullBath', 'BsmtHalfBath'):
    all_data[col] = all_data[col].fillna(0)

# The categorical basement columns BsmtQual, BsmtCond, BsmtExposure,
# BsmtFinType1 and BsmtFinType2 get the string 'None' for missing values
for col in ('BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1',
            'BsmtFinType2'):
    all_data[col] = all_data[col].fillna('None')

# Missing MasVnrType and MasVnrArea also likely mean no veneer,
# so fill them with "None" and 0 respectively
all_data["MasVnrType"] = all_data["MasVnrType"].fillna("None")
all_data['MasVnrArea'] = all_data['MasVnrArea'].fillna(0)

# Check MSZoning for trends: bar chart of how often each value occurs
sns.set(style='darkgrid')
ax = sns.countplot(x=all_data['MSZoning'], data=all_data)
plt.show()

# MSZoning has 'RL' as the most common value by far,
# so fill the missing data with the mode of the column
all_data['MSZoning'] = all_data['MSZoning'].fillna(
    all_data['MSZoning'].mode()[0])

# Utilities is almost all 'AllPub' with 3 exceptions,
# so this column seems unhelpful and is dropped
all_data = all_data.drop(['Utilities'], axis=1)

# Functional description says NA means typical ('Typ')
all_data['Functional'] = all_data['Functional'].fillna('Typ')
Example #49
0
# ## Calculate the mean and variance of the daily gains of each stock
print('Stock '+ symbol1 + ' Mean:', stock1["Gain"].mean())
print('Stock '+ symbol1 + ' Variances:', stock1["Gain"].var())

print('Stock '+ symbol2 + ' Mean:', stock2["Gain"].mean())
print('Stock '+ symbol2 + ' Variances:', stock2["Gain"].var())

print('Stock '+ symbol3 + ' Mean:', stock3["Gain"].mean())
print('Stock '+ symbol3 + ' Variances:', stock3["Gain"].var())

print('Stock '+ symbol4 + ' Mean:', stock4["Gain"].mean())
print('Stock '+ symbol4 + ' Variances:', stock4["Gain"].var())

# ## Compare volatility: draw the distribution of daily gains for all stocks
# in one figure. hist=False turns the histogram bars off, so only the
# density curve of each stock is drawn (blue, red, green, yellow).
sns.set(rc={"figure.figsize": (15, 10)});
sns.distplot(stock1['Gain'], hist = False, color = 'b' )
sns.distplot(stock2['Gain'], hist = False, color = 'r' )
sns.distplot(stock3['Gain'], hist = False, color = 'g' )
sns.distplot(stock4['Gain'], hist = False, color = 'y' )

# ## Correlation
# Put the four gain series side by side, one column per stock symbol.
All_Stocks = pd.concat([stock1['Gain'],stock2['Gain'],stock3['Gain'],stock4['Gain']], axis=1)

names = [symbol1, symbol2, symbol3, symbol4]
All_Stocks.columns = names
# Drop rows where any of the gains is missing before correlating.
All_Stocks = All_Stocks.dropna()

print (All_Stocks.corr())

#Heat map
Example #50
0
        choose = good_sites.loc[(good_sites.include==1),'site']
    elif quality =='all same':
        choose = good_sites.loc[(good_sites.include==1)&(good_sites.comment.isin(['all same'])),'site']
    elif quality =='pure+all same':
        choose = good_sites.loc[(good_sites.include==1)&(good_sites.comment.isin(['only 1', 'all same'])),'site']
    elif quality =='only mixed':
        choose = good_sites.loc[(good_sites.include==1)&(good_sites.comment.isin([np.nan])),'site']
    else :
        raise ValueError('Unknown quality paraneter. Please check input')
    df = df.loc[df.site.isin(choose)]   
    
    return df
    
# Script entry point: load the data tables and apply the row filters.
# NOTE(review): the block appears to be cut off after the site filter setup.
if __name__ == "__main__":

    sns.set(font_scale = 2, style = 'ticks')
    # All relative paths below are resolved against dir_data.
    os.chdir(dir_data)
    smooth_fmc = pd.read_pickle('cleaned_anomalies_11-29-2018/fm_smoothed')
    
    df = pd.read_pickle('vwc')
    ## time filter: keep records from 2015 onwards
    df = df.loc[df.date.dt.year>=2015]
    ## fuel filter: exclude the listed fuel categories
    df = df.loc[~df.fuel.isin(['1-Hour','10-Hour','100-Hour', '1000-Hour',\
                               'Duff (DC)', '1-hour','10-hour','100-hour',\
                               '1000-hour', 'Moss, Dead (DMC)' ])]
    # Remove rows whose percent value is 1000 or more.
    df.drop(df[df.percent>=1000].index, inplace = True)
    
    ## site filter: collect the index of sites whose Latitude is 0
    fmc = pd.read_csv('fuel_moisture/nfmd_queried_latlon.csv', index_col = 0) 
    zero_lat_lon_sites = fmc[fmc.Latitude==0].index
Example #51
0
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

# data visualisation and manipulation
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns
import click as ck
 
#configure
# sets matplotlib to inline and displays graphs below the corresponding cell.
# %matplotlib inline
# Apply matplotlib's 'fivethirtyeight' style sheet, then seaborn's 'whitegrid'
# theme with seaborn colour codes enabled.
style.use('fivethirtyeight')
sns.set(style='whitegrid',color_codes=True)

#model selection
from sklearn.metrics import (accuracy_score,precision_score,recall_score,confusion_matrix,
                                roc_curve,roc_auc_score, mean_absolute_error)

# deep-learning libraries
import keras
from keras.models import Model
from keras.optimizers import Adam
from keras.layers.merge import dot

# specifically for deeplearning.
from keras.layers import (Dense, Dropout, Flatten,Activation,Input,Embedding,
                            Conv2D, MaxPooling2D, BatchNormalization)
Example #52
0
def plot_results(fname,
                 results,
                 param,
                 threshold,
                 output_dir='.',
                 savecsv=False):
    '''
    Visualize results in a seaborn annotated heatmap and save it as a PNG.

    When ``param`` is ``'overlap'`` the work is delegated to
    ``plot_1d_results`` and nothing else is done here.  Otherwise the table
    returned by ``prepare_table(results, threshold)`` is pivoted into an
    ``overlap`` x ``param`` grid: cell colour is the success percentage
    (``success / vmax * 100``) and the cell annotation is the ``error`` value.

    Parameters
    ----------
    fname : str
        Base name of the output files (``<fname>.png`` and, optionally,
        ``<fname>_parsed.csv``).
    results
        Raw results, passed through unchanged to ``prepare_table`` /
        ``plot_1d_results``.
    param : str
        Column of the parsed table used for the heatmap's x axis.
    threshold
        Passed through unchanged to ``prepare_table`` / ``plot_1d_results``.
    output_dir : str
        Directory in which the output files are written.
    savecsv : bool
        When true, also write the parsed table as CSV.

    Returns
    -------
    None
    '''
    # if there is only the overlap param create a 1d plot.
    if param == 'overlap':
        plot_1d_results(fname,
                        results,
                        param,
                        threshold,
                        output_dir=output_dir,
                        savecsv=savecsv)
        return

    parsed_results, vmax = prepare_table(results, threshold)
    if savecsv:
        parsed_results.to_csv(path.join(output_dir, fname + '_parsed.csv'))

    # Keyword arguments are required here: DataFrame.pivot no longer accepts
    # index/columns/values positionally in pandas >= 2.0.
    successes = parsed_results.pivot(index='overlap', columns=param,
                                     values='success')
    successes = (successes / vmax) * 100
    errors = parsed_results.pivot(index='overlap', columns=param,
                                  values='error')

    sns.set()
    f, ax = plt.subplots(figsize=(9, 6))
    try:
        # define the color map, red to blue, no gray point
        # very similar to sns.color_palette("coolwarm_r", n_colors=vmax)
        spread = [
            (0, '#bfbfbf'),  # gray, no successes
            (0.01, '#BE2F33'),  # worst red
            (0.49, '#E6CEC2'),  # best red (success lt 50%)
            (0.5, '#C7D4E8'),  # worst blue (success gte 50%)
            (1, '#415DC9')  # best blue
        ]
        # one colour bin per possible success count, including zero
        vmax += 1
        pmax = 101

        # segmented colorbar with vmax bins
        cmap = clr.LinearSegmentedColormap.from_list('mmap', spread, N=vmax)

        # plot heatmap; fmt='s' because the annotations are formatted as
        # strings (presumably `error` holds text — verify in prepare_table)
        sns.heatmap(successes, annot=errors, fmt='s',
                    linewidths=.5, ax=ax, annot_kws={'rotation': 40},
                    cmap=cmap, cbar_kws={'label': 'success'},
                    vmin=0, vmax=pmax)

        # center the ticks on each segment of the color bar
        # for some reason seaborn doesn't do this automatically
        cbar = ax.collections[0].colorbar
        tick_values = np.arange(0, pmax, 25)
        tick_locs = tick_values + 0.5
        tick_str = ['%.0f%%' % value for value in tick_values]
        # the top label also shows the absolute maximum count
        tick_str[-1] = tick_str[-1] + ' (%d)' % (vmax - 1)
        cbar.set_ticks(tick_locs)
        cbar.set_ticklabels(tick_str)

        # save fig
        plt.savefig(path.join(output_dir, ('%s.png' % fname)))
    finally:
        # close in case of later reuse, even when plotting or saving failed.
        ax.cla()
        f.clf()
        plt.close(f)
Example #53
0
# We'll need numpy for some mathematical operations
import numpy as np
import json
import math
import os
import sys

# Librosa for audio
import librosa

# matplotlib for displaying the output
import matplotlib.pyplot as plt
# And seaborn to make it look nice
import seaborn
seaborn.set(style='ticks')

# and IPython.display for audio output
import IPython.display
from numpyEncoder import *

# --------- use the following commands for the cleanup -----------
''' make sure that .wav files are of same channels + sample rates'''
# concatenate .wav files, get rid of silences under -20 decibels and trim -> 11.10mins for each sample on one file
# ffmpeg -f concat -i <( for f in *.wav; do echo "file '$(pwd)/$f'"; done ) output.wav
# ffmpeg -i output.wav -t 8280 -acodec copy output_trimmed.wav
# ffmpeg -i output_trimmed.wav -af silenceremove=0:0:0:-1:1:-20dB sound_input.wav
# ----------------------------------------------------------------

# # 11.10 mins of each artist
# audio_path = 'output.wav'
Example #54
0
# Load IPython's autoreload extension and switch it to mode 2.
get_ipython().magic(u'reload_ext autoreload')
get_ipython().magic(u'autoreload 2')
# Extend the import path before importing the modules below
# (presumably they live in ../x01_process_data/ — verify).
sys.path.append('../x01_process_data/')
import x02_load_dataframes
import x03_compute_selections

# Same for the settings directory (presumably the home of `common` and
# `paper_settings` — verify).
sys.path.append('../settings/')
# from paths_new import FIGURES_DIR
from common import DESIRED_AA_ORD
from common import codon_to_aa_selector
from paper_settings import PAPER_PRESET
from paper_settings import PAPER_FONTSIZE
from paper_settings import save_fig

# Apply the shared plotting preset; PAPER_PRESET is unpacked as keyword
# arguments to sns.set.
sns.set(**PAPER_PRESET)

# In[2]:

# Load the packaging and thermostability data through the project loaders
# (presumably pandas DataFrames, judging by the loader names — verify).
package_data = x02_load_dataframes.load_packaging_df()
tm_data = x02_load_dataframes.load_thermostability_df()

# In[3]:

# Compute the thermostability selection from both data sets.
# NOTE(review): the meaning of wt_norm / sum_all is defined in
# x03_compute_selections and is not visible here — confirm before changing.
tm_selection_dict = x03_compute_selections.compute_tm_selection(package_data,
                                                                tm_data,
                                                                wt_norm=True,
                                                                sum_all=True)

# In[4]:
# Use the 'epoch' column as the row index.
df_stats = df_stats.set_index('epoch')

# A hack to force the column headers to wrap (currently disabled).
#df = df.style.set_table_styles([dict(selector="th",props=[('max-width', '70px')])])

# Display the table. A bare expression is only rendered by a notebook/REPL
# cell; when run as a plain script this line has no visible effect.
df_stats

import matplotlib.pyplot as plt


import seaborn as sns

# Seaborn's 'darkgrid' theme supplies the plot styling.
sns.set(style='darkgrid')

# Larger fonts and a 12x6 default figure.
sns.set(font_scale=1.5)
plt.rcParams["figure.figsize"] = (12, 6)

# Learning curve: one line per loss column of df_stats.
for loss_column, line_format, legend_label in (
        ('Training Loss', 'b-o', "Training"),
        ('Valid. Loss', 'g-o', "Validation"),
):
    plt.plot(df_stats[loss_column], line_format, label=legend_label)

# Title, axis labels, legend and x ticks.
plt.title("Training & Validation Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.xticks([1, 2, 3, 4])
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt
plt.rc("font", size=14)  # matplotlib default font size
import seaborn as sns
sns.set(style="white")  # use seaborn's "white" background style
# NOTE(review): this second call applies the "whitegrid" style and appears to
# supersede the "white" style set on the previous line — confirm which one is
# intended.
sns.set(style="whitegrid", color_codes=True)


def logmodel_prediction(X, y):
    """Fit a logistic regression on an 80/20 split of (X, y) and print test metrics.

    The split is made with ``train_test_split(test_size=0.2, random_state=2)``.
    A ``LogisticRegression(max_iter=1000)`` is fitted on the training part
    and used to predict the held-out part; the accuracy and the
    classification report for that part are printed.

    ``y`` must expose ``.values`` (e.g. a pandas Series or DataFrame); the
    training target is flattened to one dimension before fitting.

    NOTE(review): the ROC AUC is computed at the end but not used or returned
    in the visible code — the function may continue in the original source.
    """
    # Hold out 20% of the rows for evaluation.
    features_train, features_test, target_train, target_test = train_test_split(
        X, y, test_size=0.2, random_state=2)

    # Train the model, then predict the held-out rows.
    classifier = LogisticRegression(max_iter=1000)
    classifier.fit(features_train, target_train.values.reshape(-1))
    predicted = classifier.predict(features_test)

    print('Train/Test split results:')
    print("Accuracy %2.3f" % accuracy_score(target_test, predicted))
    print(classification_report(target_test, predicted))

    # ROC AUC of the hard class predictions on the held-out rows.
    logit_roc_auc = roc_auc_score(target_test, classifier.predict(features_test))
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import metrics
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.multitest import fdrcorrection

from netneurotools import cluster
from ppmi_snf import directories, structures, utils

from analysis import run_pdatrophy_anova, run_univariate_anova

# set seaborn, numpy, matplotlib, warnings options
sns.set(style='white', context='notebook', font_scale=1.5)
# silence every PendingDeprecationWarning, and DeprecationWarning only for
# messages matching 'Calling np.sum'
warnings.filterwarnings('ignore', category=PendingDeprecationWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning,
                        message='Calling np.sum')
# keep text as text (not converted to paths) in saved SVG files
plt.rcParams['svg.fonttype'] = 'none'
fontd = dict(size=36)  # font settings dict; presumably passed to text/label calls — verify usage

# miscellaneous settings
SAVE_FIGS = True  # whether to save figures
SEED = 1234  # random seed for reproducibility
np.random.seed(SEED)  # seed numpy's global random state


def compare_nomri_clusters(hdf):
    """
    Compares ALL versus NO-MRI consensus cluster labels
#importing the libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Load the data: 'test.csv' into `dataset`, 'train.csv' into `dataset1`.
dataset = pd.read_csv('test.csv')
dataset1 = pd.read_csv('train.csv')

# Revenue (the prediction target): box plot, histogram, and histogram of
# log1p(revenue), stacked in one figure.
f, ax = plt.subplots(3, figsize=(12, 7))
box_ax, hist_ax, log_hist_ax = ax

sns.set(rc={'figure.figsize': (12, 8)})
sns.boxplot(x=dataset1.revenue, ax=box_ax)
box_ax.set_title("revenue Boxplot")
sns.distplot(a=dataset1.revenue, kde=False, ax=hist_ax)
hist_ax.set_title("revenue Histogram")
sns.distplot(a=np.log1p(dataset1.revenue), kde=False, ax=log_hist_ax)
log_hist_ax.set_title("Log1p transformed revenue Histogram")
f.tight_layout()

# Store log1p(revenue) as an extra column of the training data.
dataset1["log_revenue"] = np.log1p(dataset1.revenue)

# Title length: missing titles become "" so len() works on every row.
for frame in (dataset1, dataset):
    frame["title"] = frame["title"].fillna("")
    frame["title_len"] = frame["title"].apply(len)
# import pymysql  # connect to a MySQL database directly
# import sqlite3 # SQLite3 database
# import pymongo # MongoDB database

from flask_bootstrap import Bootstrap
from flask_nav import Nav
from flask_nav.elements import *

import seaborn as sns
import pandas as pd
from matplotlib.ticker import MultipleLocator, FormatStrFormatter
import matplotlib.pyplot as plt

plt.rcParams['font.sans-serif'] = ['SimHei']  # Chinese font setting: use the SimHei typeface
plt.rcParams['axes.unicode_minus'] = False  # fix the minus sign '-' being shown as a box in saved figures
sns.set(font='SimHei', font_scale=1.5)  # fix Chinese text display in seaborn and adjust the font size
'''
# # mysql的orm框架 SQLAlchemy
# # SQLAlchemy对象关系映射 mysql orm配置
# app = Flask(__name__)
# app.debug = True
# # mysql+pymysql会报警告,为了防止报警告,最好用mysql+mysqlconnector
# # app.config['SQLALCHEMY_DATABASE_URI'] = "mysql+pymysql://root:root@localhost:3306/flasktest?charset=utf8"  # 设定数据库目录
# app.config[
#     'SQLALCHEMY_DATABASE_URI'] = "mysql+mysqlconnector://root:root@localhost:3306/flasktest?charset=utf8"  # 设定数据库目录
# app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = True
# app.config['SQLALCHEMY_COMMIT_TEARDOWN'] = True
# # app.secret_key = 'qi_fei_de_mumu_123'  # 防止跨站攻击
# app.config.from_pyfile('app/config')  # 防止跨站攻击
# db = SQLAlchemy(app)  # 数据库实例化#mysql orm 数据库应用
# bootstrap = Bootstrap(app)  # 实例化Bootstrap
def QA_VectorBacktest_InterDayOnceTrading_single_fixed_stop(settle_time='14:57:00',
                                                         fixed_stop_profit_ret=0.015,
                                                         fixed_stop_loss_ret=-0.005,
                                                         trading_shift=-1,
                                                         comission=0.00025,
                                                         data=None
                                                         ):
    '''
    Vectorised intraday backtest that is flat at ``settle_time`` every day.

    ``data`` is indexed by 'datetime' strings (characters ``[:10]`` are the
    date, characters ``[11:]`` the time of day) and must contain a 'signal'
    and a 'close' column.  ``data`` is modified in place: 'signal' is
    overwritten and the columns 'date', 'minute', 'real_return' and
    'strategy' are added.

    Parameters
    ----------
    settle_time : str
        Time of day at which the signal is forced to 0.
    fixed_stop_profit_ret, fixed_stop_loss_ret : float or None
        Stop levels expressed as returns.  In the current implementation a
        stop is applied only when ``fixed_stop_profit_ret`` is None and
        ``fixed_stop_loss_ret`` is not None: days whose intraday cumulative
        return touches ``1 + fixed_stop_loss_ret`` are booked at that level.
        With any other combination (including the defaults) no stop is
        applied (TODO: handle the take-profit case).
    trading_shift : int
        Shift applied to the bar-to-bar close returns before they are
        multiplied by the signal.
    comission : float
        Amount subtracted from the daily return of every day whose return
        differs from 1.
    data : pandas.DataFrame
        Input bars, see above.

    Returns
    -------
    (return_table, [P1, P2, P3, P4, P5, P6])
        ``return_table`` has one row per date.  The metrics are, in order:
        win rate (%), annualised return (%), maximum drawdown (%), Sharpe
        ratio, average profit/loss ratio and trading frequency in days.
        All six are 0 when no day has a non-zero strategy return.

    Raises
    ------
    ValueError
        If ``data`` is None.
    '''
    if data is None:
        raise ValueError("data must be a DataFrame with 'signal' and 'close' columns")

    seaborn.set(palette='deep', style='darkgrid')

    def maximum_down(values):
        """Return (max drawdown as a fraction, rows between peak and trough)."""
        curve = np.asarray(values, dtype=float).ravel()
        trough = int(np.argmax(np.maximum.accumulate(curve) - curve))  # end position
        if trough == 0:
            # The curve never falls below its running maximum; without this
            # guard np.argmax would be called on an empty slice and raise.
            return 0.0, 0
        peak = int(np.argmax(curve[:trough]))  # start position
        drawdown = (curve[trough] - curve[peak]) / curve[peak]  # maximum drawdown
        return drawdown, trough - peak

    # Split the datetime string index into its date and time-of-day parts.
    data['date'] = [str(stamp[:10]) for stamp in data.index]
    data['minute'] = [str(stamp[11:]) for stamp in data.index]

    # Force a flat position at the settle time, then carry signals forward.
    data['signal'] = np.where(data['minute'] == settle_time, 0, data['signal'])
    data['signal'] = data['signal'].ffill().fillna(0)
    data['real_return'] = data['close'].pct_change().shift(trading_shift)

    data['strategy'] = data['signal'] * data['real_return']

    # minute x date grid of returns -> intraday cumulative return per date.
    return_table = data.pivot(index='minute', columns='date', values='strategy')
    return_table = (return_table.fillna(0) + 1).cumprod(axis=0)
    return_table = return_table.T
    return_table['cum_ret_series'] = [list(row) for row in return_table.values]
    # .copy() so the column assignments below act on an independent frame.
    return_table = return_table[['cum_ret_series']].copy()
    return_table['series_max'] = [max(series) for series in return_table['cum_ret_series']]
    return_table['series_min'] = [min(series) for series in return_table['cum_ret_series']]
    return_table['strategy_return_daily'] = [series[-1] for series in return_table['cum_ret_series']]
# =============================================================================
#   take-profit / stop-loss module (TODO: handle the take-profit case)
    if fixed_stop_profit_ret is None and fixed_stop_loss_ret is not None:
        stop_level = fixed_stop_loss_ret + 1
        return_table['strategy_return_daily'] = np.where(
            return_table['series_min'] <= stop_level,
            stop_level,
            return_table['strategy_return_daily'])
# =============================================================================

    # Charge commission only on days that actually produced a return.
    return_table['strategy_return_daily'] = np.where(
        return_table['strategy_return_daily'] != 1,
        return_table['strategy_return_daily'] - comission,
        return_table['strategy_return_daily'])
    return_table['cum_strategy'] = return_table['strategy_return_daily'].cumprod()
# =============================================================================
#     plot and report the backtest results
    fig = plt.figure(figsize=(20, 10))
    figp = fig.subplots(1, 1)
    figp.xaxis.set_major_locator(ticker.MultipleLocator(50))
    figp.plot(return_table['cum_strategy'])

    return_table['strategy'] = return_table['strategy_return_daily'] - 1
    annual_rtn = pow(return_table['cum_strategy'].iloc[-1] / return_table['cum_strategy'].iloc[0],
                     250 / len(return_table)) - 1
    # daily excess return; 0.02/252 is presumably a 2% annual risk-free rate — verify
    return_table['ex_pct_close'] = return_table['strategy'] - 0.02 / 252

    strategy = return_table['strategy']
    winners = strategy > 0
    losers = strategy < 0
    decided_days = int((winners | losers).sum())

    if decided_days == 0:
        P1, P2, P3, P4, P5, P6 = 0, 0, 0, 0, 0, 0
    else:
        P1 = round(int(winners.sum()) / decided_days * 100, 2)
        P2 = round(annual_rtn * 100, 2)
        P3 = round(maximum_down(return_table['cum_strategy'].values)[0] * 100, 2)
        P4 = round((return_table['ex_pct_close'].mean() * math.sqrt(252)) / return_table['ex_pct_close'].std(), 2)
        P5 = round(strategy[winners].mean() / abs(strategy[losers].mean()), 2)
        P6 = round(return_table.shape[0] / return_table[strategy != 0].shape[0], 2)

    print('胜率: '+str(P1)+'%')
    print('年化收益率:'+str(P2)+'%')
    print('最大回撤:'+str(P3)+'%')
    print('夏普比率:'+str(P4))
    print('平均盈亏比:'+str(P5))
    print('交易频率(天):'+str(P6))

    del return_table['cum_ret_series']
# =============================================================================
    return return_table,[P1,P2,P3,P4,P5,P6]