Example #1
def _plot_heatmap(call_csv, samples, positions, sample_info, batch_counts):
    def sample_sort(x):
        batch = sample_info[x]["batch"]
        return (-batch_counts.get(batch, 0), batch, x)
    out_file = "%s.png" % os.path.splitext(call_csv)[0]
    df = pd.read_csv(call_csv)
    sv_rect = df.pivot(index="position", columns="sample", values="caller_support")
    sv_rect = sv_rect.reindex_axis(positions, axis=0)
    sv_rect = sv_rect.reindex_axis(["%s: %s" % (sample_info[x]["batch"], x)
                                    for x in sorted(samples, key=sample_sort)],
                                   axis=1)
    fig = plt.figure(tight_layout=True)
    plt.title("Shared structural variant calls for affected and unaffected in regions of interest",
              fontsize=16)
    ax = sns.heatmap(sv_rect, cbar=False,
                     cmap=sns.diverging_palette(255, 1, n=3, as_cmap=True))
    colors = sns.diverging_palette(255, 1, n=3)
    b1 = plt.bar(0, 0, bottom=-100, color=colors[-1])
    b2 = plt.bar(0, 0, bottom=-100, color=colors[0])
    ax.legend([b1, b2], ["affected", "unaffected"], ncol=2,
              bbox_to_anchor=(0.85, 0.995), loc=3)
    plt.setp(ax.get_xticklabels(), fontsize=8)
    plt.setp(ax.get_yticklabels(), fontsize=8)
    fig.set_size_inches(20, 8)
    fig.savefig(out_file)
def plotGraphicalCorrelationMatrix(data):
    '''
        Input : data
        Output : graphical correlation matrix
        Inspired from : https://stanford.edu/~mwaskom/software/seaborn/examples/many_pairwise_correlations.html
    '''
    try:
        print("\nGenerating the graphical correlation matrix...\n")
        time.sleep(3)

        corr = data.corr()
        f, ax = plt.subplots(figsize=(20, 20))
        # Generate a custom diverging colormap
        cmap = sns.diverging_palette(220, 10, as_cmap=True)
        # Draw the heatmap with the mask and correct aspect ratio
        sns.heatmap(corr, cmap=cmap,
                    square=True, xticklabels=False, yticklabels=False,
                    linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)
        plt.title('Correlation Matrix', fontsize=30)
        ax.set_ylabel('Features', fontsize=20)
        ax.set_xlabel('Features', fontsize=20)
        xticklabels = ['video_category_id','viewCount','likeCount','dislikeCount','favoriteCount','commentCount','dimension','definition','caption','licensedContent']
        ylabel = xticklabels[::-1]
        ax.set_xticklabels(xticklabels, rotation=45)
        ax.set_yticklabels(ylabel, rotation=0)
        name = "../YoutubeData/correlation_matrix.pdf"
        plt.savefig(name)
        print("\nPlease close the Bar Chart when you want to move ahead...")
        plt.show()

        print("You can always retrieve the graphical correlation matrix in YoutubeData folder.\n")
        time.sleep(3)
        return True
    except:
        raise VideoAnalysisException(" Error while Generating the graphical correlation matrix")
Example #3
def plot_Bayes_pval_map(priors, posterior):

    """

    :param priors: list of xidplus.prior classes
    :param posterior: xidplus.posterior class
    :return: the default xidplus Bayesian P value map plot
    """
    sns.set_style("white")
    mod_map_array = postmaps.replicated_maps(priors, posterior, posterior.samples['lp__'].size)
    Bayes_pvals = []

    cmap = sns.diverging_palette(220, 20, as_cmap=True)

    hdulists = list(map(lambda prior: postmaps.make_fits_image(prior, prior.sim), priors))
    fig = plt.figure(figsize=(10 * len(priors), 10))
    figs = []
    for i in range(0, len(priors)):
        figs.append(aplpy.FITSFigure(hdulists[i][1], figure=fig, subplot=(1, len(priors), i + 1)))
        Bayes_pvals.append(postmaps.make_Bayesian_pval_maps(priors[i], mod_map_array[i]))

    for i in range(0, len(priors)):
        figs[i].show_markers(priors[i].sra, priors[i].sdec, edgecolor='black', facecolor='black',
                             marker='o', s=20, alpha=0.5)
        figs[i].tick_labels.set_xformat('dd.dd')
        figs[i].tick_labels.set_yformat('dd.dd')
        figs[i]._data[
            priors[i].sy_pix - np.min(priors[i].sy_pix) - 1, priors[i].sx_pix - np.min(priors[i].sx_pix) - 1] = \
        Bayes_pvals[i]
        figs[i].show_colorscale(vmin=-6, vmax=6, cmap=cmap)
        figs[i].add_colorbar()
        figs[i].colorbar.set_location('top')
    return figs, fig
def plot_2_corr_heatmaps(corr1, corr2, labels, title1, title2):
    fig=plt.figure(figsize=(9, 8))
    gs = gridspec.GridSpec(1, 2)
    ax1 = fig.add_subplot(gs[0, 0])
    ax2 = fig.add_subplot(gs[0, 1])
    
    sns.set(style="white")
    
    # Generate a mask for the upper triangle
    mask = np.zeros_like(corr1, dtype=np.bool)
    mask[np.triu_indices_from(mask)] = True

    # Generate a custom diverging colormap
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    
    # Draw the heatmap with the mask and correct aspect ratio
    sns.heatmap(corr1, mask=mask, cmap=cmap, vmax=.3,
                square=True, xticklabels=labels, yticklabels=labels,
                linewidths=.5, ax=ax1, cbar_kws={"shrink": .3}, annot=True)
    ax1.set_title(title1)
    sns.heatmap(corr2, mask=mask, cmap=cmap, vmax=.3,
                square=True, xticklabels=labels, yticklabels=labels,
                linewidths=.5, ax=ax2, cbar_kws={"shrink": .3}, annot=True)
    ax2.set_title(title2)
    fig.tight_layout()
    plt.show()
Example #5
def res_matrix(mark,state,cut_off=40):
    path = os.path.join(get_data_dir(), "tmp", "{0} in {1}-{2}.csv".format(mark, state,cut_off))
    DF = pd.read_csv(path, sep='\t')
    Full_EID_list = get_full_EID_list()
    res_matrix = []
    tmp = [0.]*len(Full_EID_list)
    for i in range(0,len(DF.index),1):
        try:
            if DF.chromMiddle[i-1] == DF.chromMiddle[i]:
                tmp[Full_EID_list.index(DF.EID[i])] = DF.signalValue[i]
            else:
                res_matrix.append(tmp)
                tmp = [0.]*len(Full_EID_list)
        except:
            pass
    
    f, ax = plt.subplots(figsize=(15, 15))
    cmap = sns.diverging_palette(210, 10, as_cmap=True)
    sns.corrplot(np.array(res_matrix), annot=False, sig_stars=False,   # .T??
             diag_names=False, cmap=cmap, ax=ax)
    f.tight_layout()
    plt.show()
     
    path2 = os.path.join(get_data_dir(), "tmp","{0} in {1}-{2}_diff.csv".format(mark,state,cut_off)) 
    a = open(path2,'w')
    for i in range(0,len(res_matrix[0]),1):
        for j in range(0,len(res_matrix),1):
            a.write(str(res_matrix[j][i])+"\t")
        a.write("\n")
    a.close() 
def main():

    # Load list of pointing IDs
    todo_file = rawdata_dir + 'todo_list.ascii.dat'
    ID_list   = np.genfromtxt(todo_file, skip_header=1, usecols=[0], unpack=True,
                            dtype=str)
    N_los = len(ID_list)

    # Load bins centers
    bins_file   = 'rbins.ascii.dat'
    bin_centers = np.genfromtxt(bins_file, skip_header=1, usecols=[2], unpack=True)
    N_bins      = len(bin_centers)

    # Round bin centers to three decimal places
    bin_centers = np.round(bin_centers, 3)

    # Make array of column names for pandas Dataframe
    col_names = []

    for i in range(N_bins):
        name = str(bin_centers[i])
        col_names.append(name)

    # Recast as array
    col_names = np.asarray(col_names)

    # Create list of png's for use in making gif
    png_list =[]

    # Calculate correlation matrix for each l.o.s.
    for ID in ID_list:

        # Load counts from 1000 mocks with pandas
        # Each row is a mock, each column is a bin
        counts_filename = counts_dir + 'counts_all_' + ID + '.dat'
        DF = pd.read_csv(counts_filename, sep='\s+', names=col_names)

        # Calculate correlation matrix
        corr = DF.corr()

        # plot heatmap of matrix
        plt.clf()
        sns.set(style="white")
        mask = np.zeros_like(corr, dtype=np.bool)
        mask[np.triu_indices_from(mask)] = True
        f, ax = plt.subplots(figsize=(11, 9))
        cmap = sns.diverging_palette(145, 280, s=85, l=25, n=7, as_cmap=True)
        sns.heatmap(corr, mask=mask, cmap=cmap,square=True, annot=True,
                    xticklabels=col_names, yticklabels=col_names, linewidths=.5,
                    cbar_kws={"shrink": .5}, ax=ax, vmin=-1.0, vmax=1.0)
        plt.title('Correlation Matrix for l.o.s. ' + ID, fontsize=20)
        plt.xlabel('Bin Center (kpc)', fontsize=18)
        plt.ylabel('Bin Center (kpc)', fontsize=18)

        fig_name = plots_dir + 'corr_matrix_' + ID + '.png'
        plt.savefig(fig_name)
        png_list.append(fig_name)

    gif_name = plots_dir + 'corr_matrix.gif'
    GIF_MOVIE(png_list, gif_name)
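GIF_MOVIE is not defined in this snippet. A minimal sketch of such a helper, assuming the imageio library and that the second argument is the output path, might look like this:

import imageio

def GIF_MOVIE(png_list, gif_name):
    # Read each saved PNG frame and stitch the frames into an animated GIF.
    frames = [imageio.imread(p) for p in png_list]
    imageio.mimsave(gif_name, frames)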
    def __init__(self, master, x_train, y_train, x_test, y_test, evaluator, df, console):
        Tk.Frame.__init__(self, master)
        self.x_train = x_train
        self.y_train = y_train
        self.x_test = x_test
        self.y_test = y_test
        self.evaluator = evaluator
        self.df = df
        self.console = console

        frame_train = Tk.Frame(self)
        frame_train.pack(fill=Tk.BOTH, expand=1, padx=15, pady=15)
        plt.figure(figsize=(12, 20))
        plt.subplot(111)

        # white background
        sns.set(style="white")
        # feature correlation matrix (it contains not only the features but also the label)
        corr = df.corr()
        # mask the upper triangle of the matrix
        mask = np.zeros_like(corr, dtype=np.bool)
        mask[np.triu_indices_from(mask)] = True
        # draw the heatmap
        f, ax = plt.subplots(figsize=(11, 11))
        cmap = sns.diverging_palette(220, 10, as_cmap=True)
        sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, square=True, linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)
        plt.xticks(rotation=-90)
        plt.yticks(rotation=0)
        plt.title("Cardiotocography \"Feature-Feature\" & \"Feature-Label\" Correlations")
        self.attach_figure(plt.gcf(), frame_train)
def plot_feature_corr(X, f_sz = (11, 9)):
	"""
	Purpose: plot a correlation matrix for the features in X
	Inputs:	X: a pandas dataframe of feature values
			f_sz: a tuple for the figure size
	Output: the correlation matrix of X
	"""
	sns.set(style="white")

	# Compute the correlation matrix
	corr = X.corr()

	# Generate a mask for the upper triangle
	mask = np.zeros_like(corr, dtype=np.bool)
	mask[np.triu_indices_from(mask)] = True
	
	# Set up the matplotlib figure
	f, ax = plt.subplots(figsize= f_sz)
	
	# Generate a custom diverging colormap
	cmap = sns.diverging_palette(220, 10, as_cmap=True)

	# Draw the heatmap with the mask and correct aspect ratio
	sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3,
		square=True, linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)

	return corr
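A minimal usage sketch for plot_feature_corr, assuming only a numeric pandas DataFrame (the feature names below are made up):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
X = pd.DataFrame(rng.normal(size=(100, 4)),
                 columns=["age", "height", "weight", "income"])  # hypothetical features
corr = plot_feature_corr(X, f_sz=(6, 5))
print(corr.round(2))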
Example #9
    def _process(self,data):
        for x in data:
            
            if data[x][1] not in self.data:
                #prepares the data to visualise the xcor matrix of a specific batch number.
                self.data[data[x][1]]={}
                self.data[data[x][1]]['matrix']=numpy.identity(self.size)
                self.data[data[x][1]]['ro_count']=0
            
            self.data[data[x][1]]['matrix'][(data[x][2][1],data[x][2][0])]=data[x][0]
            #self.addToProvState('batch_'+str(data[x][1]),self.data[data[x][1]]['matrix'],metadata={'matrix':str(self.data[data[x][1]]['matrix'])},dep=['batch_'+str(data[x][1])],ignore_inputs=False)
            self.data[data[x][1]]['ro_count']+=1
            
            if self.data[data[x][1]]['ro_count']==(self.size*(self.size-1))/2:
                matrix=self.data[data[x][1]]['matrix']
                
                d = pd.DataFrame(data=matrix,
                 columns=range(0,self.size),index=range(0,self.size))
                
                mask = numpy.zeros_like(d, dtype=numpy.bool)
                mask[numpy.triu_indices_from(mask)] = True

                # Set up the matplotlib figure
                f, ax = plt.subplots(figsize=(11, 9))

                # Generate a custom diverging colormap
                cmap = sns.diverging_palette(220, 10, as_cmap=True)

                # Draw the heatmap with the mask and correct aspect ratio
                sns.heatmap(d, mask=mask, cmap=cmap, vmax=1,
                    square=True,
                    linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)
                
                sns.plt.savefig("./plots/"+str(data[x][1])+"_plot.png") 
                self.write('output',(matrix,data[x][1]),metadata={'matrix':str(d),'batch':str(data[x][1])},dep=['batch_'+str(data[x][1])])
def plot_EFA_retest(combined, size=4.6, dpi=300, 
                    ext='png', plot_dir=None):
    corr = combined.corr()
    max_val = abs(corr).max().max()
    
    fig = plt.figure(figsize=(size,size)); 
    ax = fig.add_axes([.1, .1, .8, .8])
    cbar_ax = fig.add_axes([.92, .15, .04, .7])
    sns.heatmap(corr, square=True, ax=ax, cbar_ax=cbar_ax,
                vmin=-1, vmax=1,
                cmap=sns.diverging_palette(220,15,n=100,as_cmap=True),
                cbar_kws={'orientation': 'vertical',
                          'ticks': [-1, 0, 1]}); 
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0)
    ax.tick_params(labelsize=size/len(corr)*40)
    
    # format cbar axis
    cbar_ax.set_yticklabels([format_num(-max_val), 0, format_num(max_val)])
    cbar_ax.tick_params(labelsize=size, length=0, pad=size/2)
    cbar_ax.set_ylabel('Factor Loading', rotation=-90, 
                   fontsize=size, labelpad=size/2)
    
    # set divider lines
    n = corr.shape[1]
    ax.axvline(n//2, 0, n, color='k', linewidth=size/3)
    ax.axhline(n//2, 0, n, color='k', linewidth=size/3)
    
    if plot_dir is not None:
            save_figure(fig, path.join(plot_dir, 'EFA_test_retest_heatmap.%s' % ext),
                        {'bbox_inches': 'tight', 'dpi': dpi})
            plt.close()
def plot_corr(file, score, stat, ind_var, brain_type):

    # seaborn
    sns.set(style="white")

    # import the dataframe
    dt = pd.read_csv(file)

    # Compute the correlation matrix
    corr = dt.corr()

    ### Create the matrix figure with seaborn
    # Generate a mask for the upper triangle
    mask = np.zeros_like(corr, dtype=np.bool)
    mask[np.triu_indices_from(mask)] = True

    # Set up the matplotlib figure
    f, ax = plt.subplots(figsize=(len(ind_var),len(ind_var)))

    # Generate a custom diverging colormap
    cmap = sns.diverging_palette(220, 10, as_cmap=True)

    # Draw the heatmap with the mask and correct aspect ratio
    sns.heatmap(corr, mask=mask, cmap=cmap, annot=False, ax=ax)
    plt.subplots_adjust(left= 0.30,bottom=0.30)
    plt.savefig(os.path.join(stat,score, "heatmap_" + score + "_" + stat + "_"+ brain_type + ".png"))
    plt.close()

    return corr
def f2hex_nodes(fx, vmin, vmax, midpoint):
    norm = MidpointNormalize(vmin=vmin, vmax=vmax, midpoint=midpoint)
    f2rgb = cm.ScalarMappable(norm=norm, cmap=sns.diverging_palette(150, 275, s=80, l=55, as_cmap=True))
    rgb = [f2rgb.to_rgba(rate)[:3] for rate in fx]
    colors_hex = [0]*(len(rgb))
    for i, color in enumerate(rgb):
        colors_hex[i] = '#%02x%02x%02x' % tuple([255 * fc for fc in color])
    return colors_hex
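MidpointNormalize is not defined in this snippet (which also assumes `from matplotlib import cm`); it is presumably a matplotlib Normalize subclass that maps an arbitrary midpoint to the centre of the colormap. A common sketch of that idea, not necessarily the original class, is:

import numpy as np
import matplotlib.colors as mcolors

class MidpointNormalize(mcolors.Normalize):
    """Normalize values so that `midpoint` maps to 0.5 of the colormap range."""
    def __init__(self, vmin=None, vmax=None, midpoint=None, clip=False):
        self.midpoint = midpoint
        super().__init__(vmin, vmax, clip)

    def __call__(self, value, clip=None):
        # Piecewise-linear mapping: vmin -> 0.0, midpoint -> 0.5, vmax -> 1.0
        x, y = [self.vmin, self.midpoint, self.vmax], [0.0, 0.5, 1.0]
        return np.ma.masked_array(np.interp(value, x, y))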
Example #13
def make_corr_plot(d, title="plot"):
    f, ax = plt.subplots(figsize=(9, 9))
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    sns.corrplot(d, annot=False, sig_stars=False,
                 diag_names=False, cmap=cmap, ax=ax)
    f.tight_layout()
    plt.title(title)
    f.savefig(title)
Example #14
def heat_map(corrs_mat):
    sns.set(style = "white")
    f, ax = plt.subplots(figsize = (11, 9))
    mask = np.zeros_like(corrs_mat, dtype=np.bool)
    mask[np.triu_indices_from(mask)] = True
    # Generate a custom diverging colormap
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    sns.heatmap(corrs_mat, mask=mask, cmap=cmap, ax=ax)
Example #15
def heatmap_Sigcelltype(args, df, path):
    import seaborn as sns
    '''
    To plot stack plot df as heatmap
    '''
    #print(args.key_celltype_list)
    if args['key_celltype_list']:
        cell_type = ['macrophage', 'Alveolar macrophage',
             'm1 macrophage','m2 macrophage', 'monocyte', 'dendritic cell', 'glial cell',
             'neutrophil', 'mast cell', 'Natural killer cell', 'Kupffer cell', 'Plasma cell',
             'eosinophil', 'naive B cell', 'memory B cell', 'B lymphocyte', 'T lymphocyte',
             'naive T cell', 'memory T cell', 'CD8 T cell', 'CD4 T cell', 'regulatory T cell','Cytotoxic T cell',
             'helper T cell']
        # creating df for heatmap
        new_df = pd.DataFrame(0, columns=df.columns, index=cell_type)
        #print(new_df)
        for k, v in df.iterrows():
            for c, val in v.iteritems():
                #print(c, val)
                new_df.loc[k, c] = val
        # plotting df
        new_df = new_df.T
        sns.set_context("talk")
        cmap = sns.diverging_palette(255, 15, sep=20, n=3, as_cmap=True)
        plt.clf()
        plt.figure(figsize=[20,10])
        sns.heatmap(new_df.round(2), cmap = cmap, vmin=0, vmax=0.2, yticklabels=True, cbar=False,
            xticklabels=True, linecolor='#ffffff',linewidths=0.01, square=True, annot=True)
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.savefig(os.path.join(path, 'GCAM_cofficients.svg'))
        plt.close()
    else:
        # creating df for heatmap
        df = df.T
        sns.set_context("talk")
        cmap = sns.diverging_palette(255, 15, sep=20, n=3, as_cmap=True)
        plt.clf()
        plt.figure(figsize=[20,10])
        sns.heatmap(df.round(2), cmap = cmap, vmin=0, vmax=0.2, yticklabels=True, cbar=False,
            xticklabels=True, linecolor='#ffffff',linewidths=0.01, square=True, annot=True)
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.savefig(os.path.join(path, 'GCAM_cofficients.svg'))
        plt.close()
def make_corr(data):
	"""This method creates a scatter, correlation matrix."""
	sns.set(style="white")
	corr = data.corr()
	mask = np.zeros_like(corr, dtype = np.bool)
	mask[np.triu_indices_from(mask)] = True
	f, ax = plt.subplots(figsize = (22, 18))
	cmap = sns.diverging_palette(255, 140, as_cmap = True)
	sns.heatmap(corr, mask = mask, cmap = cmap, vmax = .3, 
		square = True, xticklabels = True, yticklabels = True, linewidths = 1, cbar_kws = {"shrink": .5}, ax = ax)
Example #17
def heatmap(df):
    corr = df.drop(['group', 'id'], axis=1).corr()
    mask = np.zeros_like(corr, dtype=np.bool)
    mask[np.triu_indices_from(mask)] = True
    fig = plt.figure(figsize=[8,8])
    #cmap = sns.cubehelix_palette(8, light=0.8, dark=0.2, as_cmap=True)
    cmap = sns.diverging_palette(240, 10, as_cmap=True)
    sns.heatmap(corr, mask=mask, vmax=.8, square=True, cmap=cmap, linewidths=0.8, annot=True)
    plt.title("Correlation Matrix")
    plt.savefig(os.path.join(FIG_PATH, 'heatmap.png'))
def display_closest_DVs(consensus, n_closest=10):
    nth = {
        1: "first",
        2: "second",
        3: "third",
        4: "fourth",
        5: "fifth",
        6: "sixth",
        7: "seventh",
        8: "eigth",
        9: "ninth",
        10: "tenth",
    }
    df = consensus.get_consensus_cluster()['distance_df']
    df.index = format_variable_names(df.index)
    df.columns = format_variable_names(df.columns)

    sorted_df = pd.DataFrame(data=np.zeros((len(df),n_closest)), index=df.index)
    sorted_df.columns = [nth[i+1] for i in sorted_df.columns]
    for name, row in sorted_df.iterrows():
        closest = 1-df.loc[name].drop(name).sort_values()[:n_closest]
        closest = ['%s: %s%%' % (i,int(b*100)) for i,b in closest.iteritems()]
        sorted_df.loc[name] = closest
        
    def magnify():
        return [dict(selector="tr:hover",
                    props=[("border-top", "2pt solid black"),
                           ("border-bottom", "2pt solid black")]),
                dict(selector="th:hover",
                     props=[("font-size", "10pt")]),
                dict(selector="td",
                     props=[('padding', "0em 0em")]),
               # dict(selector="th:hover",
               #      props=[("font-size", "12pt")]),
                dict(selector="tr:hover td:hover",
                     props=[('max-width', '200px'),
                            ('font-weight', 'bold'),
                            ('color', 'black'),
                           ('font-size', '9pt')])
    ]

    cm =sns.diverging_palette(220,15,n=161)
    def color_cell(val):
        val = val[val.rindex(': ')+2:val.rindex('%')]
        color = to_hex(cm[int(val)+30])
        return 'background-color: %s' % color


    styler = sorted_df.style
    styler \
        .applymap(color_cell) \
        .set_properties(**{'max-width': '100px','font-size': '10pt', 'border-color': 'white'})\
        .set_precision(2)\
        .set_table_styles(magnify())
    return styler
def plot_factor_correlation(results, c, rotate='oblimin', title=True,
                            DA=False, size=4.6, dpi=300, ext='png', plot_dir=None):
    if DA:
        EFA = results.DA
    else:
        EFA = results.EFA
    loading = EFA.get_loading(c, rotate=rotate)
    # get factor correlation matrix
    reorder_vec = EFA.get_factor_reorder(c)
    phi = get_attr(EFA.results['factor_tree_Rout_%s' % rotate][c],'Phi')
    phi = pd.DataFrame(phi, columns=loading.columns, index=loading.columns)
    phi = phi.iloc[reorder_vec, reorder_vec]
    mask = np.zeros_like(phi)
    mask[np.tril_indices_from(mask, -1)] = True
    with sns.plotting_context('notebook', font_scale=2), sns.axes_style('white'):
        f = plt.figure(figsize=(size*5/4, size))
        ax1 = f.add_axes([0,0,.9,.9])
        cbar_ax = f.add_axes([.91, .05, .03, .8])
        sns.heatmap(phi, ax=ax1, square=True, vmax=1, vmin=-1,
                    cbar_ax=cbar_ax, 
                    cmap=sns.diverging_palette(220,15,n=100,as_cmap=True))
        sns.heatmap(phi, ax=ax1, square=True, vmax=1, vmin=-1,
                    cbar_ax=cbar_ax, annot=True, annot_kws={"size": size/c*15},
                    cmap=sns.diverging_palette(220,15,n=100,as_cmap=True),
                    mask=mask)
        yticklabels = ax1.get_yticklabels()
        ax1.set_yticklabels(yticklabels, rotation=0, ha="right")
        ax1.set_xticklabels(ax1.get_xticklabels(), rotation=90)
        if title == True:
            ax1.set_title('%s Factor Correlations' % results.ID.split('_')[0].title(),
                      weight='bold', y=1.05, fontsize=size*3)
        ax1.tick_params(labelsize=size*3)
        # format cbar
        cbar_ax.tick_params(axis='y', length=0)
        cbar_ax.tick_params(labelsize=size*2)
        cbar_ax.set_ylabel('Pearson Correlation', rotation=-90, labelpad=size*4, fontsize=size*3)
    
    if plot_dir:
        filename = 'factor_correlations_EFA%s.%s' % (c, ext)
        save_figure(f, path.join(plot_dir, filename), 
                    {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close()
Example #20
def l_reg(input_path):
    DF = pd.read_csv(input_path)
    DF.drop('gene_id', axis=1, inplace=True)
    #corr_mat = np.corrcoef(DF.as_matrix())
    f, ax = plt.subplots(figsize=(20, 20))
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    sns.corrplot(DF.as_matrix().T, annot=False, sig_stars=False,
             diag_names=False, cmap=cmap, ax=ax)
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    plt.savefig(os.path.join(get_data_dir(), "tmp", "H3K27me3_corrplot.png"))
Example #21
def square_matrix_plot(matrix, vmax=1, vmin=0):
    sns.set(style="white")
    corr = 1 - matrix
    mask = np.zeros_like(corr, dtype=np.bool)
    mask[np.triu_indices_from(mask)] = True
    fig, ax = plt.subplots(figsize=(11, 9))
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    sns.set_context("talk")
    # Draw the heatmap with the mask and correct aspect ratio
    sns.heatmap(corr, cmap="YlGnBu", vmax=vmax, vmin=vmin, square=True, linewidths=0.5, cbar_kws={"shrink": 0.9}, ax=ax)
    plt.title("pairwise")
    return fig, ax
Example #22
def plot_morph(good_spikes, cluster, morph_dim, spacing=.02, ymax=.04):
    plt.figure(figsize=(20,20))
    with sns.color_palette(sns.xkcd_palette(["twilight blue", "kermit green"]), 2):
        plt.subplot(222)
        stim_name = morph_dim[1]+"_rec"
        spks2plot = good_spikes[(good_spikes['cluster']==cluster) & (good_spikes['stim_name'].str.contains(stim_name))]
        plot_fr_se(spks2plot["stim_aligned_time"].values, spks2plot["stim_presentation"].values, label=stim_name)
        stim_name = morph_dim+'128'
        spks2plot = good_spikes[(good_spikes['cluster']==cluster) & (good_spikes['stim_name'].str.contains(stim_name))]
        plot_fr_se(spks2plot["stim_aligned_time"].values, spks2plot["stim_presentation"].values, label=stim_name)
        plt.legend(loc=1)
        ax = plt.gca()
        ax.plot((0, 0), (0, ymax), c=".2", alpha=.5)
        ax.plot((stim_length, stim_length), (0, ymax), c=".2", alpha=.5)
        xlim(-.5, 1)
        ylim(0,ymax)
        plt.xticks([0, .5])
        plt.yticks([0, .5*ymax, ymax])
        plt.title('cell: %d   morph dim: %s' % (cluster, morph_dim))

        plt.subplot(224)
        stim_name = morph_dim[0]+"_rec"
        spks2plot = good_spikes[(good_spikes['cluster']==cluster) & (good_spikes['stim_name'].str.contains(stim_name))]
        plot_fr_se(spks2plot["stim_aligned_time"].values, spks2plot["stim_presentation"].values, label=stim_name)
        stim_name = morph_dim+'001'
        spks2plot = good_spikes[(good_spikes['cluster']==cluster) & (good_spikes['stim_name'].str.contains(stim_name))]
        plot_fr_se(spks2plot["stim_aligned_time"].values, spks2plot["stim_presentation"].values, label=stim_name)
        plt.legend(loc=1)
        ax = plt.gca()
        ax.plot((0, 0), (0, ymax), c=".2", alpha=.5)
        ax.plot((stim_length, stim_length), (0, ymax), c=".2", alpha=.5)
        xlim(-.5, 1)
        ylim(0,ymax)
        plt.xticks([0, .5])
        plt.yticks([0, .5*ymax, ymax])

    with sns.color_palette(sns.diverging_palette(262, 359, s=99, l=43, sep=1, n=128, center="dark"), 128):
        plt.subplot(121)
        spks_morph = good_spikes[(good_spikes['cluster']==cluster) & (good_spikes['morph_dim']==morph_dim)]
        morph_ymax = 128*spacing+ymax
        for morph_pos in np.unique(spks_morph['morph_pos'].values):
            stim_name = morph_dim + str(int(morph_pos))
            spks2plot = spks_morph[spks_morph['morph_pos'] == morph_pos]
            plot_fr_se(spks2plot["stim_aligned_time"].values, spks2plot["stim_presentation"].values, offset=morph_pos*spacing, label=stim_name)
        ax = plt.gca()
        ax.plot((0, 0), (0, morph_ymax), c=".2", alpha=.5)
        ax.plot((stim_length, stim_length), (0, morph_ymax), c=".2", alpha=.5)
        xlim(-.5, 1)
        ylim(0,morph_ymax)
        plt.xticks([0, .5])
        plt.yticks([])
        plt.tick_params(axis='y', which='both', bottom='off', top='off', labelbottom='off')
    sns.despine()
 def plotMatrixHeat(self, matrix, path):
     f, ax = plt.subplots(figsize=(11, 9))
     
     # Generate a custom diverging colormap
     cmap = sns.diverging_palette(220, 10, as_cmap=True)
     
     # Draw the heatmap with the mask and correct aspect ratio
     sns.heatmap(matrix, cmap=cmap, vmax=matrix.max(),
                 square=True, xticklabels=5, yticklabels=5,
                 linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)
     
     plt.savefig(path)
Example #24
def main():
    movie_raw_data = pd.read_csv('../input/movie_metadata.csv')
    print(movie_raw_data.head(3))

    print(movie_raw_data.isnull().sum())

    print(movie_raw_data.shape)
    movie_raw_data_dropna=movie_raw_data.dropna()
    print(movie_raw_data_dropna.shape)
    print(movie_raw_data.dtypes)


    # movie_filterd_imdbscore=movie_raw_data['imdb_score'].loc
    # movie_filterd_imdbscore=movie_raw_data.loc[movie_raw_data['imdb_score'].isin([2,3])]

    movie_filterd_imdbscore_first=movie_raw_data.loc[movie_raw_data['imdb_score'] >5]
    movie_filterd_imdbscore_from_raw=movie_raw_data.loc[movie_raw_data['imdb_score'] <8]

    print(movie_filterd_imdbscore_first.shape)


    movie_filterd_imdbscore_second=movie_filterd_imdbscore_first.loc[movie_raw_data['imdb_score'] <8]

    print(movie_filterd_imdbscore_second.shape)
    print(movie_filterd_imdbscore_from_raw.shape)

    print('*********************************')

    print(movie_raw_data_dropna.head(3))
    profit=(((movie_raw_data_dropna['gross'].values-movie_raw_data_dropna['budget'].values))/(movie_raw_data_dropna['gross'].values))*100
    print(profit)

    movie_raw_data_dropna.loc[:,'profit']=pd.Series(profit, movie_raw_data_dropna.index)
    print(movie_raw_data_dropna.shape)
    print(movie_raw_data_dropna.head(3))


    corr=movie_raw_data_dropna.corr()
    print(corr)

    f, ax = plt.subplots(figsize=(11, 9))
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    sns.heatmap(corr, cmap=cmap, vmax=1,
            square=True,
            linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)

    g = sns.jointplot(x="title_year", y="profit",kind='scatter',size=10,ylim = [0,110],xlim=[1980,2020],data=movie_raw_data_dropna)
    h = sns.jointplot(x="imdb_score", y="profit",kind='reg',size=10,ylim = [0,110],data=movie_raw_data_dropna)

    # j = sns.pairplot(movie_raw_data_dropna,hue='content_rating')

    plt.show()
def heatmap_plot_zscore(df_zscore_features, df_all, output_dir, title=None):
    pl.figure()

    # Create a custom palette for creline colors
    cre_lines = np.unique(df_all['cre_line'])
    cre_line_pal = sns.color_palette("hls", len(cre_lines))
    cre_line_lut = dict(zip(cre_lines, cre_line_pal))  # map creline type to color
    creline_colors = df_all['cre_line'].map(cre_line_lut)

    # Create a custom palette for dendrite_type colors
    dendrite_types = np.unique(df_all['dendrite_type'])
    dendrite_type_pal = sns.color_palette("hls", len(dendrite_types))
    dendrite_type_lut = dict(zip(dendrite_types, dendrite_type_pal))
    dendritetype_colors = df_all['dendrite_type'].map(dendrite_type_lut)

    # Create a custom colormap for the heatmap values
    cmap = sns.diverging_palette(240, 10, as_cmap=True)

    r_linkage = hierarchy.linkage(df_zscore_features, method='ward', metric='euclidean')
    c_linkage = hierarchy.linkage(df_zscore_features.T, method='ward', metric='euclidean')

    # PLOT
    g = sns.clustermap(df_zscore_features, row_linkage=r_linkage, method='ward', metric='euclidean',
                       linewidths=0.0, row_colors=dendritetype_colors, cmap=cmap,
                       xticklabels=True, yticklabels =False)
    if title:
        pl.title(title)
    # TODO : adjust creline tag size
    # print type(g.data)
    #print g.data.columns
    #crelines = g.data['cre_line']
    #g.ax_heatmap.set_yticklabels(crelines, fontsize=3)

    assignment = hierarchy.fcluster(r_linkage, 2, criterion="maxclust")

    # Legend for row and col colors
    for label in dendrite_types:
        g.ax_row_dendrogram.bar(0, 0, color=dendrite_type_lut[label], label=label, linewidth=0)
        g.ax_row_dendrogram.legend(loc="center", ncol=1)

    #for label in cre_lines:
    #   g.ax_col_dendrogram.bar(0, 0, color=cre_line_lut[label], label=label, linewidth=0)
    #   g.ax_col_dendrogram.legend(loc="center", ncol=3)


    #pl.show()
    pl.title('zscore')
    filename = output_dir + '/zscore_feature_heatmap.png'
    pl.savefig(filename, dpi=300)
    print("save zscore matrix heatmap figure to :" + filename)
    pl.close()
    return g
Example #26
def plot_correlation_map(df):
    corr = df.corr()
    _, ax = plt.subplots(figsize=(12, 10))
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    _ = sns.heatmap(
        corr,
        cmap=cmap,
        square=True,
        cbar_kws={'shrink': .9},
        ax=ax,
        annot=True,
        annot_kws={'fontsize': 12}
    )
Example #27
def corr_analysis(data):
    corr=data.corr()
    #Generate a mask for the upper triangle
    mask = np.zeros_like(corr, dtype=np.bool)
    mask[np.triu_indices_from(mask)] = True
    f, ax = plt.subplots(figsize=(20, 20))
    # Generate a custom diverging colormap
    cmap = sns.diverging_palette(110, 10,as_cmap=True)
    # Draw the heatmap with the mask and correct aspect ratio
    sns.heatmap(corr, mask=mask, cmap=cmap, vmax=1,linewidths=.5,
            cbar_kws={"shrink": .6},annot=True,annot_kws={"size":8} )
    plt.xticks(rotation=90)
    plt.yticks(rotation=0)
    plt.show()
Example #28
def Show_correlation(df, cols=None):
    """
    Use a heatmap of the correlations of DataFrame columns to estimate the
    features to engineer.
    """
    if cols:
        df = df[cols + ['target']]
    corrmat = df.corr()
    plt.figure(figsize = (12, 10))
    cmap = sns.diverging_palette(h_neg=210, h_pos=350, s=90, l=30, as_cmap=True)
    sns.heatmap(corrmat, cmap=cmap, annot=True, fmt="f")
    plt.xticks(rotation = 90); plt.yticks(rotation = 0)
    plt.tight_layout()
    plt.show()
Example #29
 def get_cor_matrix( self,method="pearson" ):
    self.method = method
    
    out_cor_file   = "%s.corMat.%s.pdf" % ( ".".join( self.infile.split(".")[:-2] ), self.method )
    
    pd_mat = pd.DataFrame( self.mat.matrix )
    pd_mat.columns = self.mat.colname
    pd_mat.index   = self.mat.rowname
    self.cor_mat   = pd_mat.corr( self.method ).values
    
    sns.set(style="darkgrid")
    f, ax = plt.subplots(figsize=(9, 9))
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    sns.corrplot(pd_mat, annot=False, sig_stars=False, diag_names=False, cmap=cmap, ax=ax, cmap_range=(0.0, 1.0),method=self.method  )
    f.savefig( out_cor_file,format="pdf" )
def plot_jaccard_heatmap(communities,shape=30,out=None):
	data =np.array(list(map(jaccard_similarity,list(product(communities, repeat=2)))))
	data = data.reshape(shape,shape)
	ax = plt.axes()
	cmap = sns.diverging_palette(220, 10, as_cmap=True)
	heat = sns.heatmap(data,cmap=plt.cm.Reds,square=True,linewidths=.5, cbar_kws={"shrink": .5},ax = ax)
	heat.invert_yaxis()
	plt.ylabel("Comunity ID")
	plt.xlabel("Comunity ID")
	plt.yticks(size='small',rotation='horizontal')
	plt.xticks(size='small',rotation='vertical')
	if out == None:
		plt.show()
	else:
		plt.savefig(out+".svg",bbox_inches="tight")
	plt.close()
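jaccard_similarity is not shown above. Since it is mapped over `product(communities, repeat=2)` (assuming `from itertools import product`), a plausible sketch of it takes a 2-tuple of communities, each an iterable of node ids:

def jaccard_similarity(pair):
    # pair is a 2-tuple of communities; each community is an iterable of node ids
    a, b = set(pair[0]), set(pair[1])
    if not a and not b:
        return 1.0
    return len(a & b) / len(a | b)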
Example #31
        ax2.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
        ax3.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
        ax4.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
    plt.savefig('%s' % (savepath), bbox_inches='tight')
    plt.close(fig)


#-----------------------------------------------------------------------------#
if __name__ == '__main__':
    main()

color_palette = {
    'purples':
    sns.cubehelix_palette(12)[5:10],
    'groups': [
        sns.diverging_palette(10, 240, n=27),
        sns.diverging_palette(10, 240, n=27),
        sns.diverging_palette(10, 240, n=27),
        sns.diverging_palette(10, 240, n=27), '0.5'
    ],
    'greens':
    sns.cubehelix_palette(rot=-.4, n_colors=12)[4:9],
    'purples_ex':
    sns.cubehelix_palette(12),
    'g_ex':
    sns.color_palette("GnBu_d", n_colors=16)
}

styles = {
    'cadis': {
        'ls': '-.',
Example #32
    def score(self, y_true, y_probs, subgroup_df, output=True):
        """Parameters
        ----------
        y_true : pandas Series, pandas DataFrame
            The true values for all observations.
        y_probs : pandas Series, pandas DataFrame
            The model's predicted probabilities for all observations.
        subgroup_df : pandas DataFrame
            DataFrame of all subgroups to be compared. Each column should be a
            specific subgroup, with 1 indicating that the observation is part of
            the subgroup and 0 indicating that it is not. There should be no
            values other than 1 or 0 in the dataframe.
        output : boolean (default = True)
            If True, returns a heatmap of the AEG scores.
        """

        import numpy as np
        import pandas as pd
        from scipy.stats import mannwhitneyu

        def calc_pos_aeg(parameter, df):
            sub_probs = df[((df.target == 1) & (df[parameter] == 1))]['probs']
            back_probs = df[((df.target == 1) & (df[parameter] == 0))]['probs']
            pos_aeg = (.5 - (mannwhitneyu(sub_probs, back_probs)[0] /
                             (len(sub_probs) * len(back_probs))))
            return round(pos_aeg, 2)

        def calc_neg_aeg(parameter, df):
            sub_probs = df[((df.target == 0) & (df[parameter] == 1))]['probs']
            back_probs = df[((df.target == 0) & (df[parameter] == 0))]['probs']
            neg_aeg = (.5 - (mannwhitneyu(sub_probs, back_probs)[0] /
                             (len(sub_probs) * len(back_probs))))
            return round(neg_aeg, 2)

        # ensure that the passed dataframe has an appropriate axis
        subgroup_df.reset_index(drop=True, inplace=True)

        # ensure input true and prob values are formatted correctly
        if type(y_true) == pd.core.frame.DataFrame:
            y_true.columns = ['target']
            y_true.reset_index(drop=True, inplace=True)
        else:
            y_true = pd.DataFrame(y_true,
                                  columns=['target']).reset_index(drop=True)

        if type(y_probs) == pd.core.frame.DataFrame:
            y_probs.columns = ['probs']
            y_probs.reset_index(drop=True, inplace=True)
        else:
            y_probs = pd.DataFrame(y_probs,
                                   columns=['probs']).reset_index(drop=True)

        # combine all inputs into a DataFrame
        input_df = pd.concat([y_true, y_probs, subgroup_df], axis=1)

        # build dataframe and fill with ROC AUC metrics
        self.output_df = pd.DataFrame(index=subgroup_df.columns,
                                      columns=['Positive AEG', 'Negative AEG'])
        for col in subgroup_df.columns:
            self.output_df.loc[col] = [
                calc_pos_aeg(col, input_df),
                calc_neg_aeg(col, input_df)
            ]

        if output:
            import seaborn as sns
            sns.heatmap(self.output_df.astype('float32'),
                        vmin=-.5,
                        vmax=.5,
                        cmap=sns.diverging_palette(10, 10, n=101),
                        annot=True,
                        linewidths=2)
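A minimal usage sketch for this scorer, assuming the surrounding class has been instantiated as `aeg` (a made-up name) and using invented subgroup columns:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
n = 500
y_true = pd.Series(rng.integers(0, 2, size=n))      # true binary labels
y_probs = pd.Series(rng.uniform(0, 1, size=n))      # predicted probabilities
subgroup_df = pd.DataFrame({                        # 1/0 membership flags per subgroup
    "group_a": rng.integers(0, 2, size=n),
    "group_b": rng.integers(0, 2, size=n),
})
aeg.score(y_true, y_probs, subgroup_df)             # draws a heatmap of the AEG scores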
Example #33
def plot_top_heatmap(coef_df, auc_vals, pheno_dict, args):
    coef_mat = coef_df.groupby(level=0, axis=1).mean()
    coef_mat = (coef_mat.transpose() / coef_mat.abs().max(axis=1)).transpose()

    if args.auc_cutoff == -1:
        min_auc = auc_vals[MuType({('Gene', args.gene): pnt_mtype})]
    else:
        min_auc = args.auc_cutoff

    plt_mtypes = {
        mtype
        for mtype, auc_val in auc_vals.iteritems()
        if (not isinstance(mtype, RandomType) and auc_val >= min_auc and (
            tuple(mtype.subtype_iter())[0][1]
            & copy_mtype).is_empty())
    }
    plt_genes = set()

    for mtype in plt_mtypes:
        plt_genes |= set(coef_mat.loc[mtype].abs().sort_values()[-10:].index)

    fig, ax = plt.subplots(figsize=(4 + len(plt_genes) / 4,
                                    1.3 + len(plt_mtypes) / 5.3))

    plot_df = coef_mat.loc[plt_mtypes, plt_genes]
    plot_df = plot_df.iloc[dendrogram(linkage(
        distance.pdist(plot_df, metric='euclidean'), method='centroid'),
                                      no_plot=True)['leaves'],
                           dendrogram(linkage(distance.pdist(
                               plot_df.transpose(), metric='euclidean'),
                                              method='centroid'),
                                      no_plot=True)['leaves']]

    coef_cmap = sns.diverging_palette(13, 131, s=91, l=41, sep=3, as_cmap=True)

    sns.heatmap(plot_df,
                cmap=coef_cmap,
                center=0,
                xticklabels=False,
                yticklabels=False)

    for i, mtype in enumerate(plot_df.index):
        if mtype == MuType({('Gene', args.gene): pnt_mtype}):
            lbl_wgt = 'bold'
        else:
            lbl_wgt = 'normal'

        ax.text(-0.29 / plot_df.shape[1],
                1 - ((i + 0.53) / plot_df.shape[0]),
                get_fancy_label(tuple(mtype.subtype_iter())[0][1]),
                size=9,
                weight=lbl_wgt,
                ha='right',
                va='center',
                transform=ax.transAxes)

    for i, gene in enumerate(plot_df.columns):
        ax.text((i + 1) / plot_df.shape[1],
                -0.29 / plot_df.shape[0],
                gene,
                size=12,
                ha='right',
                va='top',
                rotation=47,
                transform=ax.transAxes,
                clip_on=False)

    plt.savefig(os.path.join(
        plot_dir, '__'.join([args.expr_source, args.cohort]),
        "{}_top-heatmap_{}.svg".format(args.gene, args.classif)),
                bbox_inches='tight',
                format='svg')

    plt.close()
Example #34
def graphing(train, train_truth):
    n = 77
    ########### correlation graph
    corr = pd.DataFrame(train).corr()
    mask = np.triu(np.ones_like(
        corr, dtype=np.bool))  # Generate a mask for the upper triangle
    f, ax = plt.subplots(figsize=(11, 9))  # Set up the matplotlib figure
    cmap = sns.diverging_palette(
        220, 10, as_cmap=True)  # Generate a custom diverging colormap
    sns.set(style="white")
    sns.heatmap(corr,
                mask=mask,
                cmap=cmap,
                vmax=.3,
                center=0,
                square=True,
                linewidths=.5,
                cbar_kws={
                    "shrink": .5
                })  # Draw the heatmap with the mask and correct aspect ratio
    f.savefig('correlation_map.png')
    ########## PCA
    data = train
    data = stats.zscore(data, ddof=1)
    #scaler = MinMaxScaler()
    #data = scaler.fit_transform(data)
    pca = PCA(n_components=n)
    graph_df = pca.fit_transform(data)
    graph_df_var = pca.explained_variance_ratio_
    graph_df = pd.DataFrame(graph_df)
    graph_df['class'] = pd.DataFrame(train_truth)
    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(1, 1, 1)
    ax.set_xlabel('Principal Component 1', fontsize=15)
    ax.set_ylabel('Principal Component 2', fontsize=15)
    ax.set_title('2 component PCA', fontsize=20)
    targets = ['0', '1', '2', '3', '4', '5', '6', '7', '8']
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'sienna']
    for t, color in zip(targets, colors):
        i = []
        for ind, j in enumerate(graph_df['class']):
            if int(j) == int(t):
                i.append(ind)
        ax.scatter(graph_df.loc[i, 0],
                   graph_df.loc[i, 1],
                   c=color,
                   alpha=0.65,
                   s=30)
    ax.legend(targets)
    ax.grid()
    fig.savefig('2_pca.png')
    ######################## PCA important components histogram
    graph_var_cumulative = np.cumsum(graph_df_var)
    trace1 = dict(type='bar',
                  x=['PC %s' % i for i in range(1, n)],
                  y=graph_df_var,
                  name='Individual')
    trace2 = dict(type='scatter',
                  x=['PC %s' % i for i in range(1, n)],
                  y=graph_var_cumulative,
                  name='Cumulative')
    data = [trace1, trace2]
    layout = dict(title='Explained variance by different principal components',
                  yaxis=dict(title='Explained variance in percent'),
                  annotations=list([
                      dict(
                          x=1.16,
                          y=1.05,
                          xref='paper',
                          yref='paper',
                          text='Explained Variance',
                          showarrow=False,
                      )
                  ]))
    fig = dict(data=data, layout=layout)
    plot(fig, filename='selecting-principal-components.png')
    ######################## LDA
    data = train
    data = remove_collinear(data)  #LDA
    data = pd.DataFrame(data)
    #scaler = MinMaxScaler()
    #data = scaler.fit_transform(data)
    data = stats.zscore(data, ddof=1)
    lda = LDA(n_components=2)
    graph_df = lda.fit_transform(data, train_truth.ravel())
    graph_df = pd.DataFrame(graph_df)
    graph_df['class'] = pd.DataFrame(train_truth)
    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(1, 1, 1)
    ax.set_xlabel('Linear Discriminant 1', fontsize=15)
    ax.set_ylabel('Linear Discriminant 2', fontsize=15)
    ax.set_title('2 Discriminant LDA', fontsize=20)
    targets = ['0', '1', '2', '3', '4', '5', '6', '7', '8']
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'sienna']
    for t, color in zip(targets, colors):
        i = []
        for ind, j in enumerate(graph_df['class']):
            if int(j) == int(t):
                i.append(ind)
        ax.scatter(graph_df.loc[i, 0],
                   graph_df.loc[i, 1],
                   c=color,
                   alpha=0.65,
                   s=30)
    ax.legend(targets)
    ax.grid()
    fig.savefig('2_lda.png')
    return
Example #35
# ## Correlation Matrix between all variables

# In[21]:


sns.set(style='white', font_scale= 1)
corr = dataset.corr() # here we compute the correlation between the numerical variables
# Generate a mask for the upper triangle
mask = np.zeros_like(corr, dtype= np.bool) # To generate a numpy array from correlation with true or false
mask[np.triu_indices_from(mask)] = True # To have the index of the upper triangle
# Setup the matplotlib figures
f, ax = plt.subplots(figsize = (20,10))
f.suptitle('Correlation Matrix', fontsize=40)
# Generate a custom diverging colormap
cmap = sns.diverging_palette(10, 0, as_cmap=True)
# Draw the heatmap with the mask and the correct aspect ratio
sns.heatmap(corr, mask=mask, annot=True, cmap=cmap, vmax=1, center=0,
            square=True, linewidth=5, cbar_kws={'shrink': .5})


# # Feature engineering
# Explain Here

# In[22]:


dataset.dtypes
# The dates are stored as objects, so we need to convert them. One reason is that we can then calculate the difference between the first-open date and the enrolled date.
# This difference can be visualised as a distribution.
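A short sketch of that conversion, assuming the date columns are named 'first_open' and 'enrolled_date' (the real column names may differ):

dataset['first_open'] = pd.to_datetime(dataset['first_open'])
dataset['enrolled_date'] = pd.to_datetime(dataset['enrolled_date'])
# Hours between first open and enrollment, ready for a distribution plot
dataset['difference'] = (dataset['enrolled_date'] - dataset['first_open']).dt.total_seconds() / 3600.0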
Example #36
import matplotlib.pyplot as plt
import seaborn as sns
import viscid
from viscid.plot import vpyplot as vlt

f = viscid.load_file('./otico_001.3d.xdmf')

mymap = sns.diverging_palette(28, 240, s=95, l=50, as_cmap=True)

figure = plt.figure(figsize=(14, 10))
g = f.get_grid(time=12)
vlt.plot(g['bx']['z=0'], cmap=mymap, style='contourf', levels=256)
vlt.savefig('OT_bx.png')
plt.show()
Example #37
from heapq import nlargest

c = nlargest(3, b, key=lambda e: e[1])
print(c)
d = []
for i in c:
    d.append(i[0])

#Compute the correlation matrix
corr = df[d].corr()
print(corr)
# Generate a mask for the upper triangle
mask = np.triu(np.ones_like(corr, dtype=bool))

# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 9))

# Generate a custom diverging colormap
cmap = sns.diverging_palette(230, 20, as_cmap=True)

# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr,
            mask=mask,
            cmap=cmap,
            vmax=.3,
            center=0,
            square=True,
            linewidths=.5,
            cbar_kws={"shrink": .5})
#
Example #38
# # Challenge 7
# Plot the chart of appearances per genre. It can be a bar-type chart.

# In[9]:

# Importing the required libraries
import matplotlib.pyplot as plt
import seaborn as sns

# Setting styles
sns.set_style('whitegrid')
cmap = sns.diverging_palette(150,
                             275,
                             l=55,
                             s=80,
                             n=9,
                             center='dark',
                             as_cmap=True)

# Figure size
plt.figure(figsize=(15, 8))

# Choosing the chart type
generos.plot(kind='bar', colormap=cmap)

# Setting the title
plt.title('Ocorrência de gêneros', fontsize=20)

# Setting the horizontal axis label
plt.xlabel("Gêneros", fontsize=16)
Example #39
    cs_i = []
    for i in range(0, len(df_norm)):
        cs_j = []
        for j in range(0, len(df_norm)):
            cs_j.append(
                float(
                    manual_cosine_similarity(
                        df_norm.iloc[i, :].values.reshape(1, -1),
                        df_norm.iloc[j, :].values.reshape(1, -1))))
        cs_i.append(cs_j)
    return cs_i


cosine_similarity_calculation_manually = calculate_cosine_similarity_manually(
    df_norm)

# Creating dataframe for manually calculated cosine similarity
cosine_similarity_manual_df = pd.DataFrame(
    cosine_similarity_calculation_manually)

#*******************************************************************************

df_list = cosine_similarity_df.iloc[0:10, 0:10]

fig, ax = plt.subplots(figsize=(10, 10))
cmap = sns.diverging_palette(240, 10, s=80, l=45, as_cmap=True)
sns.heatmap(df_list, cmap=cmap, vmin=0.95, vmax=1, annot=True)

linear_kernel(df_norm.iloc[0, :].values.reshape(1, -1),
              df_norm.iloc[0, :].values.reshape(1, -1))
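manual_cosine_similarity is not shown in this excerpt. A plausible sketch (an assumption about its behaviour) for two row vectors of shape (1, n) is:

import numpy as np

def manual_cosine_similarity(u, v):
    # u and v are arrays of shape (1, n); return their cosine similarity as a scalar
    u, v = np.asarray(u).ravel(), np.asarray(v).ravel()
    return float(np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v)))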
Example #40
# %%
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

sns.set(style="whitegrid")
test = pd.read_csv('../data/test_edited.csv')
train = pd.read_csv('../data/train_edited.csv')
default_cmap = sns.diverging_palette(220, 20, n=13)

# %%
#### All Variables Except Sale Price ####
all_but = [
    'Id', 'PID', 'MS_SubClass', 'MS_Zoning', 'Lot_Frontage', 'Lot_Area',
    'Street', 'Alley', 'Lot_Shape', 'Land_Contour', 'Utilities', 'Lot_Config',
    'Land_Slope', 'Neighborhood', 'Condition_1', 'Condition_2', 'Bldg_Type',
    'House_Style', 'Overall_Qual', 'Overall_Cond', 'Year_Built',
    'Year_Remod/Add', 'Roof_Style', 'Roof_Matl', 'Exterior_1st',
    'Exterior_2nd', 'Mas_Vnr_Type', 'Mas_Vnr_Area', 'Exter_Qual', 'Exter_Cond',
    'Foundation', 'Bsmt_Qual', 'Bsmt_Cond', 'Bsmt_Exposure', 'BsmtFin_Type_1',
    'BsmtFin_SF_1', 'BsmtFin_Type_2', 'BsmtFin_SF_2', 'Bsmt_Unf_SF',
    'Total_Bsmt_SF', 'Heating', 'Heating_QC', 'Central_Air', 'Electrical',
    '1st_Flr_SF', '2nd_Flr_SF', 'Low_Qual_Fin_SF', 'Gr_Liv_Area',
    'Bsmt_Full_Bath', 'Bsmt_Half_Bath', 'Full_Bath', 'Half_Bath',
    'Bedroom_AbvGr', 'Kitchen_AbvGr', 'Kitchen_Qual', 'TotRms_AbvGrd',
    'Functional', 'Fireplaces', 'Fireplace_Qu', 'Garage_Type', 'Garage_Yr_Blt',
    'Garage_Finish', 'Garage_Cars', 'Garage_Area', 'Garage_Qual',
    'Garage_Cond', 'Paved_Drive', 'Wood_Deck_SF', 'Open_Porch_SF',
    'Enclosed_Porch', '3Ssn_Porch', 'Screen_Porch', 'Pool_Area', 'Pool_QC',
    'Fence', 'Misc_Feature', 'Misc_Val', 'Mo_Sold', 'Yr_Sold', 'Sale_Type',
Example #41
def main(lrate, n_samples, bsize, n_nets):
    X, Y = make_moons(noise=0.2, random_state=0, n_samples=1000)
    X, Y = shuffle(X, Y)
    X = scale(X)
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.5)
    net = get_net()
    sampler = SGHMCSampler(precondition=True, ignore_burn_in=True)
    all_params = lasagne.layers.get_all_params(net, trainable=True)

    Xt = T.matrix()
    Yt = T.matrix()
    U, params = neg_log_like(net, Xt, Yt, Xsize=X_train.shape[0])
    # we could also use these updates in our custom function
    # but instead we will use the sampler.step function below
    updates = sampler.prepare_updates(U,
                                      params,
                                      lrate,
                                      mdecay=0.01,
                                      inputs=[Xt, Yt])
    err = class_error(net, Xt, Yt)
    compute_err = theano.function([Xt, Yt], err)
    predict = theano.function([Xt], lasagne.layers.get_output(net, Xt))

    print("Starting sampling")
    samples = deque(maxlen=n_nets)
    for i in range(n_samples):
        start = (i * bsize) % (X_train.shape[0] - bsize)
        xmb = floatX(X_train[start:start + bsize])
        ymb = floatX(Y_train[start:start + bsize]).reshape((-1, 1))
        _, nll = sampler.step(xmb, ymb)
        if i % 1000 == 0:
            total_err, total_nll = compute_err(floatX(X_train),
                                               floatX(Y_train).reshape(-1, 1))
            print("{}/{} : NLL = {} TOTAL={} ERR = {}".format(
                i, n_samples, nll, total_nll, total_err))
        if i % 200 == 0:
            samples.append(lasagne.layers.get_all_param_values(net))

    # get predictions
    grid = np.mgrid[-3:3:100j, -3:3:100j]
    grid_2d = floatX(grid.reshape(2, -1).T)
    preds = np.zeros((grid_2d.shape[0], len(samples)))
    preds_test = np.zeros((X_test.shape[0], len(samples)))
    for i, sample in enumerate(samples):
        lasagne.layers.set_all_param_values(net, sample)
        preds[:, i] = predict(grid_2d).reshape(-1)
        preds_test[:, i] = predict(floatX(X_test)).reshape(-1)

    mean_pred = np.mean(preds, axis=1)
    std_pred = np.std(preds, axis=1)

    mean_pred_test = np.mean(preds_test, axis=1)
    class_pred_test = mean_pred_test > 0.5
    std_pred_test = np.std(preds_test, axis=1)

    cmap = sns.diverging_palette(250, 12, s=85, l=25, as_cmap=True)
    fig, ax = plt.subplots(figsize=(10, 6))
    contour = ax.contourf(grid[0],
                          grid[1],
                          mean_pred.reshape(100, 100),
                          cmap=cmap,
                          alpha=1.)
    ax.scatter(X_test[class_pred_test == 0, 0], X_test[class_pred_test == 0,
                                                       1])
    ax.scatter(X_test[class_pred_test == 1, 0],
               X_test[class_pred_test == 1, 1],
               color='r')
    cbar = plt.colorbar(contour, ax=ax)
    _ = ax.set(xlim=(-3, 3), ylim=(-3, 3), xlabel='X', ylabel='Y')
    cbar.ax.set_ylabel(
        'Posterior predictive mean probability of class label = 0')

    cmap = sns.cubehelix_palette(light=1, as_cmap=True)
    fig, ax = plt.subplots(figsize=(10, 6))
    contour = ax.contourf(grid[0],
                          grid[1],
                          std_pred.reshape(100, 100),
                          cmap=cmap)
    ax.scatter(X_test[class_pred_test == 0, 0], X_test[class_pred_test == 0,
                                                       1])
    ax.scatter(X_test[class_pred_test == 1, 0],
               X_test[class_pred_test == 1, 1],
               color='r')
    cbar = plt.colorbar(contour, ax=ax)
    _ = ax.set(xlim=(-3, 3), ylim=(-3, 3), xlabel='X', ylabel='Y')
    cbar.ax.set_ylabel('Uncertainty (posterior predictive standard deviation)')
    plt.show()
Example #42
    def make(self, reself: tp.Optional[bool] = False, **kwargs):
        """

        Generate a heatmap plot from the selected 
        columns of the object's dataframe.

            **Parameters**

                reself

                    A logical variable. If ``True``, an instance of the 
                    object will be returned upon exit to the calling 
                    routine. The default value is ``False``.

            **Returns**

                The object ``self`` if ``reself = True``; otherwise, ``None``.
                Note, however, that this method has side effects: it manipulates
                the existing attributes of the object.

        """

        for key in kwargs.keys():
            if hasattr(self, key):
                setattr(self, key, kwargs[key])
            elif key == "dataFrame":
                setattr(self, "_dfref", wref.ref(kwargs[key]))
            else:
                raise Exception(newline + "Unrecognized input '" + key +
                                "' class attribute detected." + newline +
                                self._getDocString())

        # set what to plot

        ############################################################################################################################
        #### xticklabels / yticklabels properties
        ############################################################################################################################

        if isinstance(self.xticklabels.kws, Struct):
            if "horizontalalignment" not in vars(self.xticklabels.kws).keys():
                self.xticklabels.kws.horizontalalignment = "right"
            if "rotation" not in vars(self.xticklabels.kws).keys():
                self.xticklabels.kws.rotation = 45
        else:
            raise Exception(
                newline +
                "The xticklabels.kws component of the current HeatMapPlot object must"
                + newline +
                "be an object of class Struct(), essentially a structure with components"
                + newline +
                "whose names are the input arguments to the set_xticklabels() method of the"
                + newline + "Axes class of the matplotlib library." + newline +
                self._getDocString())

        if isinstance(self.yticklabels.kws, Struct):
            if "horizontalalignment" not in vars(self.yticklabels.kws).keys():
                self.yticklabels.kws.horizontalalignment = "right"
            if "rotation" not in vars(self.yticklabels.kws).keys():
                self.yticklabels.kws.rotation = 45
        else:
            raise Exception(
                newline +
                "The yticklabels.kws component of the current HeatMapPlot object must"
                + newline +
                "be an object of class Struct(), essentially a structure with components"
                + newline +
                "whose names are the input arguments to the set_yticklabels() method of the"
                + newline + "Axes class of the matplotlib library." + newline +
                self._getDocString())

        ############################################################################################################################
        #### heatmap properties
        ############################################################################################################################

        if isinstance(self.heatmap.kws, Struct):
            if "square" not in vars(self.heatmap.kws).keys():
                self.heatmap.kws.square = True
            if "cmap" not in vars(self.heatmap.kws).keys():
                try:
                    import seaborn as sns
                    self.heatmap.kws.cmap = sns.diverging_palette(
                        h_neg=self._colorStart,
                        h_pos=self._colorEnd,
                        n=self._colorCount)
                except Exception:
                    if self._isdryrun:
                        self.heatmap.kws.cmap = None
                    else:
                        raise Exception(
                            newline +
                            "Failed to set the heatmap.kws.cmap component of the current HeatMapPlot object."
                            + newline +
                            "This component depends on the external seaborn Python library. Therefore, it is "
                            + newline +
                            "likely that the seaborn library or one of the required components of it, such as "
                            + newline +
                            "the matplotlib Python library is not properly installed on your system. Please "
                            + newline +
                            "fix this issue, otherwise, the visualization tools of the ParaMonte library "
                            + newline +
                            "will not work as expected. You can install the seaborn library by typing "
                            + newline +
                            "the following commands in your Anaconda3 or Bash command prompt: "
                            + newline + newline +
                            "    pip install --user --upgrade matplotlib" +
                            "    pip install --user --upgrade seaborn" +
                            newline + self._getDocString())
        else:
            raise Exception(
                newline +
                "The heatmap.kws component of the current HeatMapPlot object must"
                + newline +
                "be an object of class Struct(), essentially a structure with components"
                + newline +
                "whose names are the input arguments to the heatmap() function of the"
                + newline + "seaborn library." + newline +
                self._getDocString())

        ############################################################################################################################
        #### figure properties
        ############################################################################################################################

        if self.figure.enabled:
            if isinstance(self.figure.kws, Struct):
                if "dpi" not in vars(self.figure.kws).keys():
                    self.figure.kws.dpi = 150
                if "facecolor" not in vars(self.figure.kws).keys():
                    self.figure.kws.facecolor = "w"
                if "edgecolor" not in vars(self.figure.kws).keys():
                    self.figure.kws.edgecolor = "w"
            else:
                raise Exception(
                    newline +
                    "The figure.kws component of the current DensityPlot object must"
                    + newline +
                    "be an object of class Struct(), essentially a structure with components"
                    + newline +
                    "whose names are the input arguments to the figure() function of the"
                    + newline + "matplotlib library." + newline +
                    self._getDocString())

        ############################################################################################################################
        ############################################################################################################################
        if self._isdryrun: return
        ############################################################################################################################
        ############################################################################################################################

        import seaborn as sns
        import matplotlib.pyplot as plt
        plt.ion()  # turn on interactive mode; used to detach the figure from the command line in ipython

        ############################################################################################################################
        #### generate figure and axes if needed
        ############################################################################################################################

        self._constructBasePlot()

        ############################################################################################################################
        #### check data type
        ############################################################################################################################

        self._checkDataType()

        ############################################################################################################################
        #### check rows presence. This must be checked here, because it depends on the integrity of the input dataFrame.
        ############################################################################################################################

        if self.rows is None: self.rows = range(len(self._dfref().index))
        rownames = self._dfref().index[self.rows]

        ############################################################################################################################
        #### check columns presence. This must be checked here, because it depends on the integrity of the input dataFrame.
        ############################################################################################################################

        colnames, colindex = pm.dfutils.getColNamesIndex(
            self._dfref().columns, self.columns)

        ############################################################################################################################
        #### set up tick labels
        ############################################################################################################################

        xtickExists = True
        if "xticklabels" in vars(self.heatmap.kws).keys():
            if not any(self.heatmap.kws.xticklabels): xtickExists = False
        else:
            self.heatmap.kws.xticklabels = colnames

        ytickExists = True
        if "yticklabels" in vars(self.heatmap.kws).keys():
            if not any(self.heatmap.kws.yticklabels): ytickExists = False
        else:
            self.heatmap.kws.yticklabels = rownames

        ############################################################################################################################
        #### plot data
        ############################################################################################################################

        if self.annotPrecision is None:
            data = self._dfref().iloc[self.rows, colindex]
        else:
            data = self._dfref().iloc[self.rows, colindex].round(
                decimals=self.annotPrecision)

        self.currentFig.axes = sns.heatmap(data=data, **vars(self.heatmap.kws))

        ############################################################################################################################
        #### configure the tick labels (orientation, ...)
        ############################################################################################################################

        self.currentFig.axes.set_xticklabels(
            self.currentFig.axes.get_xticklabels(),
            **vars(self.xticklabels.kws))

        self.currentFig.axes.set_yticklabels(
            self.currentFig.axes.get_yticklabels(),
            **vars(self.yticklabels.kws))

        plt.tight_layout()

        if self.figure.enabled:

            # default figure size

            figWidth = 6.4  # inches
            figHeight = 4.8  # inches
            figWidthScale = 1
            figHeightScale = 1
            threshDimension = 10

            # scale only if ticklabels are present

            if xtickExists:
                figWidthScale = max(1,
                                    self._dfref().shape[1] / threshDimension)
                figWidth *= figWidthScale

            if ytickExists:
                figHeightScale = max(1,
                                     self._dfref().shape[0] / threshDimension)
                figHeight *= figHeightScale

            self.currentFig.figure.set_size_inches(figWidth, figHeight)

        ############################################################################################################################

        if reself: return self
# In[ ]:

import seaborn as sns
corr = df_test[[
    "P2", "P3", "norm_len_name", "title_ms", "title_mrs", "title_mr",
    "title_others", "is_male", "age_norm", "norm_family_size", "norm_fare",
    "cab_b", "cab_c", "cab_d", "cab_e", "cab_f", "cab_g", "cab_z", "embQ",
    "embS"
]].corr()

mask = np.zeros_like(corr, dtype=bool)
mask[np.triu_indices_from(mask)] = True

# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 9))

# Generate a custom diverging colormap
cmap = sns.diverging_palette(0, 50, as_cmap=True)

# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr,
            mask=mask,
            cmap=cmap,
            vmax=.3,
            center=0,
            square=True,
            linewidths=.5,
            cbar_kws={"shrink": .5})

# In[ ]:
pd.isnull(laWeather).sum()
"""# Analysis: Weather"""

# a weather correlation matrix was created to see what kind of relationships each variable had with one another

corr = laWeather.corr()
corr.style.background_gradient(cmap='coolwarm')

# weather correlation matrix 2
fig, ax = plt.subplots(figsize=(10, 8))
corr = laWeather.corr()
ax = sns.heatmap(corr,
                 vmin=-1,
                 vmax=1,
                 center=0,
                 cmap=sns.diverging_palette(20, 220, n=200),
                 square=True)
ax.set_xticklabels(ax.get_xticklabels(),
                   rotation=45,
                   horizontalalignment='right')

# precipitation by hour heatmap
#x = ['1am', '2am', '3am', '4am', '5am', '6am', '7am', '8am', '9am', '10am', '11am', '12pm', '1pm', '2pm', '3pm', '4pm', '5pm', '6pm', '7pm', '8pm', '9pm', '10pm', '11pm', '12am']
rain_hour_pt = laWeather.pivot_table(index='Precipitation',
                                     columns='hours',
                                     aggfunc='size')
rain_hour_pt = rain_hour_pt.apply(lambda x: x / rain_hour_pt.max(axis=1))
plt.figure(figsize=(15, 5))
plt.title('Precipitation by hour', fontsize=14)
#plt.xlabel('x')
sns.heatmap(rain_hour_pt, cbar=True, annot=False, fmt=".0f", cmap="Blues")
def correlation_plot():
    corr = data.corr()  # first calculate correlation between all columns!
    mask = np.zeros_like(corr, dtype=bool)  # mask the upper triangle, as in the seaborn example
    mask[np.triu_indices_from(mask)] = True
    f, ax = plt.subplots(figsize=(11, 9))  # Set up the matplotlib figure
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, center=0,
                square=True, linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)
Exemple #46
0
        # TODO: HOW TO SAVE THIS IN A DJ TABLE FOR LATER?
        parsdict = {
            'threshold': r'Threshold $(\sigma)$',
            'bias': r'Bias $(\mu)$',
            'lapselow': r'Lapse low $(\gamma)$',
            'lapsehigh': r'Lapse high $(\lambda)$'
        }
        ylims = [[-5, 105], [-105, 105], [-0.05, 1.05], [-0.05, 1.05]]
        yticks = [[0, 19, 100], [-100, -16, 0, 16, 100], [-0, 0.2, 0.5, 1],
                  [-0, 0.2, 0.5, 1]]

        # pick a good-looking diverging colormap with black in the middle
        cmap = sns.diverging_palette(
            20,
            220,
            n=len(behav['probabilityLeft_block'].unique()),
            center="dark")
        if len(behav['probabilityLeft_block'].unique()) == 1:
            cmap = "gist_gray"
        sns.set_palette(cmap)

        # plot the fitted parameters
        for pidx, (var, labelname) in enumerate(parsdict.items()):
            ax = axes[pidx, 1]
            sns.lineplot(x="date",
                         y=var,
                         marker='o',
                         hue="probabilityLeft_block",
                         linestyle='',
                         lw=0,
Exemple #47
0
import seaborn as sns
from matplotlib.colors import rgb2hex


def radiocolorf(freq):
    ffreq = (float(freq) - 1.0)/(45.0 - 1.0)
    pal = sns.diverging_palette(200, 60, l=80, as_cmap=True, center="dark")
    return rgb2hex(pal(ffreq))
Exemple #48
0
import matplotlib
matplotlib.use('AGG')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as st
import seaborn as sns
from tabulate import tabulate

if __name__ == '__main__':
    ntop = 30

    cases = pd.read_csv('cases.csv')
    ctrls = pd.read_csv('ctrls.csv')

    genes = ctrls.columns
    scores, pvals = st.ttest_ind(cases, ctrls)
    top_idx = np.argsort(pvals)[:ntop]
    df = pd.DataFrame(np.array([genes[top_idx], pvals[top_idx]]).T,
                      columns=['Gene', 'p'])

    with open('table.tex', 'w') as f:
        f.write(
            tabulate(df,
                     headers=list(df.columns),
                     tablefmt="latex",
                     floatfmt=".4f"))

    plt.figure(tight_layout=True)
    cmap = sns.diverging_palette(255, 1, n=3, as_cmap=True)
    sns.clustermap(pd.concat(
        [ctrls.iloc[:, top_idx].sort_index(axis=1),
         cases.iloc[:, top_idx].sort_index(axis=1)]).T,
                   cmap=cmap)
    plt.savefig('heatmap.png')
    iqr = q75 - q25
    lower_fence = q25 - (1.5 * iqr)  # Tukey fences for outlier removal
    upper_fence = q75 + (1.5 * iqr)

    bike_df = bike_df.drop(bike_df[bike_df.loc[:, i] < lower_fence].index)
    bike_df = bike_df.drop(bike_df[bike_df.loc[:, i] > upper_fence].index)

bike_df.describe()


#Feature Selection
f, ax=plt.subplots(figsize=(7,5))
n_names = ['temp','atemp','hum','windspeed']
df = bike_df.loc[:,n_names]
sns.heatmap(df.corr(), mask=np.zeros_like(df.corr(), dtype=bool),
            cmap=sns.diverging_palette(220, 10, as_cmap=True), ax=ax, annot=True)

cnames = ['season','workingday','weathersit','yr','mnth']
from scipy.stats import chi2_contingency
for i in cnames:
    print(i)
    chi2,p,dof,ex = chi2_contingency(pd.crosstab(bike_df['cnt'],bike_df[i]))
    print(p)
    
#dropping correlated variable
bike_df = bike_df.drop(['atemp'], axis=1)
bike_df.shape

bike_df['temp'] = bike_df['temp']*39
bike_df['hum'] = bike_df['hum']*100
bike_df['windspeed'] = bike_df['windspeed']*67
Exemple #50
0
def dendrogram(root, data_in, labels=None, index=None, model=None, n_max=150):
    """Generate and save the dendrogram obtained from the clustering algorithm.

    This function generates the dendrogram obtained from the clustering
    algorithm applied on the data. The plots will be saved into the appropriate
    folder of the tree-like structure created inside the root folder. The row
    colors of the heatmap are either the true or the estimated data labels.

    Parameters
    -----------
    root : string
        The root path for the output creation

    data_in : array of float, shape : (n_samples, n_dimensions)
        The low-dimensional embedding estimated by the dimensionality reduction and
        manifold learning algorithm.

    labels : array of int, shape : n_samples
        The result of the clustering step.

    index : list of integers (or strings)
        This is the sample identifier, if provided as the first column (or row)
        of the input file. Otherwise it is just an incremental range of size
        n_samples.

    model : sklearn or sklearn-like object
        An instance of the class that evaluates a step. In particular this must
        be a clustering model provided with the clusters_centers_ attribute
        (e.g. KMeans).

    n_max : int, (INACTIVE)
        The maximum number of samples to include in the dendrogram.
        When the number of samples is bigger than n_max, only n_max samples
        randomly extracted from the dataset are represented. The random
        extraction is performed using
        sklearn.model_selection.StratifiedShuffleSplit
        (or sklearn.cross_validation.StratifiedShuffleSplit for legacy
        reasons).
    """
    # define col names
    col = ["$x_{" + str(i) + "}$" for i in np.arange(0, data_in.shape[1], 1)]
    df = pd.DataFrame(data=data_in, columns=col, index=index)

    # -- Code for row colors adapted from:
    # https://stanford.edu/~mwaskom/software/seaborn/examples/structured_heatmap.html
    # Create a custom palette to identify the classes
    if labels is None:
        labels = np.zeros(df.shape[0], dtype=np.short)
    else:
        mapping = dict(
            zip(np.unique(labels), np.arange(np.unique(labels).shape[0])))
        labels = np.vectorize(mapping.get)(labels)

    n_colors = np.unique(labels).shape[0]
    custom_pal = sns.color_palette("hls", n_colors)
    custom_lut = dict(zip(map(str, range(n_colors)), custom_pal))

    # Convert the palette to vectors that will be drawn on the matrix side
    custom_colors = pd.Series(map(str, labels)).map(custom_lut)

    # Create a custom colormap for the heatmap values
    cmap = sns.diverging_palette(220, 20, n=7, as_cmap=True)

    if model.affinity == 'precomputed':
        import scipy.spatial.distance as ssd
        from scipy.cluster.hierarchy import linkage
        # convert the redundant square matrix into a condensed one.
        # Even if the docs of scipy said so, linkage function does not
        # understand that the matrix is precomputed, unless it is 1-dimensional
        Z = linkage(ssd.squareform(data_in),
                    method=model.linkage,
                    metric='euclidean')
        g = sns.clustermap(df,
                           method=model.linkage,
                           row_linkage=Z,
                           col_linkage=Z,
                           linewidths=.5,
                           cmap=cmap)

    else:
        # workaround to a different name used for manhattan/cityblock distance
        if model.affinity == 'manhattan':
            model.affinity = 'cityblock'

        g = sns.clustermap(df,
                           method=model.linkage,
                           metric=model.affinity,
                           row_colors=custom_colors,
                           linewidths=.5,
                           cmap=cmap)

    plt.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=5)
    filename = os.path.join(
        root,
        os.path.basename(root) + '_dendrogram.' + DEFAULT_EXT)
    g.savefig(filename)
    logging.info('Figure saved %s', filename)
    plt.close()
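
# The n_max parameter documented above is marked INACTIVE. Below is a hedged sketch of
# how the described stratified subsampling could be implemented; the helper name and
# the fixed random_state are assumptions, not part of the original module.
from sklearn.model_selection import StratifiedShuffleSplit


def stratified_subsample(data_in, labels, n_max=150, random_state=0):
    """Return at most n_max rows of data_in, stratified by labels."""
    if data_in.shape[0] <= n_max:
        return data_in, labels
    sss = StratifiedShuffleSplit(n_splits=1, train_size=n_max,
                                 random_state=random_state)
    keep_idx, _ = next(sss.split(data_in, labels))
    return data_in[keep_idx], np.asarray(labels)[keep_idx]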
Exemple #51
0
for i in headers:
    plt.hist(data[i])

    plt.xlabel(i)
    plt.ylabel('Count')

    hname = 'hist_' + i + '.png'
    plt.savefig(hname)
    plt.clf()

## correlation matrix (seaborn heatmap)
f, ax = plt.subplots(figsize=(10, 8))
corr = df.corr()
sns.heatmap(corr,
            mask=np.zeros_like(corr, dtype=bool),
            cmap=sns.diverging_palette(100, 220, as_cmap=True),
            square=True,
            ax=ax)

plt.savefig('diabetes_corr_matrix.png')
plt.clf()

## 2 feature/variable scatter plots (Y vs. X)
for i in range(0, len(headers)):
    xvar = headers[i]

    for j in range(0, len(headers) - 1):
        if i != j:
            yvar = headers[j]
            plt.scatter(data[xvar], data[yvar])
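            # Hedged completion of this truncated loop, mirroring the histogram loop
            # above: label the axes, save each scatter to its own file, then clear it.
            plt.xlabel(xvar)
            plt.ylabel(yvar)
            plt.savefig('scatter_' + xvar + '_vs_' + yvar + '.png')
            plt.clf()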
Exemple #52
0
test_features = generator.return_vec(
    test_atoms, [generator.eigenspectrum_vec, generator.composition_vec])

print('{} shape training feature matrix'.format(np.shape(train_features)))
print('{} shape testing feature matrix'.format(np.shape(test_features)))

# After this, we can analyze the distribution of the feature sets. In the following, we see a large number of features in the latter half of the vectors tend to be zero.

# In[6]:

dif = np.max(train_features, axis=0) - np.min(train_features, axis=0)
np.place(dif, dif == 0., [1.])
mean = np.mean(train_features, axis=0)
scaled = (train_features.copy() - mean) / dif
plt.figure(num=0, figsize=(30, 15))
cmap = sns.diverging_palette(250, 15, s=75, l=40, n=1000, center="dark")
sns.heatmap(scaled, cmap=cmap)
plt.savefig('train_features.png')

# In[7]:

scaled = (test_features.copy() - mean) / dif
plt.figure(num=1, figsize=(30, 15))
cmap = sns.diverging_palette(250, 15, s=75, l=40, n=1000, center="dark")
sns.heatmap(scaled, cmap=cmap)
plt.savefig('test_features.png')

# We can make some parallel coordinate plots using pandas to get a slightly better idea of how the feature vectors look. Initially, we set up the dataframe containing the training data.

# In[8]:
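# The body of the In[8] cell is not included in this excerpt. Below is a hedged sketch
# of the parallel-coordinates step described above; the feature column names
# ('f0', 'f1', ...) and the 'set' grouping label are assumptions for illustration only.
import pandas as pd
from pandas.plotting import parallel_coordinates

train_scaled = (train_features.copy() - mean) / dif
tdf = pd.DataFrame(train_scaled,
                   columns=['f' + str(i) for i in range(train_scaled.shape[1])])
tdf['set'] = 'train'  # parallel_coordinates groups lines by this class column
plt.figure(num=2, figsize=(30, 15))
parallel_coordinates(tdf, class_column='set', alpha=0.3)
plt.savefig('train_parallel_coordinates.png')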
Exemple #53
0
### FACETING multiple line plots, especially for different cols with different scales  ###
df.plot(subplots=True,
                linewidth=0.5,
                layout=(2, 4),   # specifies no. of rows & cols in the figure
                figsize=(16, 10),
                sharex=False,
                sharey=False)
plt.show()



### heatmap of correlation matrix
corr_mat = df.corr(method='pearson')
import seaborn as sns
#sns.heatmap(corr_mat, annot=True, linewidths=0.4, annot_kws={"size": 10})
fig, ax = plt.subplots(figsize=(10, 8))  # create the axes used by the heatmap call below
sns.heatmap(corr_mat, mask=np.zeros_like(corr_mat, dtype=bool),
            cmap=sns.diverging_palette(220, 10, as_cmap=True),
            square=True, ax=ax)
plt.xticks(rotation=90)
plt.yticks(rotation=0)
plt.show()

# clustermap to group together similar columns (using hierarchical clustering)
fig = sns.clustermap(corr_mat, row_cluster=True, col_cluster=True)  # capture the ClusterGrid so fig.ax_heatmap works below
plt.setp(fig.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
plt.setp(fig.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
plt.show()



### scatterplots, with points colored by group 
import numpy as np
Exemple #54
0
# Select variables with sufficiently high IV (the filter below keeps IV >= 0.02)
high_IV = {k:v for k, v in IV_dict.items() if v >= 0.02}
high_IV_sorted = sorted(high_IV.items(),key=lambda x:x[1],reverse=True)

short_list = high_IV.keys()
short_list_2 = []
for var in short_list:
    newVar = var + '_WOE'
    trainData[newVar] = trainData[var].map(WOE_dict[var])
    short_list_2.append(newVar)

# For the result of the previous step, compute the correlation matrix and draw a heatmap to visualize it
trainDataWOE = trainData[short_list_2]
f, ax = plt.subplots(figsize=(10, 8))
corr = trainDataWOE.corr()
sns.heatmap(corr, mask=np.zeros_like(corr, dtype=bool),
            cmap=sns.diverging_palette(220, 10, as_cmap=True), square=True, ax=ax)
plt.show()

# Pairwise linear correlation check (a completion sketch follows the truncated loop below):
# 1. Sort the candidate variables by IV in descending order
# 2. Compute the linear correlation coefficient between the i-th variable and each other candidate
# 3. If the coefficient exceeds the threshold, drop the variable with the lower IV
deleted_index = []
cnt_vars = len(high_IV_sorted)
for i in range(cnt_vars):
    if i in deleted_index:
        continue
    x1 = high_IV_sorted[i][0]+"_WOE"
    for j in range(cnt_vars):
        if i == j or j in deleted_index:
            continue
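        # Hedged completion of the truncated inner loop, following the three steps
        # described above. The 0.7 correlation threshold is an assumption for
        # illustration, not a value taken from the original source.
        x2 = high_IV_sorted[j][0] + "_WOE"
        roh = np.corrcoef(trainData[x1], trainData[x2])[0, 1]
        if abs(roh) > 0.7:
            if high_IV_sorted[i][1] >= high_IV_sorted[j][1]:
                deleted_index.append(j)
            else:
                deleted_index.append(i)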
# Select a subset of the networks
used_networks = [1, 5, 6, 7, 8, 11, 12, 13, 16, 17]
used_columns = (
    df.columns.get_level_values("network").astype(int).isin(used_networks))
df = df.loc[:, used_columns]

# Create a custom palette to identify the networks
network_pal = sns.cubehelix_palette(len(used_networks),
                                    light=.9,
                                    dark=.1,
                                    reverse=True,
                                    start=1,
                                    rot=-2)
network_lut = dict(zip(map(str, used_networks), network_pal))

# Convert the palette to vectors that will be drawn on the side of the matrix
networks = df.columns.get_level_values("network")
network_colors = pd.Series(networks).map(network_lut)

# Create a custom colormap for the heatmap values
cmap = sns.diverging_palette(h_neg=210, h_pos=350, s=90, l=30, as_cmap=True)

# Draw the full plot
sns.clustermap(df.corr(),
               row_colors=network_colors,
               linewidths=.5,
               col_colors=network_colors,
               figsize=(13, 13),
               cmap=cmap)
Exemple #56
0
    for date_forward in range(1, lags + 1):
        print(date_forward)
        dv_forward = dv + '_forward' + str(date_forward)
        dfcoeff, dfpvals = correlation_matrix(df_temp, var_list_wout_dv,
                                              dv_forward)
        dfcoeff_all_lags = pd.concat([dfcoeff_all_lags, dfcoeff], axis=1)
        dfpvals_all_lags = pd.concat([dfpvals_all_lags, dfpvals], axis=1)
    return dfcoeff_all_lags, dfpvals_all_lags


dfcoeff_all_lags, dfpvals_all_lags = viz_corrs_of_lagged_variables_with_dv(
    df_test, 10, 'resting_hr', var_list_wout_dv, date_to_hr_resting_dict)
# I need to get the n here to see how much I'm shrinking the sample size

fig = plt.figure(figsize=(8, 6))
cmap_enter = sns.diverging_palette(15, 125, sep=10, s=70, l=50, as_cmap=True)
sns.heatmap(dfcoeff_all_lags,
            center=0,
            square=False,
            annot=True,
            fmt='.2f',
            annot_kws={'size': 11},
            cmap=cmap_enter,
            vmin=-.4,
            vmax=.4,
            cbar_kws=dict(use_gridspec=False, location='top'))  # cbar=False,

# NEXT --
# MAKE SURE NOT LOSING TONS OF N FOR CORR
# CAN I GET PARTIAL OR SEMI-PARTIAL (YEAH) CORRS? HOW TO COMPUTE?
# PARTIALLING OUT PRIOR RESTING HR AT THE SAME LAG
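
# Hedged sketch for the TODO above: a partial correlation between a lagged predictor
# and the DV, controlling for prior resting HR, computed by correlating regression
# residuals. The column names in the usage line are hypothetical and only illustrate
# the recipe.
import numpy as np
import pandas as pd


def partial_corr(frame: pd.DataFrame, x: str, y: str, control: str) -> float:
    sub = frame[[x, y, control]].dropna()

    def residualize(col: str) -> pd.Series:
        # residuals of a simple linear fit of `col` on the control variable
        slope, intercept = np.polyfit(sub[control], sub[col], deg=1)
        return sub[col] - (slope * sub[control] + intercept)

    return float(np.corrcoef(residualize(x), residualize(y))[0, 1])


# Hypothetical usage:
# partial_corr(df_test, x='steps', y='resting_hr_forward1', control='resting_hr')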
Exemple #57
0
# In[44]:

####Correlation Plot#######

corr = data.corr()

# Generate a mask for the upper triangle
mask = np.zeros_like(corr, dtype=bool)
mask[np.triu_indices_from(mask)] = True

# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(15, 13))

# Generate a custom diverging colormap
cmap = sns.diverging_palette(220, 10, as_cmap=True)

# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr,
            mask=mask,
            cmap=cmap,
            vmax=.3,
            center=0,
            square=True,
            linewidths=.5,
            cbar_kws={"shrink": .5})

# # Statistical Model - Logistic Model

# In[ ]:
for q in range (0,4,2):
    contact_var_anxiety_BC[r] = you_inf_anxiety_count[q] # for age group bc
    r += 1

for q in range (0,4,2):
    contact_var_anxiety_BC[r] = home_inf_anxiety_count[q] #for education bc
    r += 1

for q in range (0,4,2):
    contact_var_anxiety_BC[r] = fnr_inf_anxiety_count[q]# for marital bc 
    r += 1


f, ax = plt.subplots(figsize=(10, 8))

sns.heatmap(contact_var_anxiety_BC, vmin=0, vmax=1350,
            xticklabels=x_labels, yticklabels=y_labels,
            mask=np.zeros_like(contact_var_anxiety_BC, dtype=bool),
            cmap=sns.diverging_palette(220, 10, as_cmap=True),
            square=True, ax=ax)


#####################################################################


################# for Contact DC data ###########################
contact_var_anxiety_DC = np.zeros((8,5),dtype = np.double)

r = 0
for q in range (1,4,2):
    contact_var_anxiety_DC[r] = consultancy_anxiety_count[q] # for age group bc
    r += 1

for q in range (1,4,2):
Exemple #59
0
from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

from paper_behavior_functions import (seaborn_style, institution_map,
                                      group_colors, figpath, load_csv,
                                      FIGURE_WIDTH, FIGURE_HEIGHT, num_star)

# Load some things from paper_behavior_functions
figpath = Path(figpath())
seaborn_style()
institution_map, col_names = institution_map()
pal = group_colors()
cmap = sns.diverging_palette(20, 220, n=3, center="dark")

# ========================================== #
#%% 1. GET GLM FITS FOR ALL MICE
# ========================================== #

print('loading model from disk...')
params_basic = load_csv('model_results', 'params_basic.csv')
params_full = load_csv('model_results', 'params_full.csv')
combined = params_basic.merge(params_full,
                              on=['institution_code', 'subject_nickname'])

# ========================================== #
# PRINT SUMMARY AND STATS
# ========================================== #
Exemple #60
0
    full_corr_df = pd.merge(corr_df,
                            pval_df,
                            how='left',
                            left_on='feature',
                            right_on='feature')
    full_corr_df.sort_values(by='pearsonr', inplace=True, ascending=False)
    full_corr_df.head()
    full_corr_df.tail()

    if plot_hist:
        _ = full_corr_df.hist(column='pearsonr', figsize=(10, 7), grid=False)

    return full_corr_df


analysis_type = 'synonymous'  # 'primary' or 'synonymous'

if analysis_type == 'primary':
    df = prim_df.copy()
else:
    df = syn_df.copy()

full_corr_df = get_pearsonr_per_feature(df, analysis_type=analysis_type)

# Build the palette after full_corr_df is computed; its length sets the number of colors
colors = sns.diverging_palette(10, 220, sep=80, n=len(full_corr_df)).as_hex()
colors = colors[::-1]
sns.set_palette(colors)
title = '[' + analysis_type + ' analysis] Keep only genes with pval < 1'
_ = full_corr_df.plot.barh(x='feature',
                           y='pearsonr',
                           figsize=(16, 18),