Python jointplot 예제들, seaborn.jointplot Python 예제들

예제 #1

0

파일 보기

파일: plot.py 프로젝트: DBlackKat/Sentiment-Data-Analysis

def plotTradeVsNews(tickName):
    """Plot log trading volume against news buzz and against log news volume.

    The frame for ``tickName`` is loaded with ``getNewsNTradingVol`` from the
    ``"resultsMKII"`` results directory. For each predictor a hexbin joint
    plot and a KDE joint plot are drawn, and all figures are then shown.

    Args:
        tickName: ticker identifier forwarded to ``getNewsNTradingVol``.
    """
    path2 = "resultsMKII"
    # The original passed the undefined name ``tick_Name`` here, which raised
    # NameError on every call.
    frame = getNewsNTradingVol(tickName, path2)

    # Vectorised replacements for the per-row append loops.
    news_buz = np.asarray(frame['NewsBuz'].values)
    trading_vol = np.log(frame['tradingVol'].values)
    news_vol = np.log(frame['NewsVol'].values)

    sns.set(style="ticks")
    # NOTE(review): ``stat_func``, ``size`` and ``sns.plt`` only exist in
    # older seaborn releases - confirm the pinned seaborn version.
    for x, x_label in ((news_buz, "News Buz"), (news_vol, "News Volume")):
        # x/y are passed by keyword because recent seaborn rejects them
        # as positional arguments.
        hex_grid = sns.jointplot(x=x, y=trading_vol, kind="hex",
                                 stat_func=kendalltau, color="#4CB391")
        hex_grid.set_axis_labels(xlabel=x_label, ylabel="Trading Volume")
        kde_grid = sns.jointplot(x=x, y=trading_vol, kind="kde", size=7, space=0)
        kde_grid.set_axis_labels(xlabel=x_label, ylabel="Trading Volume")
    sns.plt.show()

예제 #2

0

파일 보기

파일: doplot_stan.py 프로젝트: aasensio/axial_ratio

    def doplot(self, name):
        """
        Load a pickled trace and show three diagnostic plots.

        The trace is stored on ``self.trace``. A corner plot of the ``muCB``
        and ``sdCB`` columns is shown first, then a scatter plot and a KDE
        joint plot of a 200-row subsample of ``CB``.

        name: path of the pickle file to read.
        """

        # NOTE: pickle.load can execute arbitrary code; only open trusted files.
        # The context manager closes the handle, which the original leaked.
        with open(name, "rb") as handle:
            self.trace = pickle.load(handle)

        var = np.vstack([self.trace['muCB'][:,0], self.trace['muCB'][:,1], self.trace['sdCB'][:,0], self.trace['sdCB'][:,1]]).T

        # Raw strings: "\m" and "\s" are invalid escape sequences otherwise.
        corner.corner(var, labels=[r'$\mu_C$', r'$\mu_B$', r'$\sigma_C$', r'$\sigma_B$'], show_titles=True)

        pl.show()

        # Just get the first N samples. We shuffle the
        # arrays and get the subsamples
        # NOTE(review): if CB is a NumPy array these slices are views, so the
        # shuffles reorder self.trace['CB'] in place, and C and B are shuffled
        # independently of each other - confirm that this is intended.
        C = self.trace['CB'][:,:,0]
        np.random.shuffle(C)
        C_slice = C[0:200,:].flatten()
        B = self.trace['CB'][:,:,1]
        np.random.shuffle(B)
        B_slice = B[0:200,:].flatten()

        # First option
        pl.plot(B_slice, C_slice, '.', alpha=0.002)
        pl.show()

        # KDE joint plot (x/y by keyword; recent seaborn rejects positional x/y)
        sns.jointplot(x=C_slice, y=B_slice, kind='kde')
        pl.show()

예제 #3

0

파일 보기

파일: seaborn1.py 프로젝트: EnriqueU/M-L

def seaborn_join():
    """Show a hexbin joint plot of 2000 correlated bivariate-normal samples."""
    samples = np.random.multivariate_normal([0, 0], [[5, 2], [2, 2]], size=2000)
    frame = pd.DataFrame(samples, columns=['x', 'y'])
    with sns.axes_style('white'):
        # Keyword arguments: recent seaborn releases no longer accept
        # x, y and data positionally in this order.
        sns.jointplot(x="x", y="y", data=frame, kind='hex')

    plt.show()

예제 #4

0

파일 보기

파일: beam_source.py 프로젝트: b-r-oleary/acme

 def histogram(self,x=None, y=None, l=None, t=None, **kwargs):
     """
     Shortcut for drawing histograms of the beam at a specified beamline
     location l or a specified time t.
     - neither x nor y given: pair plot over every pair of variables
       (7 variables total: x,y,z, vx, vy, vz, t)
     - only x given: 1d histogram with respect to that parameter
     - x and y given: 2d histogram (joint plot) of those parameters
     - only y given: nothing is drawn
     Extra keyword arguments are forwarded to the seaborn call.
     """
     table = self.to_dataframe(l=l, t=t, latex=True)

     if x is None:
         if y is None:
             grid = sns.pairplot(table, **kwargs)
             # rotate the tick labels so they do not overlap between panels
             for axis in grid.axes.flat:
                 plt.setp(axis.xaxis.get_majorticklabels(), rotation=90)
         return

     x_label = self._reformat_label(x)
     if y is None:
         sns.distplot(table[x_label], **kwargs)
         plt.xlabel(x_label)
         return

     y_label = self._reformat_label(y)
     sns.jointplot(x=x_label, y=y_label, data=table, **kwargs)

예제 #5

0

파일 보기

파일: plotUtils.py 프로젝트: tarlen5/pisa

def make_scatter_plot(frame, name, **kwargs):
    """
    Make joint scatter plots of column ``name`` against the other columns.

    The columns 'hypo', 'llh' and 'mctrue' are skipped. 'deltam31' values are
    scaled by 100 and columns whose name contains 'theta' are converted from
    radians to degrees, for plotting only; ``frame`` itself is not modified.

    Args:
        frame: DataFrame holding the columns to plot.
        name: column used on the x axis of every plot.
        **kwargs: forwarded to ``sns.jointplot``.

    Returns:
        List of the figures created, one per plotted column, in column order.
    """

    column_x = frame[name]
    # Build a new Series rather than using ``*=``: the in-place form can
    # write through to the caller's frame, compounding the scale on reuse.
    if name == 'deltam31':
        column_x = column_x * 100.0

    exclude = {'hypo', 'llh', 'mctrue'}
    # Keep the frame's column order so the returned figures are deterministic.
    params = [p for p in frame.columns if p not in exclude]

    figs = []
    # Plot correlation scatter plot for all other systematics
    for p in params:
        if p == name:
            continue
        column_y = frame[p]
        if p == 'deltam31':
            column_y = column_y * 100.0
        if 'theta' in p:
            column_y = np.rad2deg(column_y)

        with sns.axes_style("whitegrid"):
            # x/y by keyword: recent seaborn rejects them positionally.
            # NOTE(review): ``size`` was renamed ``height`` in later seaborn
            # releases - confirm the pinned version.
            sns.jointplot(x=column_x, y=column_y, size=8, color='b',
                          **kwargs)
            plt.tight_layout()
            figs.append(plt.gcf())

    return figs

예제 #6

0

파일 보기

파일: plot_poi.py 프로젝트: mamsdiallo/ud120-projects

def plotBonusvsSalary(df):
    """Draw the bonus-vs-salary joint plot, save it as a PNG and show it."""
    sns.jointplot(data=df, x="bonus", y="salary")
    # the joint plot is the current figure: resize it before saving
    figure = plt.gcf()
    figure.set_size_inches(18.5, 10.5)
    figure.savefig('bonusVSsalary.png', dpi=100)
    plt.show()

예제 #7

0

파일 보기

파일: MovementStatisticsGenerator.py 프로젝트: BioroboticsLab/bb_analysis

	def plot_seaborn( self ):
		"""Show a KDE joint plot of two columns read from ``movement.csv``.

		Only rows whose column 0 equals 0 and whose two plotted columns both
		lie strictly inside (-limit, limit) are kept.
		"""

		# https://stanford.edu/~mwaskom/software/seaborn/tutorial/distributions.html

		# DataFrame.as_matrix() was removed from pandas; .values returns the
		# same NumPy array and is available in old and new releases alike.
		data = pd.read_csv( 'movement.csv' ).values

		# 1/2 3/4 5/6 7/8
		x_column = 3
		y_column = 4

		limit = 100
		data = data[
			  ( data[:,0] == 0)
			& ( data[:,x_column] > -limit )
			& ( data[:,x_column] < limit )
			& ( data[:,y_column] > -limit )
			& ( data[:,y_column] < limit )
		]

		x = data[:,x_column]
		y = data[:,y_column]

		with sns.axes_style( 'white' ):
			sns.jointplot( x=x, y=y, kind='kde' )  # scatter, reg, resid, hex, kde

		# NOTE(review): sns.plt only exists in older seaborn releases - confirm
		# the pinned version.
		sns.plt.show()

예제 #8

0

파일 보기

파일: matmat_original.py 프로젝트: thran/experiments2.0

def skill_vs_speed(prediction_mode, time_model, data):
    """Draw a KDE joint plot of student skill against student speed.

    A ``TimeCombiner`` of the two models is first evaluated on ``data`` with
    ``force_run=True``; the per-student values are then read from each model
    through ``get_skills``.
    """
    model = TimeCombiner(prediction_mode, time_model)
    Evaluator(data, model).get_report(force_run=True)
    students = data.get_students()
    skills = prediction_mode.get_skills(students)
    fastness = time_model.get_skills(students)
    # x/y by keyword: recent seaborn rejects them as positional arguments.
    grid = sns.jointplot(x=pd.Series(skills), y=pd.Series(fastness), kind='kde', space=0)
    grid.set_axis_labels("skill", "speed")

예제 #9

0

파일 보기

파일: read_musicXML.py 프로젝트: rb-roomba/music

def show_graph(data):
    """ Show time series graph of given data.

    Each item of ``data`` is indexed as ``p[0]`` (time) and ``p[1:]`` (passed
    to ``height``); the rows are sorted by time before plotting.
    """
    height_list = sorted(([p[0], height(p[1:])] for p in data),
                         key=lambda row: row[0])
    df = pd.DataFrame(height_list)
    df.columns = ["time","height"]
    # x/y by keyword: recent seaborn rejects them as positional arguments.
    seaborn.jointplot(x='time', y='height', data=df)
    plt.show()

예제 #10

0

파일 보기

파일: joint_sampler.py 프로젝트: low-sky/colira

def sbratio(sampler):
    """Draw a KDE joint plot of ``theta`` against ``phi`` from a sampler chain.

    Columns 2 and 4 of ``sampler.flatchain`` are replaced by their absolute
    values before the chain is wrapped in a DataFrame.
    """
    chain = sampler.flatchain
    # NOTE(review): these assignments write into the array returned by
    # ``sampler.flatchain``; whether that mutates the sampler's own storage
    # depends on the sampler - confirm.
    chain[:,2]=np.abs(chain[:,2])
    chain[:,4]=np.abs(chain[:,4])
    dd = pd.DataFrame(data=chain,
                      columns=['theta','phi','scatter','badfrac','badsig','badmn'])
    with sns.axes_style("white"):
        # The original passed the undefined name ``data`` instead of the
        # frame built above, and gave x/y positionally.
        sns.jointplot(x="theta", y="phi", data=dd, kind="kde")

예제 #11

0

파일 보기

파일: instance.py 프로젝트: ansteh/multivariate

 def plot(self, samples, columns=None):
     """Draw a joint plot of two-column ``samples``.

     The axes are named "x" and "y" unless ``columns`` supplies two names.
     """
     if columns is None:
         x_name, y_name = "x", "y"
     else:
         x_name, y_name = columns[0], columns[1]
     frame = pd.DataFrame(samples, columns=[x_name, y_name])
     sns.jointplot(x=x_name, y=y_name, data=frame)

예제 #12

0

파일 보기

파일: my_script.py 프로젝트: mosayebi/SAGE

def plot_scatter_hist_sns(x, y):
    """Save a hexbin joint plot of ``x`` against ``y`` to 'plot4.pdf'."""
    sns.set(style="ticks")
    # x/y by keyword: recent seaborn rejects them as positional arguments.
    # NOTE(review): ``size``, ``stat_func`` and ``sns.plt`` only exist in
    # older seaborn releases - confirm the pinned version.
    grid = sns.jointplot(x=np.array(x), y=np.array(y), kind="hex", size=4, stat_func=None)
    # Raw strings: "\p" is an invalid escape sequence in a normal literal.
    grid.set_axis_labels(r"$\phi$", r"$\theta$")
    with PdfPages('plot4.pdf') as pdf:
         pdf.savefig()
    sns.plt.close()

예제 #13

0

파일 보기

파일: treesearch.py 프로젝트: MaxwellRebo/disco-dop

def plot(data, total, title, width=800.0, unit='', dosort=True,
		target=None, target2=None):
	"""A HTML bar plot given a dictionary and max value.

	With more than 30 entries and a ``target`` series, a seaborn figure is
	drawn instead and returned as an inline base64 PNG ``<img>``: a regression
	plot for a float-valued target, a bar plot otherwise; ``target2``, when
	given, is used as the hue. In every other case an HTML bar list is built,
	one bar per key, scaled to ``width`` pixels relative to ``total``.

	:param data: mapping of key -> numeric value.
	:param total: value that corresponds to the full ``width``.
	:param title: heading of the plot (and the y column of the figure).
	:param unit: text appended after each value.
	:param dosort: when true, bars are ordered by descending value.
	:returns: an HTML fragment as a string."""
	if len(data) > 30 and target is not None:
		df = pandas.DataFrame(index=data)
		df[title] = pandas.Series(data, index=df.index)
		# The .ix indexer was removed from pandas; reindex is label-based and
		# yields NaN for labels missing from the target.
		df[target.name] = target.reindex(df.index)
		if target2 is not None:
			df[target2.name] = target2.reindex(df.index)
		# ``dtype == numpy.number`` relied on an implicit conversion that
		# recent NumPy no longer performs; test for a float dtype explicitly.
		# NOTE(review): integer targets take the categorical branch, as they
		# did when the old comparison still worked - confirm this is wanted.
		if pandas.api.types.is_float_dtype(target.dtype):
			if target2 is None:
				seaborn.jointplot(x=target.name, y=title, data=df, kind='reg')
			else:
				seaborn.lmplot(x=target.name, y=title, data=df,
						hue=target2.name)
		else:  # X-axis is categorical
			df.sort_values(by=target.name, inplace=True)
			if target2 is None:
				seaborn.barplot(x=target.name, y=title, data=df)
			else:
				seaborn.barplot(x=target.name, y=title, data=df,
						hue=target2.name)
			fig = plt.gcf()
			fig.autofmt_xdate()

		# Convert to PNG
		figfile = io.BytesIO()
		plt.savefig(figfile, format='png')
		result = '<div><img src="data:image/png;base64, %s"/></div>' % (
				base64.b64encode(figfile.getvalue()).decode('utf8'))
		plt.clf()
		return result

	def prefix(key):
		"""Colour group of a key: text before the first '_', else 1st char."""
		return key.split('_')[0] if '_' in key else key[0]

	result = ['<div class=barplot>',
			('<text style="font-family: sans-serif; font-size: 16px; ">'
			'%s</text>' % title)]
	if target is not None:
		data = OrderedDict([(key, data[key]) for key in
				target.sort_values().index if key in data])
	groups = {prefix(key) for key in data}
	color = {}
	# Distinct colour classes are only assigned when there are few groups.
	if len(groups) <= 5:
		color.update(zip(groups, range(1, 6)))
	keys = list(data)
	if dosort:
		keys.sort(key=data.get, reverse=True)
	for key in keys:
		value = data[key]
		# Zero/empty values get a zero-width bar with class b0.
		result.append('<br><div style="width:%dpx;" class=b%d></div>'
				'<span>%s: %g %s</span>' % (
				int(round(width * value / total)) if value else 0,
				color.get(prefix(key), 1) if value else 0,
				htmlescape(key), value, unit,))
	result.append('</div>\n')
	return '\n'.join(result)

예제 #14

0

파일 보기

파일: mplotter.py 프로젝트: dantrim/supersusy

def make_JointPlot(plot, region, data, backgrounds) :
    """Draw and save the seaborn joint plot described by ``plot``.

    The sample named ``plot.sample`` is looked up in ``data`` first and then
    in ``backgrounds``; exactly one match is required, otherwise the process
    exits. The two plotted branches are read from the sample's tree with the
    region cut, event weight and scale factor applied. The figure is written
    to ``<plot.name>.eps`` and moved into ``indir/plots/outdir``.
    """

    candidates = []
    if data.name == plot.sample :
        candidates.append(data)
    if not candidates :
        candidates.extend(bk for bk in backgrounds if bk.name == plot.sample)
    if len(candidates) != 1 :
        msg('ERROR make_JointPlot received %d samples to plot for plot with name %s'%(len(candidates), plot.name))
        sys.exit()

    # turn this tree into an array :)
    sample_to_plot = candidates[0]
    selection_ = '(' + region.tcut + ') * eventweight * ' + str(sample_to_plot.scale_factor)
    tree_array = tree2rec(sample_to_plot.tree, branches=[plot.x_var, plot.y_var],
                            selection=selection_)
    tree_array.dtype.names = (plot.x_var, plot.y_var)
    x_arr = tree_array[plot.x_var]
    y_arr = tree_array[plot.y_var]

    sns.set(style="white")

    # stats? "kendalltau" and the unset default map to a statistic; any other
    # value leaves the annotation off.
    stat_func_ = None
    if plot.stat_func == "kendalltau" :
        from scipy.stats import kendalltau
        stat_func_ = kendalltau
    elif plot.stat_func is None :
        from scipy.stats import pearsonr
        stat_func_ = pearsonr

    # Arguments shared by every colour-map variant. x/y are passed by keyword
    # because recent seaborn rejects them positionally.
    # NOTE(review): ``stat_func`` only exists in older seaborn releases -
    # confirm the pinned version.
    common = dict(x=x_arr, y=y_arr, kind=plot.kind, stat_func=stat_func_,
                  xlim=[plot.x_range_min, plot.x_range_max],
                  ylim=[plot.y_range_min, plot.y_range_max])

    if plot.cmap is None or plot.cmap == "default" :
        j_plot_grid = sns.jointplot(color=plot.color, linewidth=plot.line_width, **common)
    elif plot.cmap == "cubehelix" :
        cmap_ = sns.cubehelix_palette(as_cmap=True, dark=0, light=1, reverse = True)
        j_plot_grid = sns.jointplot(linewidth=plot.line_width,
                                    joint_kws={"cmap":cmap_, "n_levels":plot.n_levels, "shade":True},
                                    **common)
    elif plot.cmap == "blues" :
        j_plot_grid = sns.jointplot(linewidth=1.0,
                                    joint_kws={"cmap":"Blues", "n_levels":plot.n_levels, "shade":True, "shade_lowest":False},
                                    **common)
    else :
        msg("cmap attribute of joint plot not yet added")
        sys.exit()

    j_plot_grid.fig.suptitle(plot.title)
    # leave room above the axes for the suptitle
    j_plot_grid.fig.subplots_adjust(top=0.935)
    j_plot_grid.set_axis_labels(plot.x_label, plot.y_label)

    # save the plot to file
    outname = plot.name + ".eps"
    j_plot_grid.savefig(outname)
    out = indir + "/plots/" + outdir 
    utils.mv_file_to_dir(outname, out, True)
    fullname = out + "/" + outname
    msg("%s saved to : %s"%(outname, os.path.abspath(fullname)))

예제 #15

0

파일 보기

파일: plotter.py 프로젝트: verajohne/SEP_autoencoder

def plot_approx_posterior(cov, means, index):
	"""Draw a KDE joint plot of samples from an approximate 2-D posterior.

	``means[index]`` and ``cov`` are combined with a zero-mean, identity
	covariance Gaussian through ``util.product_gaussians``; 200 samples of the
	result are plotted, with axis limits of three standard deviations around
	the mean.
	"""
	mean = means[index]
	# print() call: the Python 2 print statement is a syntax error on Python 3.
	print(mean.shape)
	mean, cov = util.product_gaussians(mean, np.zeros(2), cov, np.identity(2))
	data = np.random.multivariate_normal(mean, cov, 200)
	df = pd.DataFrame(data, columns=["x", "y"])
	half_x = 3*np.sqrt(cov[0][0])
	half_y = 3*np.sqrt(cov[1][1])
	xlim = (mean[0] - half_x, mean[0] + half_x)
	ylim = (mean[1] - half_y, mean[1] + half_y)
	# NOTE(review): ``stat_func`` only exists in older seaborn releases -
	# confirm the pinned version.
	sns.jointplot(x="x", y="y", data=df, kind="kde", stat_func= None, xlim = xlim, ylim = ylim)

예제 #16

0

파일 보기

파일: show_graph.py 프로젝트: rb-roomba/music

def plot_var(times, pitches, ends, var_n):
    """ Show time series graph of variation [var_n].

    Keeps the (time, pitch) pairs whose time lies in the half-open interval
    (ends[var_n], ends[var_n + 1]].
    """
    # var_n: 0 to 30 (0: Aria)
    start, stop = ends[var_n], ends[var_n+1]
    # A concrete list instead of filter(): on Python 3 filter() is a lazy
    # iterator, which not every pandas release accepts as DataFrame input.
    n_data = [(t, p) for t, p in zip(times, pitches) if start < t <= stop]
    # seaborn
    df = pd.DataFrame(n_data)
    df.columns = ["time","height"]
    # x/y by keyword: recent seaborn rejects them as positional arguments.
    seaborn.jointplot(x='time', y='height', data=df)
    plt.show()

예제 #17

0

파일 보기

파일: decay.py 프로젝트: z01nl1o02/tests

 def show(self):
     """Show a joint plot of ``self._lams`` against the flattened ``self._pr``.

     The plot title reports the sum of the element-wise products of the two.
     """
     lams = self._lams
     probs = np.reshape(self._pr, (1, -1)).tolist()[0]
     sns.jointplot(x='x', y='y', data=pd.DataFrame({'x': lams, 'y': probs}))
     weighted_sum = (np.asarray(lams) * np.asarray(probs)).sum()
     sns.plt.title('mean %f' % weighted_sum)
     sns.plt.show()

예제 #18

0

파일 보기

파일: plotting.py 프로젝트: schevalier/MJHMC

def hist_2d(distribution, nsamples, **kwargs):
    """
    Plots a 2d kernel density estimate of samples drawn from distribution

    ``distribution`` is instantiated with ``ndims=2`` and sampled ``nsamples``
    times with ``MarkovJumpHMC``; ``kwargs`` are forwarded to the sampler.
    """
    distr = distribution(ndims=2)
    sampler = MarkovJumpHMC(distr.Xinit, distr.E, distr.dEdX, **kwargs)
    samples = sampler.sample(nsamples)

    with sns.axes_style("white"):
        # x/y by keyword: recent seaborn rejects them as positional arguments.
        # NOTE(review): ``stat_func`` only exists in older seaborn releases -
        # confirm the pinned version.
        sns.jointplot(x=samples[0], y=samples[1], kind="kde", stat_func=None)

예제 #19

0

파일 보기

파일: data_analysis.py 프로젝트: AndrewCr0w/ml-playground

def pairwise_joint_plots(df, cols):
    """Save a hexbin joint plot for every pair of distinct columns.

    Each pair ``(a, b)`` with ``a < b`` is written to ``joint_<a>_<b>.png``
    in the current directory.

    Args:
        df: DataFrame holding the columns.
        cols: iterable of column names to pair up.
    """
    logging.debug('Plotting pairwise joint distributions')
    cols = sorted(cols)
    for colA, colB in [(a, b) for a in cols for b in cols if a < b]:
        filename = 'joint_{}_{}.png'.format(colA, colB)
        logging.debug('joint plot: %s', filename)
        # jointplot creates its own figure, so no plt.figure() here: the
        # extra empty figure was never closed and leaked once per pair.
        sns.jointplot(x=df[colA], y=df[colB], kind='hex')
        plt.savefig(filename)
        plt.close()

예제 #20

0

파일 보기

파일: KernelDensity.py 프로젝트: ryscet/TopDown

def AnalyzeAllElectrodes(path='/Users/ryszardcetnarski/Desktop/Nencki/Badanie_NFB/Dane/wszystkie_elektrody_jacek.csv'):
    """From Jacek.

    For each frequency band, adds a ``<band>_po`` column computed as
    ``<band>_przed + <band>_roznica``, draws regression joint plots of the
    "przed" column against the "po" and "roznica" columns, and passes the
    before/after columns to ``GeneralModel``.

    Args:
        path: CSV file to read; defaults to the original hard-coded location.

    Returns:
        The DataFrame, including the added ``_po`` columns.
    """
    db = pd.read_csv(path)

    for band in ['theta', 'alpha','smr', 'beta1', 'beta2']:
        before, after, diff = band + '_przed', band + '_po', band + '_roznica'
        db[after] = db[before] + db[diff]

        # x/y by keyword: recent seaborn rejects them as positional arguments.
        sns.jointplot(x=before, y=after, data=db, kind="reg")
        sns.jointplot(x=before, y=diff, data=db, kind="reg")

        # every row gets the same condition label and code
        conditions_str = ['mixed_conditions'] * len(db)
        conditions = [0] * len(db)
        GeneralModel(db[before], db[after], band, conditions, conditions_str)

    return db







#Kde using sklearn, returns object
   # kde = KernelDensity(kernel='tophat', bandwidth = 3).fit(initial[:, np.newaxis])
   # log_dens = kde.score_samples(x[:, np.newaxis])

    #Plot sklearn kernel estimate
   # kernel.plot(x, np.exp(log_dens), 'g')
    #Plot original data histogram


  #followUp = np.random.random_sample(100)
    #followUp= np.random.normal(20,10, 100)

    #followUp = np.random.normal(20,10, 100)#initial + np.random.normal(0,100,100)
    #followUp = np.random.random_sample(100)#initial + np.random.normal(0,100,100)
    #hist.hist(initial)

    #initial = np.random.normal(20,10, 100)
    #initial = np.random.random_sample(100)


    #Add noise to each observation
    #initial = #initial *0.95 + np.random.normal(100,100,100)
    #Make a follow up by adding nosie second time to the same population

예제 #21

0

파일 보기

파일: housing_prices.py 프로젝트: tarlen5/coursera_ml

def plotCorrelation(frame):
    """Show joint plots of price against bedrooms and against size.

    Blocks in ``plt.show()`` until the figures are closed.
    """

    # Plot correlation of each variable to visualize each dimension:
    # (x/y/data by keyword: recent seaborn rejects them positionally)
    # NOTE(review): ``size`` was renamed ``height`` in later seaborn
    # releases - confirm the pinned version.
    for column in ("bedrooms", "size"):
        sns.jointplot(x=column, y="price", data=frame, size=8)
        plt.tight_layout()

    print("PAUSED...close figures to continue...")
    plt.show()
    return

예제 #22

0

파일 보기

파일: plotting.py 프로젝트: schevalier/MJHMC

def gauss_2d(nsamples=1000):
    """
    Another simple test plot
    1d gaussian sampled from each sampler visualized as a joint 2d gaussian
    (control sampler on the x axis, experimental sampler on the y axis)
    """
    gaussian = misc.distributions.TestGaussian(ndims=1)
    control = Control(gaussian.Xinit, gaussian.E, gaussian.dEdX)
    experimental = ContinuousTimeHMC(gaussian.Xinit, gaussian.E, gaussian.dEdX)

    control_samples = control.sample(nsamples)[0]
    experimental_samples = experimental.sample(nsamples)[0]
    with sns.axes_style("white"):
        # x/y by keyword: recent seaborn rejects them as positional arguments.
        # NOTE(review): ``stat_func`` only exists in older seaborn releases -
        # confirm the pinned version.
        sns.jointplot(x=control_samples, y=experimental_samples, kind="hex", stat_func=None)

예제 #23

0

파일 보기

파일: drawPlot.py 프로젝트: WQ-huziang/WQ-Testcode

 def drawJointPlot(self, se1, se2):
     """
     Draw a linear-regression joint plot showing the correlation between
     series 1 and series 2.
         :param self: the instance itself
         :param se1: series 1
         :param se2: series 2
     """
     # x/y by keyword: recent seaborn rejects them as positional arguments.
     sns.jointplot(x=se1, y=se2, kind='reg', color=self.linecolors[0])
     # plt.title(self.title)
     plt.legend()
     plt.show()

예제 #24

0

파일 보기

파일: analysis.py 프로젝트: EhsanTadayon/alleninf

def fixed_effects(data, labels):
    """Report and plot the Pearson correlation between two columns of ``data``.

    A hexbin joint plot is drawn first; a regression joint plot is then drawn
    with the same axis limits.

    Args:
        data: table indexable by the two column names.
        labels: sequence whose first two items are the column names.

    Returns:
        Tuple ``(corcoeff, p_val)`` as returned by ``pearsonr``.
    """
    x_label, y_label = labels[0], labels[1]

    corcoeff, p_val = pearsonr(data[x_label], data[y_label])
    # print() call: the Python 2 print statement is a syntax error on Python 3.
    print("Pearson correlation between %s and %s across all donors is %g (two tailed p value = %g)"%(x_label, y_label, corcoeff, p_val))

    # x/y/data by keyword: recent seaborn rejects them positionally.
    grid = sns.jointplot(x=x_label, y=y_label, data=data, kind="hex")
    sns.jointplot(x=x_label, y=y_label, data=data, kind="reg",
                  xlim=grid.ax_joint.get_xlim(),
                  ylim=grid.ax_joint.get_ylim())
    plt.show()

    return corcoeff, p_val

예제 #25

0

파일 보기

파일: assembly.py 프로젝트: Hensonmw/jcvi

def covlen(args):
    """
    %prog covlen covfile fastafile

    Plot coverage vs length. `covfile` is two-column listing contig id and
    depth of coverage.
    """
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from jcvi.formats.base import DictFile

    p = OptionParser(covlen.__doc__)
    p.add_option("--maxsize", default=1000000, type="int", help="Max contig size")
    # Help text corrected: it was a copy of the --maxsize description.
    p.add_option("--maxcov", default=100, type="int", help="Max contig coverage")
    p.add_option("--color", default='m', help="Color of the data points")
    p.add_option("--kind", default="scatter",
                 choices=("scatter", "reg", "resid", "kde", "hex"),
                 help="Kind of plot to draw")
    opts, args, iopts = p.set_image_options(args, figsize="8x8")

    if len(args) != 2:
        sys.exit(not p.print_help())

    covfile, fastafile = args
    cov = DictFile(covfile, cast=float)
    s = Sizes(fastafile)
    maxsize, maxcov = opts.maxsize, opts.maxcov
    # Keep contigs within both limits; contigs absent from covfile count as 0.
    data = []
    for ctg, size in s.iter_sizes():
        c = cov.get(ctg, 0)
        if size > maxsize or c > maxcov:
            continue
        data.append((size, c))

    # zip(*data) cannot be unpacked into two names when nothing survived.
    if not data:
        logging.error("No contigs within maxsize/maxcov; nothing to plot")
        return

    x, y = zip(*data)
    x = np.array(x)
    y = np.array(y)
    logging.debug("X size {0}, Y size {1}".format(x.size, y.size))

    df = pd.DataFrame()
    xlab, ylab = "Length", "Coverage of depth (X)"
    df[xlab] = x
    df[ylab] = y
    # x/y by keyword: recent seaborn rejects them as positional arguments.
    # NOTE(review): ``stat_func`` only exists in older seaborn releases -
    # confirm the pinned version.
    sns.jointplot(x=xlab, y=ylab, kind=opts.kind, data=df,
                  xlim=(0, maxsize), ylim=(0, maxcov),
                  stat_func=None, edgecolor="w", color=opts.color)

    figname = covfile + ".pdf"
    savefig(figname, dpi=iopts.dpi, iopts=iopts)

예제 #26

0

파일 보기

파일: movie_data_handle_v1.py 프로젝트: fzhurd/fzwork

def main():
    """Explore the movie metadata CSV and plot profit relationships.

    Prints summary information, filters rows on ``imdb_score``, adds a
    ``profit`` column (percentage of gross left after budget) to the NaN-free
    rows, then shows a correlation heat map and two joint plots.
    """
    movie_raw_data = pd.read_csv('../input/movie_metadata.csv')
    # print() calls throughout: the Python 2 print statement is a syntax
    # error on Python 3.
    print(movie_raw_data.head(3))

    print(movie_raw_data.isnull().sum())

    print(movie_raw_data.shape)
    # An explicit copy, so that adding the profit column below cannot be
    # treated as a write into a slice of the raw frame.
    movie_raw_data_dropna = movie_raw_data.dropna().copy()
    print(movie_raw_data_dropna.shape)
    print(movie_raw_data.dtypes)

    movie_filterd_imdbscore_first = movie_raw_data.loc[movie_raw_data['imdb_score'] > 5]
    movie_filterd_imdbscore_from_raw = movie_raw_data.loc[movie_raw_data['imdb_score'] < 8]

    print(movie_filterd_imdbscore_first.shape)

    # Build the mask from the frame being filtered; the original used a mask
    # taken from the unfiltered frame, which has a different index.
    movie_filterd_imdbscore_second = movie_filterd_imdbscore_first.loc[
        movie_filterd_imdbscore_first['imdb_score'] < 8]

    print(movie_filterd_imdbscore_second.shape)
    print(movie_filterd_imdbscore_from_raw.shape)

    print('*********************************')

    print(movie_raw_data_dropna.head(3))
    gross = movie_raw_data_dropna['gross'].values
    budget = movie_raw_data_dropna['budget'].values
    profit = ((gross - budget) / gross) * 100
    print(profit)

    movie_raw_data_dropna.loc[:, 'profit'] = pd.Series(profit, movie_raw_data_dropna.index)
    print(movie_raw_data_dropna.shape)
    print(movie_raw_data_dropna.head(3))

    # Correlate numeric columns only: recent pandas raises on text columns
    # instead of silently dropping them.
    # NOTE(review): this also leaves out boolean columns - confirm that the
    # data set has none that should be correlated.
    corr = movie_raw_data_dropna.select_dtypes(include='number').corr()
    print(corr)

    f, ax = plt.subplots(figsize=(11, 9))
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    sns.heatmap(corr, cmap=cmap, vmax=1,
            square=True,
            linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)

    # NOTE(review): ``size`` was renamed ``height`` in later seaborn
    # releases - confirm the pinned version.
    g = sns.jointplot(x="title_year", y="profit",kind='scatter',size=10,ylim = [0,110],xlim=[1980,2020],data=movie_raw_data_dropna)
    h = sns.jointplot(x="imdb_score", y="profit",kind='reg',size=10,ylim = [0,110],data=movie_raw_data_dropna)

    plt.show()

예제 #27

0

파일 보기

파일: c5.py 프로젝트: 3774257/abu

def sample_54_1():
    """
    5.4 Visualising data with seaborn

    Shows, one after another, a histogram of ``p_change``, a box plot of
    ``p_change`` per ``date_week`` and a joint plot of ``high`` against
    ``low``, all taken from ``tsla_df``.
    :return:
    """
    sns.distplot(tsla_df['p_change'], bins=80)
    plt.show()

    sns.boxplot(x='date_week', y='p_change', data=tsla_df)
    plt.show()

    # x/y by keyword: recent seaborn rejects them as positional arguments.
    sns.jointplot(x=tsla_df['high'], y=tsla_df['low'])
    plt.show()

예제 #28

0

파일 보기

파일: analysis_windfield_game1.py 프로젝트: chili-epfl-cellulo/cellulo_log_analysis-DEPRECATED

def occupationAnalysis():
    """Accumulate robot positions from three game logs into a grid and plot it.

    Each log line is split on commas; fields 1 and 2 are scaled by ``nbcols``
    and ``nbrows`` and truncated to integer cell indices, and the matching
    cell of the count grid is incremented. Field 0 is printed for every line.
    """
    img = plt.imread("playground.jpg")
    data = np.zeros((nbcols, nbrows))
    # The green, orange and blue logs are processed in that order.
    for colour in ("green", "orange", "blue"):
        log_path = "./csv/windfield_game1_" + colour + "_withindex_position.csv"
        for line in readLog(log_path):
            fields = line.split(',')
            print(fields[0])
            col = int(float(fields[1]) * nbcols)
            row = int(float(fields[2]) * nbrows)
            data[col][row] += 1

    fig, ax = plt.subplots()

    # Colour-map segment tables; built here but not passed to the plot below.
    red_high = ((0., 0., 0.),
                (.3, .5, 0.5),
                (1., 1., 1.))
    blue_middle = ((0., .2, .2),
                   (.3, .5, .5),
                   (.8, .2, .2),
                   (1., .1, .1))
    green_none = ((0, 0, 0), (1, 0, 0))
    alpha_ramp = ((0.0, 0.0, 0.0),
                  (0.3, 0.5, 0.5),
                  (1.0, 1.0, 1.0))
    cdict3 = {'red': red_high,
              'green': green_none,
              'blue': blue_middle,
              'alpha': alpha_ramp}

    # NOTE(review): ``data`` is a plain 2-D count array without "x"/"y"
    # columns - confirm this call does what was intended.
    sns.jointplot(x="x", y="y", data=data, kind="kde")
예제 #29

0

파일 보기

파일: visualise.py 프로젝트: vianziro/msc-thesis

def performance_vs_coverage(db, output=None, max_values=250, **kwargs):
    """Draw a joint plot of performance against coverage from ``param_stats``.

    The ``coverage`` column is labelled "Legality" in the plot; both axes are
    limited to [0, 1]. ``output`` and ``kwargs`` are passed to
    ``viz.finalise``. ``max_values`` is accepted but not used here.
    """
    rows = list(
        db.execute(
            "SELECT "
            "    performance AS performance, "
            "    coverage "
            "FROM param_stats"
        )
    )
    frame = pandas.DataFrame(rows, columns=("Performance", "Legality"))
    # x/y by keyword: recent seaborn rejects them as positional arguments.
    sns.jointplot(x="Legality", y="Performance", data=frame,
                  xlim=(0, 1), ylim=(0, 1))
    viz.finalise(output, **kwargs)

예제 #30

0

파일 보기

파일: determine-vs-probability.py 프로젝트: z01nl1o02/tests

 def show(self):
     """Print the 20 highest-probability hypotheses and plot all probabilities.

     ``self.pr`` is indexed as a single-row 2-D array aligned with
     ``self.hypos``. The joint plot shows hypothesis index against
     probability.
     """
     # print() with a format string behaves the same on Python 2 and 3; the
     # original print statements are a syntax error on Python 3.
     top = np.argsort(self.pr)[0][-20:]
     for k in top:
         print("{} {}".format(self.hypos[k], self.pr[0, k]))
     best = np.argmax(self.pr)
     print("max {} pr= {}".format(self.hypos[best], self.pr[0, best]))
     # x is simply the position of each hypothesis
     X = list(range(len(self.hypos)))
     Y = self.pr.tolist()[0]
     df = pd.DataFrame({'x':X,'y':Y})
     sns.jointplot(x='x',y='y',data=df)
     # NOTE(review): sns.plt only exists in older seaborn releases - confirm
     # the pinned version.
     sns.plt.show()

예제 #31

0

파일 보기

# In[25]:

# Compare how many men and women fall into each target class.
fig, ax = plt.subplots(figsize=(10, 5))
# x by keyword: recent seaborn treats the first positional argument as data.
sns.countplot(x=df['target'], hue=df['sex'], ax=ax)
plt.xlabel('target')
plt.ylabel('sex')
plt.xticks(rotation=50)
# The original wrote ``plt.show`` without parentheses, so it was never called.
plt.show()

# In[26]:

# NOTE(review): 'trestbps' is listed twice, so that plot is drawn twice -
# confirm whether another column was intended.
nums = ['age', 'sex', 'trestbps', 'chol', 'trestbps', 'target']
for i in nums:
    # jointplot creates its own figure, so it is resized afterwards instead
    # of opening a separate, empty figure beforehand.
    grid = sns.jointplot(x=df[i], y=df['target'], kind='reg')
    plt.gcf().set_size_inches(20, 10)
    # Label and grid the joint axes explicitly rather than whichever axes
    # pyplot currently considers active.
    grid.set_axis_labels(i, 'response')
    grid.ax_joint.grid(True)
    plt.show()

# In[8]:

plt.bar(df['target'], df['age'], alpha=.5, width=0.8, label='chart')
plt.show()

# In[62]:

# x/y by keyword: recent seaborn rejects them as positional arguments.
sns.catplot(x='sex', y='target', data=df, kind='box', hue='fbs')

# In[53]:

예제 #32

0

파일 보기

파일: HousePriceProject_H_S.py 프로젝트: himalithaker/Predicting-House-Cost

# Summary statistics of the house cost, computed earlier in the script.
print("Minimum Cost: ${}".format(_min_cost)) 
print("Maximum Cost: ${}".format(_max_cost))
print("Mean Cost: ${}".format(_mean_cost))
print("Median Cost ${}".format(_median_cost))
print("Standard deviation of Cost: ${}".format(_stddev_cost))


# Bar chart of how many houses have each bedroom count.
_housedata['bedrooms'].value_counts().plot(kind='bar')
plt.title('Total number of Bedroom')
plt.xlabel('Bedrooms')
plt.ylabel('Count of Bedrooms')
plt.show()

# Joint plot of house locations. jointplot creates its own figure, so no
# separate plt.figure() is opened (it only produced an extra empty window).
# NOTE(review): ``size`` was renamed ``height`` in later seaborn releases -
# confirm the pinned version.
_location_grid = sns.jointplot(x=_housedata.lat.values, y=_housedata.long.values, size=10)
# Label the joint axes explicitly rather than whichever axes pyplot
# currently considers active.
_location_grid.set_axis_labels('Latitude of House', 'Longitude of House', fontsize=12)
plt.show()

plt.scatter(_housedata.price,_housedata.sqft_living)
plt.title("Price of House vs Square Feet of House")
plt.show()

plt.scatter(_housedata.price,_housedata.long)
plt.title("Price of House vs Location of the house area")
plt.show()

plt.scatter(_housedata.price,_housedata.lat)

예제 #33

0

파일 보기

    names = ['variance','skewness','curtosis','entropy','class'])

# Quick inspection of the loaded frame. These are bare expressions: their
# results are only displayed in an interactive/notebook session and are
# discarded when the file runs as a script.
data.head(3)
data.describe()
data.shape
data.isna().any()
data.dtypes
data['class'].unique()
# Class balance, then the distribution of each feature.
sns.countplot(x='class', data= data)
sns.violinplot( y=data['curtosis'])
sns.violinplot( y=data['entropy'])
sns.violinplot( y=data['variance'])
sns.violinplot( y=data['skewness'])
# Two density curves; p1 is rebound by the second call.
p1=sns.kdeplot(data['curtosis'], shade=True, color="r")
p1=sns.kdeplot(data['variance'], shade=True, color="b")
# Pairwise hexbin joint plots between selected features.
sns.jointplot(x=data['curtosis'], y=data['entropy'], kind='hex', linewidth = 2)
sns.jointplot(x=data['skewness'], y=data['variance'], kind='hex', color = 'skyblue', linewidth = 2)
sns.jointplot(x=data['curtosis'], y=data['variance'], kind='hex', linewidth = 2)
# Feature matrix (four columns) and label frame (single 'class' column).
X = data[['variance', 'skewness' ,'curtosis', 'entropy']]
y = data[['class']]

from sklearn.model_selection import train_test_split # Support Vector Machine
# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

from sklearn.svm import SVC
# The first SVC() instance is created and discarded; only `svc` is kept.
SVC()
svc = SVC()

# Creating a dictionary of parameters

parameters = {

예제 #34

0

파일 보기

파일: Langevin-MC.py 프로젝트: XanderJC/MCMC-Project

    def kde(self, n=0):
        """Draw a KDE joint plot of components 0 and 1 of ``self.samples[:, n]``."""
        first_component = self.samples[:, n, 0]
        second_component = self.samples[:, n, 1]
        sns.jointplot(x=first_component, y=second_component, kind="kde")

예제 #35

0

파일 보기

파일: experiment_graphsize_distribution.py 프로젝트: scott198510/toulouse-road-network-dataset

def generate_plots(plot_type=""):
    r"""
    Generate plots studying the distribution of graphs in different splits with respect to the graph size (|V| and |E|)

    The "test", "valid" and "train" splits are each loaded and iterated once with batch size 1.
    Per-split min/mean/max statistics are printed, followed by the same statistics over all splits.
    Figures are written as PNG files below ``plots/``, which is created if it does not exist yet.

    Note that the default value ``""`` is not an accepted plot type, so a type must always be passed.

    :param plot_type: type of plot in {"histograms", "marginal_E", "marginal_V", "joint"}
    """
    import os  # function-local: only needed to make sure the output directory exists

    assert plot_type in {"histograms", "marginal_E", "marginal_V", "joint"}
    split_names = ["test", "valid", "train"]

    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs("plots", exist_ok=True)

    tot_n_nodes = []
    tot_n_edges = []
    for split_name in split_names:
        d = ToulouseRoadNetworkDataset(split=split_name,
                                       step=0.001,
                                       max_prev_node=8)
        dataloader = DataLoader(d,
                                batch_size=1,
                                shuffle=False,
                                collate_fn=custom_collate_fn)

        n_nodes = []
        n_edges = []
        for datapoint in dataloader:
            this_x_adj, this_x_coord, this_y_adj, this_y_coord, this_img, this_seq_len, this_id = datapoint
            # |E| is taken as the sum over the flattened target adjacency tensor.
            n_edges.append(int(this_y_adj.view(-1).sum().item()))
            # |V| is the sequence length minus 2
            # (presumably start/end tokens -- confirm against the dataset class).
            n_nodes.append(int(this_seq_len[0] - 2))

        tot_n_edges += n_edges
        tot_n_nodes += n_nodes
        n_nodes = np.array(n_nodes)
        n_edges = np.array(n_edges)

        print(f"{split_name} min/mean/max len nodes", np.min(n_nodes),
              np.mean(n_nodes), np.max(n_nodes))
        print(f"{split_name} min/mean/max len edges", np.min(n_edges),
              np.mean(n_edges), np.max(n_edges))

        if plot_type == "histograms":
            # One bin per integer value between the observed minimum and maximum.
            plt.hist(n_nodes, bins=np.max(n_nodes) - np.min(n_nodes) +
                     1)  # arguments are passed to np.histogram
            plt.title(f"Histogram of |V| for {split_name}")
            plt.savefig(f"plots/histogram_|V|_{split_name}.png")
            plt.clf()
            plt.hist(n_edges, bins=np.max(n_edges) - np.min(n_edges) +
                     1)  # arguments are passed to np.histogram
            plt.title(f"Histogram of |E| for {split_name}")
            plt.savefig(f"plots/histogram_|E|_{split_name}.png")
            plt.clf()
        elif plot_type == "marginal_V":
            # No axes is passed, so every split is drawn onto the current axes;
            # the combined figure is saved once after the loop.
            a = sns.kdeplot(n_nodes, bw=.5, shade=True, label=split_name)
        elif plot_type == "marginal_E":
            b = sns.kdeplot(n_edges, bw=.5, shade=True, label=split_name)
        else:
            # x/y are passed by keyword: newer seaborn releases no longer
            # accept them positionally.
            sns_plot = sns.jointplot(x=np.log10(n_nodes),
                                     y=np.log10(n_edges),
                                     marginal_kws=dict(kernel="gau", bw=.02),
                                     kind="kde",
                                     bw=.05)
            sns_plot.ax_joint.set_xlabel("log10 |V|", fontsize=15)
            sns_plot.ax_joint.set_ylabel("log10 |E|", fontsize=15)
            sns_plot.ax_marg_x.set_title(split_name, fontsize=20)
            sns_plot.ax_joint.set_xlim(0.6, 1.2)
            sns_plot.ax_joint.set_ylim(0.4, 1.2)
            sns_plot.savefig(f"plots/joint_{split_name}.png")
            # Each jointplot owns a separate figure; release it once saved, in
            # line with the other branches which clear their figures.
            plt.close(sns_plot.fig)

    tot_n_nodes = np.array(tot_n_nodes)
    tot_n_edges = np.array(tot_n_edges)
    print("min/mean/max len nodes", np.min(tot_n_nodes), np.mean(tot_n_nodes),
          np.max(tot_n_nodes))
    print("min/mean/max len edges\n", np.min(tot_n_edges),
          np.mean(tot_n_edges), np.max(tot_n_edges))

    if plot_type == "marginal_V":
        a.set_xlabel("|V|")
        a.set_ylabel("p(x)")
        a.set_title("Distributions of |V|")
        a.legend()
        a.figure.savefig("plots/marginal_|V|.png")
        a.figure.clf()

    if plot_type == "marginal_E":
        b.set_xlabel("|E|")
        b.set_ylabel("p(x)")
        b.set_title("Distributions of |E|")
        b.legend()
        b.figure.savefig("plots/marginal_|E|.png")
        b.figure.clf()

    print("Done!")

예제 #36

0

파일 보기

파일: magic_formula.py 프로젝트: steve84/finanzlabor.blog

createFigure(
    figure_data_without_zynex, 'EY_ROC', EARNINGS_YIELD, 'Return On Capital (%)',
    'Earnings Yield (%)', 'ey_roc.png', 'lower right',
    vscaling=1.2, hscaling=2)
createFigure(
    figure_data, 'total_rank', 'EY_rank', 'Rank Return On Capital',
    'Rank Earnings Yield', 'ey_roc_rank.png', 'upper right',
    number_format='%d', vscaling=1.2, hscaling=2)

# Drop outliers
df_capped = df[df[EARNINGS_YIELD].between(
    df[EARNINGS_YIELD].quantile(0.05), df[EARNINGS_YIELD].quantile(0.95))]
df_capped = df_capped[df_capped['ROC'].between(
    df_capped['ROC'].quantile(0.05), df_capped['ROC'].quantile(0.95))]

# Save density plot
# x/y are passed by keyword: newer seaborn releases no longer accept them
# positionally. jointplot returns a JointGrid (not an Axes), hence the name.
density_grid = sb.jointplot(x=EARNINGS_YIELD, y='ROC', data=df_capped, kind='kde', color="g")
density_grid.set_axis_labels('Earnings Yield (%)', 'Return On Capital (%)')
plt.tight_layout()
plt.savefig('density_plot.png', format='png')

plt.clf()

# Create industry histogram
ax = sb.countplot(x=SECTOR, data=figure_data, palette='Blues_d')
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
ax.set_ylabel('Amount')
ax.set_xlabel('Industry')
plt.tight_layout()
plt.savefig('industry_histogram.png', format='png')

예제 #37

0

파일 보기

파일: tp5.py 프로젝트: anouel/cours-2017-2018

#%%
# Histogram of the price column
seaborn.distplot(ordis.price)

#%%
# Box plot of price
seaborn.factorplot("price", data=ordis, kind="box")

#%%
# Violin plot of price
seaborn.factorplot("price", data=ordis, kind="violin")

#%%
# Relationship between price and the quantitative variables (speed, hd)
seaborn.factorplot("speed", "price", data=ordis)
seaborn.jointplot("hd", "price", data=ordis, kind="reg")

#%%
# Relationship between price and the qualitative variables (ram, cd, premium, screen)
seaborn.factorplot("ram", "price", data=ordis, kind="box")
seaborn.factorplot("cd", "price", data=ordis, kind="box")
seaborn.factorplot("premium", "price", data=ordis, kind="box")
seaborn.factorplot("screen", "price", data=ordis, kind="box")

#%%
# price ~ speed and hd
# hd is cut into 6 bins and crossed with speed; each cell of the table holds
# the mean price for that combination, which is then shown as a heat map.
t = pandas.crosstab(pandas.cut(ordis.hd, 6, precision=0),
                    ordis.speed,
                    values=ordis.price,
                    aggfunc=numpy.mean)
seaborn.heatmap(t, cmap="Blues", cbar_kws={'label': 'mean price'})

예제 #38

0

파일 보기

# In[18]:

sns.pairplot(sub_task_summary_Output, hue='EV', palette='Set1')

# In[20]:

# SIMPLE LINE PLOT
sub_task_summary_Output['EV'].plot(figsize=(20, 12))

# In[26]:

# In[65]:

# Each jointplot call builds its own figure, so no plt.figure() is opened
# beforehand (it would only show up as an extra, empty figure).
sns.jointplot(x='SPI', y='EV', data=sub_task_summary_Output, color='hotpink')
sns.jointplot(x='CPI', y='EV', data=sub_task_summary_Output, color='red')
sns.jointplot(x='EAC', y='EV', data=sub_task_summary_Output, color='blue')

#

# In[41]:

# In[55]:

# In[56]:

# In[66]:

# In[67]:

예제 #39

0

파일 보기

파일: plotsV1.py 프로젝트: bjonnh/AMBER

def heatscatter_sns(x, y, figsize=(8, 8)):
    """Draw a KDE joint plot of ``x`` against ``y`` at the requested figure size.

    As a side effect the global seaborn settings are changed: ``figsize`` is
    stored as the ``figure.figsize`` rc parameter and the "white" style with
    color codes is activated.

    :param x: data for the horizontal axis, forwarded to ``sns.jointplot``
    :param y: data for the vertical axis, forwarded to ``sns.jointplot``
    :param figsize: ``(width, height)`` of the resulting figure in inches
    """
    sns.set(rc={'figure.figsize': figsize})
    sns.set(style="white", color_codes=True)
    grid = sns.jointplot(x=x, y=y, kind='kde', color="skyblue")
    # jointplot sizes its figure from its own height argument and does not
    # consult the figure.figsize rc parameter, so apply the size explicitly.
    grid.fig.set_size_inches(*figsize)

예제 #40

0

파일 보기

plt.figure(figsize=(10, 25))
sns.countplot(y='country', data=dataset, alpha=alpha)
plt.title('Data by country')
plt.show()

# Between Genders Male vs Female
plt.figure(figsize=(7, 7))
sex = sns.countplot(x='sex', data=dataset)

# Corelation between the Data
plt.figure(figsize=(16, 7))
cor = sns.heatmap(dataset.corr(), annot=True)

g = sns.jointplot(dataset.year,
                  dataset.suicides_no,
                  kind="kde",
                  color="#bfa9e0",
                  size=7)
plt.savefig('graph.png')

# Visualizing which age of people Suicide the most
plt.figure(figsize=(16, 7))
bar_age = sns.barplot(x='sex', y='suicides_no', hue='age', data=dataset)

# Visualizing which Generation of people Suicide the most
plt.figure(figsize=(16, 7))
bar_gen = sns.barplot(x='sex', y='suicides_no', hue='generation', data=dataset)

cat_accord_year = sns.catplot('sex',
                              'suicides_no',
                              hue='age',

예제 #41

0

파일 보기

df = DataFrame(iris.data,columns = iris.feature_names)
df['target'] = iris.target
print(df)

#数据可视化
import pandas as pd
from scipy import stats,integrate
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(color_codes = True)
#数据分布可视化，直方图和密度函数
#distplot()函数默认绘出数据的直方图和密度函数
sns.distplot(df['petal length (cm)'],bins = 15)

#jointplot()函数同时绘制散点图和直方图
sns.jointplot(x = 'sepal length (cm)',y = 'sepal width (cm)',data = df,size =8)


#分组散点图
#用seaborn.FacetGrid标记不同的种类
sns.FacetGrid(df,hue = 'target',size =8).map(plt.scatter,'sepal length (cm)','sepal width (cm)').add_legend()


# Hexbin plot
# axes_style() only returns the style parameters; they take effect when it is
# used as a context manager around the plotting call.
with sns.axes_style('white'):
    sns.jointplot(x = 'sepal length (cm)',y = 'sepal width (cm)',data = df,kind = 'hex',color = 'r')

#二维核密度估计图
g = sns.jointplot(x = 'sepal length (cm)',y = 'sepal width (cm)',data = df,kind = 'kde',color = 'm')
#添加散点图
g.plot_joint(plt.scatter,c='w',s=30,linewidth=1,marker='+')

예제 #42

0

파일 보기

파일: pythonbasics_visualisation.py 프로젝트: mmudgal33/python-projects-codes

sns.distplot(bd['age'], kde=False, norm_hist=True, bins=10)
sns.distplot(bd['age'], hist=False)
sns.distplot(bd['age'], hist=False)

myimg = myplot.get_figure()
myimg.savefig('distplot.png')

sns.kdeplot(bd['age'])  # other distribution plot, less used
sns.kdeplot(bd['age'], shade=True)  # shade area
sns.kdeplot(bd['pdays'], shade=True)

myplot = sns.boxplot(y='age', data=bd)
myimg = myplot.get_figure()
myimg.savefig('boxplot.png')

myplot = sns.jointplot(x='age', y='balance', data=bd.iloc[:500, :])
# jointplot returns a JointGrid, which has no get_figure(); the underlying
# matplotlib figure is available as an attribute instead.
myimg = myplot.fig
myimg.savefig('jointplot.png')
myplot = sns.jointplot(x='age',
                       y='balance',
                       data=bd.iloc[:100, :],
                       kind='hex',
                       size=10)
# light colour less density,givenby hex
help(sns.jointplot)
sns.jointplot(x='age',
              y='duration',
              data=bd.iloc[:100, :],
              kind='kde',
              size=10)
myplot = sns.lmplot(x='age', y='balance', data=bd.iloc[1:10, :])

예제 #43

0

파일 보기

파일: Mar 26 - Inclass assignment 2.py 프로젝트: rufus95/Data-viz-Python

sns.distplot(data['x'])
sns.distplot(data['y'])

# In[9]:

for col in 'xy':
    sns.kdeplot(data[col], shade=True)

# In[10]:

sns.kdeplot(data)

# In[12]:

with sns.axes_style('white'):
    sns.jointplot("x", "y", data, kind='kde')

# In[13]:

with sns.axes_style('white'):
    sns.jointplot("x", "y", data, kind='hex')

# In[14]:

sns.pairplot(data)

# In[20]:

import plotly.graph_objs as go
import numpy as np
x = np.random.randn(2000)

예제 #44

0

파일 보기

파일: testseaborn.py 프로젝트: Alafazam/seabornplots

    5, 5, 5, 5, 5, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 1, 11, 10, 10, 10, 10,
    10, 10, 10, 8, 3, 7, 3, 2, 2, 2, 11, 7, 7, 11, 11, 9, 9, 8, 8, 8, 8, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 6, 12, 12, 12, 11, 11, 11, 9, 9, 9, 9, 9, 11, 11, 10,
    1, 12, 12, 12, 3, 2, 12, 11, 11, 11, 11, 11, 11, 11, 10, 3, 11, 11, 2, 2,
    1, 1, 1, 12, 12, 12, 12, 12, 12, 12, 6, 6
]
y = [
    30, 29, 29, 24, 19, 11, 9, 8, 7, 3, 57, 54, 52, 34, 30, 29, 8, 1, 49, 44,
    33, 31, 29, 29, 28, 27, 2, 6, 5, 52, 41, 36, 18, 27, 26, 46, 32, 35, 33,
    15, 14, 10, 0, 51, 49, 44, 43, 28, 27, 26, 19, 16, 56, 21, 19, 16, 49, 43,
    39, 25, 23, 22, 21, 13, 23, 1, 13, 17, 59, 55, 54, 10, 59, 1, 59, 57, 27,
    25, 22, 21, 4, 49, 59, 31, 30, 5, 0, 8, 6, 0, 39, 37, 35, 31, 27, 25, 18,
    11, 9
]

# print rs
# x = rs.gamma(12, size=60)
# y = 2 + rs.gamma(60,size=60)
# x = rs.gamma(2, size=1000)

# print 'y = '+ str(y)

# seaborn does not reliably expose pyplot as ``sns.plt``; use matplotlib's
# pyplot module directly for saving and showing the figure.
import matplotlib.pyplot as plt

# x/y are passed by keyword: newer seaborn releases no longer accept them
# positionally.
# NOTE(review): ``stat_func`` is only understood by older seaborn releases --
# confirm the installed version before upgrading.
graph = sns.jointplot(x=x, y=y, kind="hex", stat_func=kendalltau, color="#4CB391")

# x = np.random.normal(size=100)
# print 'x = '+ str(x)
# graph = sns.distplot(x);

plt.savefig(__main__.__file__ + ".png")
# graph.pyplot.show()
plt.show()

예제 #45

0

파일 보기

print("Kurtosis:")
print(data_set['T_MAX'].kurtosis())

## Graph T MAX / CO & O3
df = data_set.sort_values(['T_MAX', 'CO'], ascending=True)
plt.plot(df['T_MAX'], df['CO'])
plt.title("La concentración de CO frente a la temperatura máxima")
plt.show()

df = data_set.sort_values(['T_MAX', 'O3'], ascending=True)
plt.plot(df['T_MAX'], df['O3'])
plt.title("La concentración de Ozono frente a la temperatura máxima")
plt.show()

## Pairplot
sns.jointplot(data_set['T_MAX'], data_set['CO'], kind="reg")
plt.show()
plt.close()
sns.jointplot(data_set['T_MAX'], data_set['O3'], kind="reg")
plt.show()
plt.close()

## Correlation Matrix
data_set_corr = data_set
data_set_corr['Mes'] = data_set_corr['Mes'].map({
    'ENE': 1,
    'FEB': 2,
    'MAR': 3,
    'ABR': 4,
    'MAY': 5,
    'JUN': 6,

예제 #46

0

파일 보기

ax_histx = plt.axes(rect_histx)
ax_histx.tick_params(direction='in', labelbottom=False)
ax_histy = plt.axes(rect_histy)
ax_histy.tick_params(direction='in', labelleft=False)

# the scatter plot:
ax_scatter.scatter(x, y)

# now determine nice limits by hand:
binwidth = 0.25
lim = np.ceil(np.abs([x, y]).max() / binwidth) * binwidth
ax_scatter.set_xlim((-lim, lim))
ax_scatter.set_ylim((-lim, lim))

bins = np.arange(-lim, lim + binwidth, binwidth)
ax_histx.hist(x, bins=bins)
ax_histy.hist(y, bins=bins, orientation='horizontal')

ax_histx.set_xlim(ax_scatter.get_xlim())
ax_histy.set_ylim(ax_scatter.get_ylim())

plt.show()

# Seaborn version
import numpy as np
import seaborn as sns
#sns.set(style="ticks")

sns.jointplot(x, y)
sns.jointplot(x, y, kind="hex", color="#4CB391")

예제 #47

0

파일 보기

파일: Getting_Started_With_Data_Analysis.py 프로젝트: tonomuniz/Python-in-Quantitative-Finance

# Visualization
matplotlib.rcdefaults()

# plt.show() does not take the artist to display; draw first, then show.
df.plot(kind = 'box')
plt.show()

# pandas no longer has the ``display.mpl_style`` option; the ggplot look is
# available as a matplotlib style sheet instead.
plt.style.use('ggplot')
df.plot(kind = 'box')

sns.boxplot(data=df,width=0.5)
sns.violinplot(data=df,width=3.5)

# ``DataFrame.ix`` has been removed from pandas; ``iloc`` selects the same
# columns by position.
sns.distplot(df.iloc[:,2], rug = True, bins = 15)
plt.show()

with sns.axes_style("white"):
    sns.jointplot(x=df.iloc[:,1], y=df.iloc[:,2], kind = "kde")
    plt.show()

sns.lmplot(x="Benguet", y="Ifugao", data=df)
plt.show()

#Creating custom function
def add_2int(x, y):
    """Return ``x + y``."""
    total = x + y
    return total
print(add_2int(2,2))

# an algorithm example
def case(n=10,mu=3,sigma=np.sqrt(5),p=0.025,rep=100):
    m=np.zeros((rep,4))

    for i in range(rep):
        norm = np.random.normal(loc = mu, scale = sigma, size = n)
        xbar = np.mean(norm)

예제 #48

0

파일 보기

파일: RS.py 프로젝트: afcarl/PythonDS-MLBootcamp

df.head()

import matplotlib.pyplot as plt
import seaborn as sns

df.groupby('title')['rating'].mean().sort_values(ascending=False).head()
df.groupby('title')['rating'].count().sort_values(ascending=False).head()

ratings = pd.DataFrame(df.groupby('title')['rating'].mean())

ratings['numRatings'] = pd.DataFrame(df.groupby('title')['rating'].count())
ratings.head()

ratings['numRatings'].hist(bins=100, figsize=(10, 6))
ratings['rating'].hist(bins=100, figsize=(10, 6))
sns.jointplot(x='rating', y='numRatings', data=ratings, alpha=0.6)
# as the number of ratings goes up, so does the average rating

moviemat = df.pivot_table(index='user_id', columns='title', values='rating')

moviemat.head()

ratings.sort_values('numRatings', ascending=False).head(10)

starwars_user_ratings = moviemat['Star Wars (1977)']
liarliar_user_ratings = moviemat['Liar Liar (1997)']

# This will show how people who have seen star wars rate other movies
similar_to_starwars = moviemat.corrwith(starwars_user_ratings)
similar_to_liarliar = moviemat.corrwith(liarliar_user_ratings)

예제 #49

0

파일 보기

파일: JointDistributions.py 프로젝트: alketcecaj12/datavizinpython

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

#sns.residplot(x='age',y='fare',data=tips,color='indianred')
# Generate a green residual plot of the regression between 'hp' and 'mpg'

auto = pd.read_csv('auto.csv')


# Generate a joint plot of 'hp' and 'mpg'
sns.jointplot(x = 'hp', y = 'mpg', data = auto)

# Display the plot
plt.show()

예제 #50

0

파일 보기

axes1 = fig.add_axes([0.1, 0.1, 0.8, 0.8])
axes1.scatter(j_day, dw_solar_everyday, label='Observed dw_solar', color='red')
axes1.scatter(j_day, ghi_everyday, label='Clear Sky GHI', color='green')

axes1.set_xlabel('Days')
axes1.set_ylabel('Solar Irradiance (Watts /m^2)')
axes1.set_title('Solar Irradiance - Test Year 2009')
axes1.legend(loc='best')

fig.savefig('RNN Paper Results/Exp2_1/' + test_location + '/' + test_year +
            'Figure 2.jpg',
            bbox_inches='tight')

# In[525]:

sns.jointplot(x=dw_solar_everyday, y=ghi_everyday, kind='reg')
plt.xlabel('Observed global downwelling solar (Watts/m^2)')
plt.ylabel('Clear Sky GHI (Watts/m^2)')
plt.savefig('RNN Paper Results/Exp2_1/' + test_location + '/' + test_year +
            'Figure 3',
            bbox_inches='tight')

# ### making the Kt (clear sky index at time t) column by first removing rows with ghi==0

# In[526]:

if run_train:
    # TRAIN dataset
    df_train = df_train[df_train['ghi'] != 0]
    df_train['Kt'] = df_train['dw_solar'] / df_train['ghi']
    df_train.reset_index(inplace=True)

예제 #51

0

파일 보기

파일: PS05.py 프로젝트: konstantin-boss/Quantitative-Macro-2019

#mu = np.array([-0.5, -2.5])
size = 1000000 # at 10 million my RAM is overloaded

### If a vector X is normally distributed, then exp(X) is lognormally distributed with the same mean and variance

log_data = np.random.multivariate_normal(mu,cov, size=size)
level_data = np.exp(log_data)
k = level_data[:,1]
z = level_data[:,0]
lnk = log_data[:,1]
lnz = log_data[:,0]


### Plotting the joint density functions for levels and for logs
## First levels
sns.jointplot(k,z,kind="hex").set_axis_labels("Capital", "Productivity")
plt.show()

sns.jointplot(lnk,lnz,kind="hex").set_axis_labels("Log Capital", "Log Productivity")
plt.show()
'''
## Plotting the raw joint density of lognormal variables does not make much sense as in 10,000,000 observations there will be massive outliers
### I attempt to get rid of these outliers for plotting purposes

meank = np.mean(k)
sdk = np.std(k)
final_k = [x for x in k if (x > meank - 2 * sdk)]
final_k = [x for x in final_k if (x < meank + 2 * sdk)]


meanz = np.mean(z)

예제 #52

0

파일 보기

파일: Regression_pca.py 프로젝트: Prashantpiyush11/DSSreni

def viz_cont_cont(df, features, target):
    """Draw one joint plot per feature, each against the same target column.

    :param df: data passed to ``sns.jointplot`` as ``data``
    :param features: iterable of column names used, one at a time, as x
    :param target: column name used as y in every plot
    """
    for column_name in features:
        sns.jointplot(data=df, x=column_name, y=target)

예제 #53

0

파일 보기

파일: Movie_Project_Detailed_Data_Processing.py 프로젝트: mzhou356/dc_ds_06_03_mod1_project

merged_df.popularity.plot.hist(bins=50, color='green')
# explore vote_average distribution
# appear to be almost normal distribution
merged_df.vote_average.plot.hist(bins=50, color='red')
# to fix popularity, we will remove vote_count under 10 to prevent bias
merged_df = merged_df[~(merged_df.vote_count < 10)]
# replot
merged_df.popularity.plot.hist(bins=50, color='blue',
                               alpha=0.5)  # appear to be better
# plot scatter and find r2 for popularity versus domestic_gross columns
# before plot, we want to convert the scale into log10 and need to remove 0s
merged_df = merged_df[~(merged_df.domestic_gross == 0)]
merged_df = merged_df[~(merged_df.worldwide_gross == 0)]
merged_df.to_pickle('budget_popularity.pkl')
# Regression joint plots of each rating metric against log10 of each gross
# column, in the order: popularity/domestic, popularity/worldwide,
# vote_average/domestic, vote_average/worldwide.
for rating_col in ('popularity', 'vote_average'):
    for gross_col in ('domestic_gross', 'worldwide_gross'):
        log_gross = np.log10(merged_df[gross_col])
        sns.jointplot(merged_df[rating_col], log_gross, kind="reg", stat_func=hf.r2)
# popularity is R2 is 0.3 while vote_average is 0.051, we will use popularity as a metric to estimate gross income
# we will use popularity to estimate how well genres perform using tmdb data frame

예제 #54

0

파일 보기

파일: make_scatterplots.py 프로젝트: Keesiu/meta-kaggle

# For every (feature, plottable) pair: object-typed features get a bar chart of
# their value counts; everything else gets a hexbin joint plot whose axes are
# clipped to the 1st-99th percentile range, with a plain histogram as fallback.
for a, b in product(features, plottables):
    msg('Making %s %s' % (a, b))
    x = with_elo[a]
    y = with_elo[b]
    msg('type = %s' % x.dtype)
    if x.dtype == 'object':
        plt.figure()
        x.value_counts().plot(kind='bar')
        plt.savefig('/data/' + a + '_hist.png')
        plt.close('all')
    else:
        try:
            xlim = tuple(np.percentile(x, [1, 99]))
            ylim = tuple(np.percentile(y, [1, 99]))
            with sns.axes_style("white"):
                # x/y by keyword: newer seaborn releases no longer accept
                # them positionally.
                sns.jointplot(x=x, y=y, kind="hex", xlim=xlim, ylim=ylim)
            plt.savefig('/data/scatter_' + a + '_' + b + '.png')
            plt.close('all')
        except Exception as exc:
            # Best-effort fallback. Only Exception is caught so that Ctrl-C and
            # SystemExit still stop the run, and the reason is reported
            # instead of being silently dropped.
            msg('Joint plot failed for %s %s (%s); falling back to histogram' % (a, b, exc))
            #        sns.violinplot(x, y)
            #        plt.savefig('/data/' + a + '_' + b + '.png')
            #        plt.close()
            plt.figure()
            x.plot(kind='hist')
            plt.savefig('/data/' + a + '_hist.png')
            plt.close('all')

do_indivs = True
if do_indivs:
    for a, b in product(features, plottables):
        msg('Making %s %s' % (a, b))

예제 #55

0

파일 보기

파일: stability_class.py 프로젝트: lalcayag/PhD_repository_v_1

file_out_figures = 'C:/Users/lalc/Documents/Old Documents folder/PhD/Meetings/July 2020/'
file = ['U','UN','N','SN']  
limits = [[-np.inf,-.1], [-.1,-.01], [-.01,.01], [.01,.21], [.21,np.inf]]  
limits = [[-np.inf,-.1], [-.1,-.01], [-.01,.01], [.01,.21]]
     
relind = L30min1.relscan>.25
j = -2
for i,l in enumerate(limits):
    stabind = ((Ri1[:,j]>l[0]) & (Ri1[:,j]<l[1]))
    cols = np.r_[['$L_{u_1,x_1}$', '$L_{u_1,x_2}$','$L_{v_1,x_1}$', '$L_{v_1,x_2}$','$L_{h,x_1}$', '$L_{h,x_2}$'], L30min1.columns [6:]]
    L30min1.columns = cols
    xlim = 5*200
    ylim = 5*200
    g = sns.jointplot(x ='$L_{h,x_1}$', y = '$L_{h,x_2}$', data=L30min1.loc[relind & stabind & ind1], 
                            height = 8, kind="kde", cmap="jet", xlim = (0,xlim), ylim = (0,ylim),
                            color='k')#,cbar=True, cbar_kws={"format": formatter, "label": '$Density$'})
    g.set_axis_labels('$L_{h,x_1}$', '$L_{h,x_2}$', fontsize = 24)
    g.ax_joint.plot([0,xlim],[0,ylim],'--k', linewidth = 2)
    g.ax_joint.plot(L30min1.loc[relind & stabind & ind1]['$L_{h,x_1}$'].values,L30min1.loc[relind & stabind & ind1]['$L_{h,x_2}$'].values,'o', color = 'k', alpha=.2)
    g.ax_joint.text(100, 800,'$'+'%.2f' % l[0] +'<Ri_f<'+'%.2f' % l[1] +'$',fontsize=30,color='r')
    plt.tight_layout()
    plt.savefig(file_out_figures+file[i]+'_phase_1.png')


file = ['U','UN','N','SN','VS']       
relind = L30min2.relscan>.25
for i,l in enumerate(limits):
    stabind = ((Ri2[:,-2]>l[0]) & (Ri2[:,-2]<l[1]))
    cols = np.r_[['$L_{u_1,x_1}$', '$L_{u_1,x_2}$','$L_{v_1,x_1}$', '$L_{v_1,x_2}$','$L_{h,x_1}$', '$L_{h,x_2}$'], L30min2.columns [6:]]
    L30min2.columns = cols

예제 #56

0

파일 보기

# que pasa por los valores, lo desactivamos asi

sns.distplot(tips['total_bill'],kde=False)
plt.show()

# podemos modificar la cantidad de bins que son la barras,
# con el parametro bins solo pasando un int, hay que tener
# cuidado con el tamaño del bin

sns.distplot(tips['total_bill'],kde=False,bins=40)
plt.show()

# tenemos un metodos que nos compara dos columnas dentro de
# un dataset

sns.jointplot(x='total_bill',y='tip',data=tips)
plt.show()

# podemos graficar esto de varias maneras con el parametro 
# kind usando: hex, reg, kde

# este otro metodo nos muestra una serie de graficas comparando
# todas las columnas con todas, cuando se compara con si mismo,
# muestra un histogram, y cuando es con otro, es un jointplot()

sns.pairplot(tips)
plt.show()

# si queremos dividir la informacion de cada grafica por otras
# columnas por ejemplo por sexo usamos el parametro hue, se le
# pasa una columa categorial, no que tenga un valor por eso

예제 #57

0

파일 보기

Next compare the distributions of the positive and negative examples over a few features. 
Good questions to ask yourself at this point are:

	* Do these distributions make sense?
		+ Yes. You've normalized the input and these are mostly concentrated in the +/- 2 range.
	* Can you see the difference between the distributions?
		+ Yes the positive examples contain a much higher rate of extreme values.
-----------------------------------------------------------------------------------------
'''
pos_df = pd.DataFrame(train_features[ bool_train_labels], columns = train_df.columns)
neg_df = pd.DataFrame(train_features[~bool_train_labels], columns = train_df.columns)

sns.jointplot(
    pos_df['V5'], 
    pos_df['V6'],
    kind='hex', 
    xlim = (-5,5), 
    ylim = (-5,5)
)

plt.suptitle("Positive distribution")

sns.jointplot(
    neg_df['V5'], 
    neg_df['V6'],
    kind='hex', 
    xlim = (-5,5), 
    ylim = (-5,5)
)

_ = plt.suptitle("Negative distribution")

예제 #58

0

파일 보기

파일: histograms and density plots.py 프로젝트: souviksaha97/Data-Science-Lab


# Histogram
sns.distplot(a = iris_data['Petal Length (cm)'], kde=False)



# Kernel Density Estimate (kde)
# This is the smoothed histogram

# kde plot
sns.kdeplot(data=iris_data['Petal Length (cm)'], shade=True)


# We can create two-dimensional kde plot
sns.jointplot(x=iris_data['Petal Length (cm)'],
              y=iris_data['Sepal Width (cm)'], kind='kde')








# Let split the data to understand difference btw species

iris_set_data = pd.read_csv('data/iris_setosa.csv', index_col="Id")
iris_ver_data = pd.read_csv('data/iris_versicolor.csv', index_col="Id")
iris_vir_data = pd.read_csv('data/iris_virginica.csv', index_col="Id")

예제 #59

0

파일 보기

def explore_global_plot(data, label='label', n_feats=50, id=None, task='classification'):
    '''
    Show a series of exploratory plots for a labelled data set.

    In order: for classification, a 2-D projection of the rows (FAMD when
    non-numeric feature columns exist, PCA otherwise); a correlation heat map;
    a printed outlier report for the numeric features; missing-value matrix and
    bar charts; and a joint plot of the number of missing attributes per row.

    :param data: DataFrame
    :param label: label column name in the data
    :param n_feats: the number of features (taken from the front of the column
        list) shown in the missing-value plots.
    :param id: optional name of an identifier column; it is checked for
        duplicates and then excluded from the analysis.
    :param task: regression or classification
    :return:
    '''
    columns = data.columns.tolist()
    columns.remove(label)

    if id is not None:
        # Duplicates have to be looked up in the identifier column itself;
        # ``columns`` is only a list of column names.
        if data[id].duplicated().sum():
            print('{} is duplicated !!!'.format(id))

        columns.remove(id)
        # Rebind instead of dropping in place so the caller's DataFrame is
        # left untouched.
        data = data.drop(id, axis=1)

    # Integer and float columns are treated as numeric; everything else is
    # treated as categorical. The original column order is kept for both.
    numeric_names = [
        name for name, dtype in zip(columns, data[columns].dtypes)
        if ptypes.is_integer_dtype(dtype) or ptypes.is_float_dtype(dtype)
    ]
    numeric_set = set(numeric_names)
    category_names = [name for name in columns if name not in numeric_set]

    if task == 'classification':
        if len(category_names):
            # data distribution for each class
            new_data = data.dropna(axis=0)
            famd = prince.FAMD(
                n_components=2,
                n_iter=3,
                copy=True,
                check_input=True,
                engine='auto',
                random_state=42
            )
            famd = famd.fit(new_data[columns])
            ax = famd.plot_row_coordinates(
                new_data,
                ax=None,
                x_component=0,
                y_component=1,
                labels=new_data.index,
                color_labels=['{}'.format(t) for t in new_data[label]],
                ellipse_outline=False,
                ellipse_fill=True,
                show_points=True
            )
            plt.show()
        else:
            new_data = data.dropna(axis=0)
            pca = PCA(n_components=2, random_state=seed)
            X_pca = pca.fit_transform(new_data[columns])
            sns.scatterplot(x=X_pca[:, 0], y=X_pca[:, 1], hue=label, data=new_data)
            plt.show()

    # sort features for correlation plot
    sorted_feat_name = numeric_names
    if len(numeric_names) > 6:
        n_clusters = 3
        new_data = data[[label] + numeric_names].dropna(axis=0)
        new_data_stand = StandardScaler().fit_transform(new_data[numeric_names])
        kmean_init = KMeans(n_clusters=n_clusters, random_state=seed)
        # Cluster the features, not the rows: each feature must become one
        # sample, which needs a transpose (a reshape would mix values of
        # different features into the same row).
        kmean_init.fit(new_data_stand.T)
        # Stable sort by cluster label keeps the original order within a cluster.
        sorted_feat = sorted(zip(numeric_names, kmean_init.labels_), key=lambda x: x[1])
        sorted_feat_name = [i[0] for i in sorted_feat]

    # correlation plot for all features
    sns.heatmap(data[[label] + sorted_feat_name + category_names].corr())
    plt.show()

    # outlier detection just for numeric features
    outlier = data[numeric_names].apply(mad_based_outlier)
    for i, column in enumerate(outlier.columns):
        print('outlier:\n {}'.format(data[[column]][outlier.iloc[:, i]]))

    # missing value pattern plot for all features
    msno.matrix(data[columns[:n_feats]])
    plt.show()

    msno.bar(data[columns[:n_feats]])
    plt.show()

    # Number of missing attributes per row, sorted ascending and plotted
    # against the row's rank in that ordering.
    miss_data = data[columns[:n_feats]].isnull().sum(axis=1)
    miss_data = miss_data.to_frame()
    miss_data.columns = ['number_of_missing_attributes']
    miss_data.sort_values('number_of_missing_attributes', inplace=True)
    miss_data['index'] = list(range(0, miss_data.shape[0]))
    sns.jointplot(x="index", y="number_of_missing_attributes", data=miss_data)
    plt.show()

예제 #60

0

파일 보기

def analyze_zN(z, outdir, vg, skip_umap=False, num_pcs=2, num_ksamples=20):
    zdim = z.shape[1]

    # Principal component analysis
    log('Perfoming principal component analysis...')
    pc, pca = analysis.run_pca(z)  
    log('Generating volumes...')
    for i in range(num_pcs):
        start, end = np.percentile(pc[:,i],(5,95))
        z_pc = analysis.get_pc_traj(pca, z.shape[1], 10, i+1, start, end)
        vg.gen_volumes(f'{outdir}/pc{i+1}', z_pc)

    # kmeans clustering
    log('K-means clustering...')
    K = num_ksamples
    kmeans_labels, centers = analysis.cluster_kmeans(z, K)
    centers, centers_ind = analysis.get_nearest_point(z, centers)
    if not os.path.exists(f'{outdir}/kmeans{K}'): 
        os.mkdir(f'{outdir}/kmeans{K}')
    utils.save_pkl(kmeans_labels, f'{outdir}/kmeans{K}/labels.pkl')
    np.savetxt(f'{outdir}/kmeans{K}/centers.txt', centers)
    np.savetxt(f'{outdir}/kmeans{K}/centers_ind.txt', centers_ind, fmt='%d')
    log('Generating volumes...')
    vg.gen_volumes(f'{outdir}/kmeans{K}', centers)

    # UMAP -- slow step
    if zdim > 2 and not skip_umap:
        log('Running UMAP...')
        umap_emb = analysis.run_umap(z)
        utils.save_pkl(umap_emb, f'{outdir}/umap.pkl')

    # Make some plots
    log('Generating plots...')
    plt.figure(1)
    g = sns.jointplot(x=pc[:,0], y=pc[:,1], alpha=.1, s=2)
    g.set_axis_labels('PC1','PC2')
    plt.tight_layout()
    plt.savefig(f'{outdir}/z_pca.png')
    
    plt.figure(2)
    g = sns.jointplot(x=pc[:,0], y=pc[:,1], kind='hex')
    g.set_axis_labels('PC1','PC2')
    plt.tight_layout()
    plt.savefig(f'{outdir}/z_pca_hexbin.png')

    if zdim > 2 and not skip_umap:
        plt.figure(3)
        g = sns.jointplot(x=umap_emb[:,0], y=umap_emb[:,1], alpha=.1, s=2)
        g.set_axis_labels('UMAP1','UMAP2')
        plt.tight_layout()
        plt.savefig(f'{outdir}/umap.png')

        plt.figure(4)
        g = sns.jointplot(x=umap_emb[:,0], y=umap_emb[:,1], kind='hex')
        g.set_axis_labels('UMAP1','UMAP2')
        plt.tight_layout()
        plt.savefig(f'{outdir}/umap_hexbin.png')

    analysis.scatter_annotate(pc[:,0], pc[:,1], centers_ind=centers_ind, annotate=True)
    plt.xlabel('PC1')
    plt.ylabel('PC2')
    plt.savefig(f'{outdir}/kmeans{K}/z_pca.png')

    g = analysis.scatter_annotate_hex(pc[:,0], pc[:,1], centers_ind=centers_ind, annotate=True)
    g.set_axis_labels('PC1','PC2')
    plt.tight_layout()
    plt.savefig(f'{outdir}/kmeans{K}/z_pca_hex.png')

    if zdim > 2 and not skip_umap:
        analysis.scatter_annotate(umap_emb[:,0], umap_emb[:,1], centers_ind=centers_ind, annotate=True)
        plt.xlabel('UMAP1')
        plt.ylabel('UMAP2')
        plt.savefig(f'{outdir}/kmeans{K}/umap.png')

        g = analysis.scatter_annotate_hex(umap_emb[:,0], umap_emb[:,1], centers_ind=centers_ind, annotate=True)
        g.set_axis_labels('UMAP1','UMAP2')
        plt.tight_layout()
        plt.savefig(f'{outdir}/kmeans{K}/umap_hex.png')

    for i in range(num_pcs):
        if zdim > 2 and not skip_umap:
            analysis.scatter_color(umap_emb[:,0], umap_emb[:,1], pc[:,i], label=f'PC{i+1}')
            plt.xlabel('UMAP1')
            plt.ylabel('UMAP2')
            plt.tight_layout()
            plt.savefig(f'{outdir}/pc{i+1}/umap.png')