def plot_heatmap(x, y_mat, alpha=1, title=None, sizes=None, share_axis=False):
    if sizes is None:
        sizes = 60
    if y_mat.ndim == 1:
        y_mat = np.expand_dims(y_mat, 1)
    pl.close()
    fig = pl.figure(4)
    fig.suptitle(title)
    for index, y in enumerate(y_mat.T):
        if index == 0:
            ax1 = pl.subplot(y_mat.shape[1], 1, index + 1)
        else:
            if share_axis:
                pl.subplot(y_mat.shape[1], 1, index + 1, sharex=ax1, sharey=ax1)
            else:
                pl.subplot(y_mat.shape[1], 1, index + 1)
        red_values = normalize(y)
        I = np.isfinite(y) & np.isfinite(x[:,0]) & np.isfinite(x[:,1])
        colors = np.zeros((red_values.size, 3))
        colors[:,0] = red_values
        pl.ylabel(str(index))
        if I.mean() < 1:  # report only when some points were dropped because of NaNs
            print('Percent skipped due to nans: ' + str(1 - I.mean()))
        pl.scatter(x[I,0], x[I,1], alpha=alpha, c=colors[I,:], edgecolors='none', s=sizes)
    move_fig(fig, 1000, 1000)
    pl.show(block=True)
Example #2
def plot_trajectory(mu_vector):
    data0 = mu_vector[:, 0]
    data1 = mu_vector[:, 1]
    labels = ["{0}".format(i) for i in range(len(mu_vector))]
    plt.scatter(data0[:, 0], data0[:, 1], color="red")
    plt.scatter(data1[:, 0], data1[:, 1], color="blue")
    for i in range(len(mu_vector)):
        plt.annotate(
            labels[i],
            (data0[i, 0], data0[i, 1]),
            fontsize=5,
            xytext=(-10, 20),
            textcoords="offset points",
            ha="right",
            va="bottom",
            arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=0"),
        )
        plt.annotate(
            labels[i],
            (data1[i, 0], data1[i, 1]),
            fontsize=5,
            xytext=(-10, 20),
            textcoords="offset points",
            ha="right",
            va="bottom",
            arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=0"),
        )
    plt.savefig("Mean_Trajectory.png")
    plt.show()
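# Usage sketch (illustrative, not from the original source): plot_trajectory
# expects mu_vector of shape (n_iterations, 2, 2), i.e. the two 2-D cluster
# means recorded at each iteration.
#
#   import numpy as np
#   mu_vector = np.array([[[0.0, 0.0], [5.0, 5.0]],
#                         [[0.6, 0.5], [4.5, 4.6]],
#                         [[1.0, 0.9], [4.1, 4.2]]])
#   plot_trajectory(mu_vector)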
Example #3
File: pca.py Project: id774/sandbox
def test(args):
    data = multivariate_normal([0, 0], [[1, 2], [2, 5]], int(args[1]))
    print(data)
    # PCA
    result = pca(data, base_num=int(args[2]))
    pc_base = result[0]
    print(pc_base)

    # Plotting
    fig = plt.figure()
    fig.add_subplot(1, 1, 1)
    plt.axvline(x=0, color="#000000")
    plt.axhline(y=0, color="#000000")
    # Plot data
    plt.scatter(data[:, 0], data[:, 1])
    # Draw the 1st principal axis
    pc_line = sp.array([-3.0, 3.0]) * (pc_base[1] / pc_base[0])
    plt.arrow(0, 0, -pc_base[0] * 2, -pc_base[1] * 2, fc="r", width=0.15, head_width=0.45)
    plt.plot([-3, 3], pc_line, "r")
    # Settings
    plt.xticks(size=15)
    plt.yticks(size=15)
    plt.xlim([-3, 3])
    plt.tight_layout()
    plt.savefig("image.png")  # save before show(); the figure is gone once the window closes
    plt.show()

    return 0
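# Usage sketch (illustrative): test() takes argv-style arguments, with args[1]
# the number of samples to draw and args[2] the number of principal axes:
#
#   import sys
#   sys.exit(test(sys.argv))    # e.g. python pca.py 1000 1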
Example #4
def main():
    # an example non-autonomous function
    x0 = 1
    t = np.linspace(1,3,500)

    # use the same syntax as odeint
    sol = LSolve(example,x0,t,args=(1,1))
    
    if matplotlib_module:
        mp.figure(1)
        mp.title("Example solution")
        mp.plot(t,sol)


    # example integrate and fire code
    x0 = 0
    t2 = np.linspace(0,10,500)
    
    # again the syntax is the same as odeint, but we add additional inputs,
    # including a flag to track spikes (IF models only):
    threshold = 5
    sol2,spikes = LSolve(QIF,x0,t2,threshold=threshold,reset=0,spike_tracking=True,args=(5,))

    # extract spike times
    spikes[spikes==0.0] = np.nan  # NaN entries are skipped by scatter
    spikes[spikes==1.0] = threshold

    if matplotlib_module:
        mp.figure(2)
        mp.title("QIF model with noise")
        mp.plot(t2,sol2)
        mp.scatter(t2,spikes,color='red',facecolor='red')
        mp.show()
Example #5
def movie_plotter(components, movies, movie_id="all", x_buffer=3, y_buffer=2):
    if movie_id == "all":
        plt.scatter(components[:,0], components[:,1])
        plt.xlabel("Component 1")
        plt.ylabel("Component 2")
        plt.show()
    else:
        x = components[movie_id][0]
        y = components[movie_id][1]

        xs = [x - x_buffer, x + x_buffer]
        ys = [y - y_buffer, y + y_buffer]

        plt.scatter(components[:,0], components[:,1])
        plt.xlim(xs)
        plt.ylim(ys)
        plt.xlabel("Component 1")
        plt.ylabel("Component 2")

        for x, y, title in zip(components[:,0], components[:,1], movies['movie_title']):
            if xs[0] <= x <= xs[1] and ys[0] <= y <= ys[1]:
                try:
                    plt.text(x, y, title)
                except Exception:
                    pass
def classification_regions(network, title, img_file_name, interval=100):
    coords = [
        (i / interval, j / interval)
        for j
        in range(0, interval + 1, 1)
        for i
        in range(0, interval + 1, 1)]

    classified_records = []

    for coord in coords:
        output = network.run(coord)

        classified_records.append(
            [coord, output.index(max(output)) + 1])

    plt.scatter(
        [record[0][0] for record in classified_records],
        [record[0][1] for record in classified_records],
        c=[record[1]    for record in classified_records],
    )

    plt.xlim((0, 1))
    plt.ylim((0, 1))

    plt.title(title)
    plt.xlabel('Six-fold rotational symmetry')
    plt.ylabel('Eccentricity')

    plt.savefig(img_file_name)
def main_k_nearest_neighbour(k):
    X, y = make_blobs(n_samples=100,
                      n_features=2,
                      centers=2,
                      cluster_std=1.0,
                      center_box=(-10.0, 10.0))

    h = .4
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    z = np.c_[xx.ravel(), yy.ravel()]

    z_f = []
    for i_z in z:
        z_f.append(k_nearest_neighbour(X, y, i_z, k, False))

    zz = np.array(z_f).reshape(xx.shape)

    plt.figure()
    plt.contourf(xx, yy, zz, cmap=plt.cm.Paired)
    plt.axis('tight')
    plt.scatter(X[:, 0], X[:, 1], c=y)

    plt.show()
Example #8
def winding_number(filename,window,bins=0):
    
    data = tools.read_matrix_from_file(filename)
    print("file read.")
    print(len(data[1]))

    if bins == 0:
        bins = window

    times = np.zeros(bins)
    values = np.zeros(bins)
    ns = np.zeros(bins)
    step = window // bins  # integer stride keeps the range() calls below valid
    
    for i in range(0,bins):
        times[i]=i*step
    
    for k in range(0,len(data[1])-window,window):
    
        for j in range(k,window+k,step):
        
            for i in range(0,bins):
                
                values[i]=values[i]+(data[1][j]-data[1][j - i*step])**2
                ns[i]=ns[i]+1
    
    for i in range(0,bins):
        if (ns[i] != 0):
            values[i]=values[i]/ns[i]
        else:
            values[i]=0
    plt.scatter(times,values)
    return [times,values]
Example #9
def plot_pulses(results, ymin=0, ymax=20):
    plt.plot(results["times"], results["amounts"])
    s = np.array([1] * len(results["times"]))
    # dtype=object so assigning "red" below is not truncated to a single
    # character by a fixed-width string array
    c = np.array(["k"] * len(results["times"]), dtype=object)
    if "durations" in results:
        # semi-Markovian
        start = 0
        for d, pulse in zip(results["durations"],
                            results["pulses"]):
            end = min(start + d, len(results["times"]) - 1)
            if pulse:
                c[start:end] = "red"
                s[start:end] = 2
            start += d
    else:
        # Markovian
        for n, t in enumerate(results["times"]):
            pulse = results["pulses"][n]
            if pulse:
                c[n] = "red"
                s[n] = 2
    plt.scatter(results["times"], [1] * len(results["times"]), color=c, s=s)
    plt.xlabel(r"Time, $t$")
    plt.ylabel("Glucose amount")
    plt.ylim([ymin, ymax])
    plt.xlim([time_obj.t.min(), time_obj.t.max()])
    sns.despine()
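# Input sketch (illustrative): plot_pulses expects a dict like
#   results = {"times": t, "amounts": glucose, "pulses": pulse_flags,
#              "durations": run_lengths}   # "durations" only in the semi-Markovian case
# time_obj and sns (seaborn) are assumed to be module-level globals here.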
Example #10
def main():   
    x0 = np.loadtxt('ex/ex5Linx.dat')
    y = np.loadtxt('ex/ex5Liny.dat')
    x0.shape=x0.size,1
    y.shape = y.size,1
    
    plt.scatter(x0,y)
    x = polynomial_linear(x0)
#    x,mns,sstd = z_scale(x)

    theta_normal = linear_normal_equation(x,y, 1.0)
    print('normal equation:')
    print(theta_normal)
    plot_fitting(theta_normal)
    plt.show()  
    m,n=x.shape 
    alphas = ( 0.01, 0.03, 0.1, 0.3, 1, 1.3 )    # if alpha >=1.3, no convergence result
    lambdas = (0, 1, 10)
    MAX_ITR = 100 
    for lam in lambdas:
        for alpha in alphas:
            theta,Js = linear_regression(x,y, MAX_ITR, alpha, lam)    
            if alpha==0.03 and lam==1:
                theta_best = theta
            plt.plot(Js)
        plt.xlabel('iterations')
        plt.ylabel('cost: J')
        plt.legend(['alpha: %s' %i for i in alphas])
        plt.show()

    print('best theta in alpha:\n ', theta_best)
    test = x0[-1]
    test.shape = test.size, 1
    test = polynomial_linear(test)
    print('predict of %s is %s' % (test, predict_linear(theta_best, test)))
def plot_2d_sub(x,y,data_set_ids=None,alpha=1,title=None,sizes=None):
    if sizes is None:
        sizes = 60
    pl.close()
    fig = pl.figure(4)
    if data_set_ids is None:
        data_set_ids = np.zeros(y.size)
    u = np.unique(data_set_ids)
    fig.suptitle(title)
    min_x = x.min()
    max_x = x.max()
    for index, val in enumerate(u):
        '''
        if index == 0:
            ax1 = pl.subplot(len(u),1,index+1)

        else:
            pl.subplot(len(u),1,index+1,sharex=ax1,sharey=ax1)
        '''
        ax = pl.subplot(len(u), 1, index + 1)
        #pl.title(title)
        inds = data_set_ids == val
        inds = inds.squeeze()
        pl.ylabel(str(val))
        pl.scatter(x[inds], y[inds], alpha=alpha, c='r', s=sizes)
        ax.set_xlim([min_x, max_x])
    move_fig(fig)
    pl.show(block=True)
Example #12
def test1():
    x = [0.5]*3
    xbounds = [(-5, 5) for y in x]

    GA = GenAlg(fitcalc1, x, xbounds, popMult=100, bitsPerGene=9, mutation=(1./9.), crossover=0.65, crossN=2, direction='min', maxGens=60, hammingDist=False)
    results = GA.run()
    print("*** DONE ***")
    #print results
    plt.ioff()
    #generate pareto frontier numerically
    x1_ = np.arange(-5., 0., 0.05)
    x2_ = np.arange(-5., 0., 0.05)
    x3_ = np.arange(-5., 0., 0.05)

    pfn = []
    for x1 in x1_:
        for x2 in x2_:
            for x3 in x3_:
                pfn.append(fitcalc1([x1,x2,x3]))

    pfn.sort(key=lambda x:x[0])

    plt.figure()
    for x in results:
        plt.scatter(x[1][0], x[1][1], 20, c='r')

    plt.scatter([x[0] for x in pfn], [x[1] for x in pfn], 1.0, c='b', alpha=0.1)
    plt.xlim([-20,-1])
    plt.ylim([-12, 2])
    plt.draw()
Example #13
def colorPlot():
    RA=[]
    DEC=[]
    FWHM=[]
    e1=[]
    e2=[]
    e=[]
    FWHM_max=0
    FWHM_min=1
    f=open("star_info.txt")
    i=0
    for line in f.readlines():
        temp= line.split()
        RA.append(float(temp[0]))
        DEC.append(float(temp[1]))
        e1.append(float(temp[2]))
        e2.append(float(temp[3]))
        e.append(np.sqrt(e1[-1]**2+e2[-1]**2))
        FWHM.append(float(temp[6]))
        if e[-1]>FWHM_max:
            FWHM_max=e[-1]
        if e[-1]<FWHM_min:
            FWHM_min=e[-1]
        i=i+1
       # if i>3000:
       #     break


    for i in range(len(RA)):
        # grayscale value must lie in [0, 1]: normalize e by its observed range
        plt.scatter(RA[i], DEC[i], color=str((e[i] - FWHM_min) / (FWHM_max - FWHM_min)))
     #   if i>3000:
     #       break

    plt.show()
def plot_knowledge_count(agent_network, filename):

    word_dict = dict()
    agent_list = agent_network.get_all_agents()
    for agent_item in agent_list:
        for word in agent_item.knowledge:
            if word not in word_dict:
                word_dict[word] = 0
            word_dict[word] = word_dict[word] + 1

    word_count_tuple_list = word_dict.items()
    word_count_tuple_list = sorted(word_count_tuple_list, key=itemgetter(1))
    print(word_count_tuple_list)

    x = list()
    y = list()

    for item in word_count_tuple_list:
        word = item[0]
        count = item[1]
        x.append(word)
        y.append(count)

    plt.scatter(x, y, s=30, vmin=0, vmax=100, alpha=0.5)
    plt.savefig(filename)
Example #15
def tsne_2D(inputs, colors=None, labels=None, initial_dims = 50, perplexity = 30.0):
    """
    Plots in 2D a set of points (the rows of NumPy 2D array
    ``inputs``), using t-SNE.

    A color coding can be specified with option ``colors``
    (e.g. ['b','r','k','k'] would yield one blue, one red and two
    black points). String labels for each data point can also be
    provided. ``initial_dims`` and ``perplexity`` are hyper-parameters
    of t-SNE.

    This function requires t-SNE python code provided by Laurens van
    der Maaten (see README in mlpython/misc/third_party/tsne/).

    """

    try:
        import mlpython.misc.third_party.tsne.tsne as tsne
        
    except ImportError:
        import warnings
        warnings.warn('tsne_2D requires the t-SNE python code provided by Laurens van der Maaten. See mlpython/misc/third_party/tsne/README for instructions.')
        return

    Y = tsne.tsne(inputs, 2, initial_dims, perplexity)
    if colors is None:
        colors = 'k'
    scatter(Y[:,0], Y[:,1], 20, colors)

    if labels is not None:
        for x,y,l in zip(Y[:,0],Y[:,1],labels):
            text(x,y,l)
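# Usage sketch (illustrative; assumes the mlpython t-SNE dependency above and a
# pylab-style namespace providing scatter()/text()):
#
#   import numpy as np
#   inputs = np.random.randn(100, 20)       # 100 points in 20 dimensions
#   colors = ['b'] * 50 + ['r'] * 50        # one color per point
#   tsne_2D(inputs, colors=colors, perplexity=30.0)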
def fit(w, f, e, mw, mf, vgrid, npol,
        sigrange=None, vrange=None, doppler=doppler, plot=False):

    vgrid = Quantity(vgrid, u.km/u.s)
    chi2 = Table([vgrid.value, np.zeros_like(vgrid.value)], names=['v','chi2'])
    chi2['v'].unit = vgrid.unit

    fit1 = Fit1(w, f, e, mw, mf, npol, doppler)

    chi2['chi2'] = np.array([fit1(v)[0] for v in vgrid])

    chi2.meta['ndata'] = len(f)
    chi2.meta['npar'] = npol+1+1
    chi2.meta['ndof'] = chi2.meta['ndata']-chi2.meta['npar']

    if plot:
        import matplotlib.pylab as plt
        plt.scatter(chi2['v'], chi2['chi2'])

    if (vrange is None and sigrange is None) or len(vgrid) < 3:
        ibest = chi2['chi2'].argmin()
        vbest, bestchi2 = chi2[ibest]
        chi2.meta['vbest'] = vbest
        chi2.meta['verr'] = 0.
        chi2.meta['bestchi2'] = bestchi2
    else:
        vbest, verr, bestchi2 = minchi2(chi2, vrange, sigrange, plot=plot)

    _, fit, mfi = fit1(vbest)
    chi2.meta['wmean'] = fit1.wmean
    chi2.meta['continuum'] = fit1.sol
    return chi2, fit, mfi
Example #17
def generate_data():
    # Number of observations
    T = 1000
    # Correlation
    rho = .9
    # True parameter
    beta = np.array([1., -.5])
    # True parameters for instruments
    gamma = np.array([1, -5, 2, 3, -1])
    # Random errors
    e = np.random.normal(size = (T, 2))
    # Instruments
    Z = np.random.normal(size = (T, 5))
    # Endogenous variables
    X1 = np.dot(Z, gamma) + e[:,0]
    X2 = np.dot(Z**2, gamma) + e[:,1]
    X = np.concatenate((X1[:, np.newaxis], X2[:, np.newaxis]), axis = 1)
    # Dependent variable
    Y = np.dot(X, beta) + e[:,0] + rho * e[:,1]
    
    print(X.shape, Y.shape)
    
    plt.scatter(X[:,0], Y)
    plt.show()
    
    return Y, X, Z
def fit_plot_unlabeled_data(unlabeled_data_x, labeled_data_x, labeled_data_y, fit_order, data_type, other_data_list, other_data_name):
    output = open('predictions.csv', 'w', newline='')
    coeffs = np.polyfit(labeled_data_x, labeled_data_y, fit_order) #does poly fit of nth deg on labeled data
    fit_eq = np.poly1d(coeffs) #Eqn from fit
    predicted_y = fit_eq(unlabeled_data_x)
    i = 0
    writer = csv.writer(output,delimiter=',')
    header = [str(data_type),str(other_data_name),'Predicted_Num_Inc']
    writer.writerow(header)
    while i < len(predicted_y):
        output_data = [unlabeled_data_x[i],other_data_list[i],predicted_y[i]]
        writer.writerow(output_data)
        print('For '+str(data_type)+' of: '+str(unlabeled_data_x[i])+', Predicted Number of Incidents is: '+str(predicted_y[i]))
        i = i + 1
    plt.scatter(unlabeled_data_x, predicted_y, color='blue', label='Predicted Number of Incidents')
    fit_line_x = np.arange(min(unlabeled_data_x), max(unlabeled_data_x), 1)
    plt.plot(fit_line_x, fit_eq(fit_line_x), color='red',linestyle='dashed',label=' Order '+str(fit_order)+' Polynomial Fit')
#____Use below line to plot actual data also!! 
    #plt.scatter(labeled_data_x, labeled_data_y, color='green', label='Actual Incident Report Data')
    plt.title('Predicted Number of 311 Incidents by '+str(data_type))
    plt.xlabel(str(data_type))
    plt.ylabel('Number of 311 Incidents')
    plt.grid()
    plt.xlim([min(unlabeled_data_x)-1500, max(unlabeled_data_x)+1500])
    plt.legend(loc='upper left')
    plt.show()
Example #19
def nova_plot():

	erg2mev=624151.

	fig=plot.figure()
	yrange = [1e-6,2e-4]
	xrange = [1e-1,1e5]
	plot.fill_between([0.2,10e3],[yrange[1],yrange[1]],[yrange[0],yrange[0]],facecolor='yellow',interpolate=True,color='yellow',alpha=0.5)
	plot.annotate('AMEGO',xy=(3,9e-5),xycoords='data',fontsize=26,color='black')

	lat=ascii.read("data/NMon2012.LAT.dat",names=['energy','en_low','en_high','flux','flux_err','tmp'])
	plot.scatter(lat['energy'],lat['flux']*erg2mev,color='red')
	plot.errorbar(lat['energy'],lat['flux']*erg2mev,xerr=[lat['en_low'],lat['en_high']],yerr=lat['flux_err']*erg2mev,ecolor='red',capsize=0,fmt='none')
	latul=ascii.read("data/NMon2012.LAT.limits.dat",names=['energy','en_low','en_high','flux','tmp1','tmp2','tmp3','tmp4'])
	plot.errorbar(latul['energy'],latul['flux']*erg2mev,xerr=[latul['en_low'],latul['en_high']],yerr=0.5*latul['flux']*erg2mev,uplims=True,ecolor='red',capsize=0,fmt='none')
	plot.scatter(latul['energy'],latul['flux']*erg2mev,color='red')

	leptonic=ascii.read("data/sp-NMon12-IC-best-fit-1MeV-30GeV.txt",names=['energy','flux'],data_start=1)
	hadronic=ascii.read("data/sp-NMon12-pi0-and-secondaries.txt",names=['energy','flux1','flux2'],data_start=1)	

	plot.plot(leptonic['energy'],leptonic['flux']*erg2mev,'r--',color='black',lw=2,label='Leptonic')
	plot.plot(hadronic['energy'],hadronic['flux2']*erg2mev,color='black',lw=2,label='Hadronic+Secondary Leptons')

	plot.legend(loc='upper right',fontsize='small',frameon=False,framealpha=0.5)
	plot.xscale('log')
	plot.yscale('log')
	plot.ylim(yrange)
	plot.xlim(xrange)
	plot.xlabel(r'Energy (MeV)')
	plot.ylabel(r'Energy$^2 \times $ Flux (Energy) (erg cm$^{-2}$ s$^{-1}$)')
	plot.title('Nova V339 Del 2013')
	plot.savefig('Nova_SED.png', bbox_inches='tight')
	plot.savefig('Nova_SED.eps', bbox_inches='tight')
	plot.show()
	plot.close()
def scipy_stuff():
  from scipy.interpolate import griddata
  from matplotlib import pylab
  import pickle
  print("loading points")
  points, x_diff, y_diff = pickle.load(open("temp_data.pickle", "rb"))

  y_pts, x_pts = zip(*points)

  print "Creating grid points"
  grid_points = []
  for j in range(2500):
    for i in range(2500):
      grid_points.append((j, i))

  print "Gridding data"
  x_grid = griddata(points, x_diff, grid_points)
  y_grid = griddata(points, y_diff, grid_points)
  x_grid.shape = (2500, 2500)
  y_grid.shape = (2500, 2500)

  print "Plotting"
  pylab.subplot(3, 1, 1)
  pylab.imshow(x_grid)
  pylab.subplot(3, 1, 2)
  pylab.imshow(y_grid)
  pylab.subplot(3, 1, 3)
  pylab.scatter(x_pts, y_pts)
  pylab.show()
def plot_approx_error(r, error):
    plt.scatter(r, error, c='g', alpha=0.5)
    plt.title('Rank r Approximation Error')
    plt.xlabel('r')
    plt.ylabel('Frobenius Norm')
    plt.show()
    return
Example #22
def plot_rfs(size, C, Rx, Ry, color='b'):
    radius = np.sqrt(size[...]/np.pi)
    a, w = 0, C.shape[0]
    plt.scatter(Rx, Ry, s=15, color='w', edgecolor='k')
    plt.scatter(C[a:w, 1], C[a:w, 0], s=radius*500, alpha=0.4, color=color)
    plt.xticks([])
    plt.yticks([])
Example #23
def _gaussian_test():
    import matplotlib.pyplot as plt
    n = 10000
    mu_x = 0.0
    mu_y = 0.0
    #sig_x, sig_y = 1.5, 1.5
    tau = 0.0
    seeing = 1.5
    sigma = seeing / (2. * np.sqrt(2. * np.log(2.)))  # FWHM seeing -> Gaussian sigma
    slit_width = 0.2
    slit_height = 10.0
    slit_x, slit_y = slit_gaussian_psf(n, mu_x, mu_y, sigma, sigma, tau, slit_width, slit_height)
    log.info("x range: [%s, %s]", slit_x.min(), slit_x.max())
    log.info("y range: [%s, %s]", slit_y.min(), slit_y.max())
    plt.scatter(slit_x, slit_y, alpha=0.8)
    plt.fill([-slit_width/2, slit_width/2, slit_width/2, -slit_width/2],
             [-slit_height/2, -slit_height/2, slit_height/2, slit_height/2],
             'r',
             alpha=0.10,
             edgecolor='k')
    plt.gca().set_aspect("equal")
    plt.title("Gaussian distribution")
    plt.xlim([-slit_height/2., slit_height/2])
    plt.show()
Example #24
def Decision_Surface(data, target, model, surface=True, probabilities=False, cell_size=.01):
    # Get bounds
    x_min, x_max = data[data.columns[0]].min(), data[data.columns[0]].max()
    y_min, y_max = data[data.columns[1]].min(), data[data.columns[1]].max()
    
    # Create a mesh
    xx, yy = np.meshgrid(np.arange(x_min, x_max, cell_size), np.arange(y_min, y_max, cell_size))
    meshed_data = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()])
    
    # Add interactions
    for i in range(data.shape[1]):
        if i <= 1:
            continue
        
        meshed_data = np.c_[meshed_data, np.power(xx.ravel(), i)]

    if model is not None:
        # Predict on the mesh
        if probabilities:
            Z = model.predict_proba(meshed_data)[:, 1].reshape(xx.shape)
        else:
            Z = model.predict(meshed_data).reshape(xx.shape)
    
    # Plot mesh and data
    if data.shape[1] > 2:
        plt.title("humor^(" + str(range(1,data.shape[1])) + ") and number_pets")
    else:
        plt.title("humor and number_pets")
    plt.xlabel("humor")
    plt.ylabel("number_pets")
    if surface and model is not None:
        cs = plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.4)
    color = ["blue" if t == 0 else "red" for t in target]
    plt.scatter(data[data.columns[0]], data[data.columns[1]], color=color)
Example #25
def plot2Dnet():
    allCells = f.net.allCells
    figure(figsize=(12,12))
    colorList = [[0.42,0.67,0.84], [0.90,0.76,0.00], [0.42,0.83,0.59], [0.90,0.32,0.00],
                [0.34,0.67,0.67], [0.90,0.59,0.00], [0.42,0.82,0.83], [1.00,0.85,0.00],
                [0.33,0.67,0.47], [1.00,0.38,0.60], [0.57,0.67,0.33], [0.5,0.2,0.0],
                [0.71,0.82,0.41], [0.0,0.2,0.5]] 
    popLabels = [pop.tags['popLabel'] for pop in f.net.pops if pop.tags['cellModel'] not in ['NetStim']]
    popColors = {popLabel: colorList[ipop%len(colorList)] for ipop,popLabel in enumerate(popLabels)} # dict with color for each pop
    cellColors = [popColors[cell.tags['popLabel']] for cell in f.net.cells]
    posX = [cell['tags']['x'] for cell in allCells]  # get all x positions
    posY = [cell['tags']['y'] for cell in allCells]  # get all y positions
    scatter(posX, posY, s=60, color = cellColors) # plot cell soma positions
    for postCell in allCells:
        for con in postCell['conns']:  # plot connections between cells
            posXpre,posYpre = next(((cell['tags']['x'],cell['tags']['y']) for cell in allCells if cell['gid']==con['preGid']), None)  
            posXpost,posYpost = postCell['tags']['x'], postCell['tags']['y'] 
            color='red'
            if con['synMech'] in ['inh', 'GABA', 'GABAA', 'GABAB']:
                color = 'blue'
            width = 0.1 #50*con['weight']
            plot([posXpre, posXpost], [posYpre, posYpost], color=color, linewidth=width) # plot line from pre to post
    xlabel('x (um)')
    ylabel('y (um)') 
    xlim([min(posX)-0.05*max(posX),1.05*max(posX)]) 
    ylim([min(posY)-0.05*max(posY),1.05*max(posY)])
    fontsiz = 12
    for popLabel in popLabels:
        plot(0,0,color=popColors[popLabel],label=popLabel)
    legend(fontsize=fontsiz, bbox_to_anchor=(1.02, 1), loc=2, borderaxespad=0.)
    ax = gca()
    ax.invert_yaxis()
def plot_prediction_accuracy(x, y):
    plt.scatter(x, y, c='g', alpha=0.5)
    plt.title('Logistic Regression')
    plt.xlabel('r')
    plt.ylabel('Prediction Accuracy')
    plt.xlim(0,200)
    plt.show()
Example #27
    def visualization2(self, sp_to_vis=None):
        if sp_to_vis:
            species_ready = list(set(sp_to_vis).intersection(self.all_sp_signatures.keys()))
        else:
            raise Exception('list of driver species must be defined')

        if not species_ready:
            raise Exception('None of the input species is a driver')

        for sp in species_ready:
            # Setting up figure
            plt.figure()
            plt.subplot(313)

            mon_val = OrderedDict()
            signature = self.all_sp_signatures[sp]
            for idx, mon in enumerate(list(set(signature))):
                if mon[0] == 'C':
                    mon_val[self.all_comb[sp][mon] + (-1,)] = idx
                else:
                    mon_val[self.all_comb[sp][mon]] = idx

            mon_rep = [0] * len(signature)
            for i, m in enumerate(signature):
                if m[0] == 'C':
                    mon_rep[i] = mon_val[self.all_comb[sp][m] + (-1,)]
                else:
                    mon_rep[i] = mon_val[self.all_comb[sp][m]]
            # mon_rep = [mon_val[self.all_comb[sp][m]] for m in signature]

            y_pos = numpy.arange(len(mon_val.keys()))
            plt.scatter(self.tspan[1:], mon_rep)
            plt.yticks(y_pos, mon_val.keys())
            plt.ylabel('Monomials', fontsize=16)
            plt.xlabel('Time(s)', fontsize=16)
            plt.xlim(0, self.tspan[-1])
            plt.ylim(0, max(y_pos))

            plt.subplot(312)

            for name in self.model.odes[sp].as_coefficients_dict():
                mon = name
                mon = mon.subs(self.param_values)
                var_to_study = [atom for atom in mon.atoms(sympy.Symbol)]
                arg_f1 = [numpy.maximum(self.mach_eps, self.y[str(va)][1:]) for va in var_to_study]
                f1 = sympy.lambdify(var_to_study, mon)
                mon_values = f1(*arg_f1)
                mon_name = str(name).partition('__')[2]
                plt.plot(self.tspan[1:], mon_values, label=mon_name)
            plt.ylabel('Rate(m/sec)', fontsize=16)
            plt.legend(bbox_to_anchor=(-0.1, 0.85), loc='upper right', ncol=1)

            plt.subplot(311)
            plt.plot(self.tspan[1:], self.y['__s%d' % sp][1:], label=parse_name(self.model.species[sp]))
            plt.ylabel('Molecules', fontsize=16)
            plt.legend(bbox_to_anchor=(-0.15, 0.85), loc='upper right', ncol=1)
            plt.suptitle('Tropicalization' + ' ' + str(self.model.species[sp]))

            # plt.show()
            plt.savefig('s%d' % sp + '.png', bbox_inches='tight', dpi=400)
Example #28
def plot(y, function):
    """ Show an animation of Poincare plot.

    --- arguments ---
    y: A list of initial values
    function: function which is argument of Runge-Kutta solver
    """
    h = dt
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.grid()
    time_text = ax.text(0.05, 0.9, '', transform=ax.transAxes)
    plt.ion()

    for i in range(nmax + 1):
        for j in range(nstep):
            rk4 = RK.RK4(function)
            y = rk4.solve(y, j * h, h)
            # -pi <= theta <= pi
            while y[0] > pi:
                y[0] = y[0] - 2 * pi
            while y[0] < -pi:
                y[0] = y[0] + 2 * pi

        if ntransient <= i < nmax:          # <-- draw the poincare plots
            plt.scatter(y[0], y[1], s=2.0, marker='o', color='blue')
            time_text.set_text('n = %d' % i)
            plt.draw()

        if i == nmax:                       # <-- to stop the interactive mode
            plt.ioff()
            plt.scatter(y[0], y[1], s=2.0, marker='o', color='blue')
            time_text.set_text('n = %d' % i)
            plt.show()
Example #29
def show_ratios(cpu):
    cpu.ratios.sort(key=lambda x: x[0])
    pl.figure("Tuning")
    pl.plot([x[0] for x in cpu.ratios], [x[2] for x in cpu.ratios])
    
    pl.figure("Tuning samples")
    pl.scatter([x[0] for x in cpu.ratios], [x[2] * x[0] for x in cpu.ratios])
def scatter(title, file_name, x_array, y_array, size_array, x_label, \
            y_label, x_range, y_range, print_pdf):
    '''
    Plots the given x value array and y value array with the specified
    title and saves with the specified file name. The size of points on
    the map are proportional to the values given in size_array. If
    print_pdf value is 1, the image is also written to pdf file.
    Otherwise it is only written to png file.
    '''
    rc('text', usetex=True)
    rc('font', family='serif')
    plt.clf() # clear the plotting window, a must.
    plt.scatter(x_array, y_array, s=size_array, c='b', marker='o', alpha=0.4)
    if x_label is not None:
        plt.xlabel(x_label)
    if y_label is not None:
        plt.ylabel(y_label)
    plt.axis([0, x_range, 0, y_range])
    plt.grid(True)
    plt.suptitle(title)

    Plotter.print_to_png(plt, file_name)

    if print_pdf:
        Plotter.print_to_pdf(plt, file_name)
Example #31
def draw_solution(start_node, final_node=None):
    ax = plt.gca()

    def draw_path(u, v, arrow_length=.01, color=(.8, .8, .8), lw=1):
        du = u.direction
        plt.arrow(u.pose[X],
                  u.pose[Y],
                  du[0] * arrow_length,
                  du[1] * arrow_length,
                  head_width=.005,
                  head_length=.01,
                  fc=color,
                  ec=color)
        dv = v.direction
        plt.arrow(v.pose[X],
                  v.pose[Y],
                  dv[0] * arrow_length,
                  dv[1] * arrow_length,
                  head_width=.005,
                  head_length=.01,
                  fc=color,
                  ec=color)
        center, radius = find_circle(u, v)
        du = u.position - center
        theta1 = np.arctan2(du[1], du[0])
        dv = v.position - center
        theta2 = np.arctan2(dv[1], dv[0])
        # Check if the arc goes clockwise.
        if np.cross(u.direction, du).item() > 0.:
            theta1, theta2 = theta2, theta1
        ax.add_patch(
            patches.Arc(center,
                        radius * 2.,
                        radius * 2.,
                        theta1=theta1 / np.pi * 180.,
                        theta2=theta2 / np.pi * 180.,
                        color=color,
                        lw=lw))
        return abs((theta2 - theta1) * radius)

    # points = []
    # s = [(start_node, None)]  # (node, parent).
    # while s:
    #   v, u = s.pop()
    #   if hasattr(v, 'visited'):
    #     continue
    #   v.visited = True
    #   # Draw path from u to v.
    #   if u is not None:
    #     draw_path(u, v)
    #   points.append(v.pose[:2])
    #   for w in v.neighbors:
    #     s.append((w, v))
    #
    # points = np.array(points)
    # plt.scatter(points[:, 0], points[:, 1], s=10, marker='o', color=(.8, .8, .8))
    length = 0
    if final_node is not None:
        plt.scatter(final_node.position[0],
                    final_node.position[1],
                    s=10,
                    marker='o',
                    color='k')
        # Draw final path.
        v = final_node
        while v.parent is not None:
            d = draw_path(v.parent, v, color='k', lw=2)
            v = v.parent
            length += d
    return length
Example #32
print("rho: ", stat_spearman)
print("tau: ", stat_kendall)
print("score: ", ridge.score(pca_x, target))

# needed to work properly
predicted = predicted.reshape(-1, 1)

# A linear regression is needed to visually compare predicted age vs real age
LR = LinearRegression().fit(predicted, target)
predicted = LR.predict(predicted)

# needed to work properly
predicted = predicted.reshape(-1, 1)

# plot
plt.scatter(target, predicted, s=3, color="black")
plt.title("Regression after PCA")
plt.plot([min(target), max(target)], [
    min(target) * LR.coef_[0] + LR.intercept_,
    max(target) * LR.coef_[0] + LR.intercept_
],
         'r-.',
         label="Regression line Y = " + str(LR.coef_[0])[:5] + "*X + " +
         str(LR.intercept_)[:6])

plt.xlabel("Real age (years)")
plt.ylabel("Predicted age (years)")

plt.grid()
plt.legend()
Example #33
    for j in range(2, 10):
        data[i][j - 2] = rows[j]

data1 = vq.whiten(data)

Ave = sch.linkage(data1, method='average')
P = sch.dendrogram(Ave)
plt.xlabel('Category label')
plt.ylabel('Distance')
plt.title('Average linkage')
plt.show()

Ward = sch.linkage(data1, method='ward')
P = sch.dendrogram(Ward)
plt.xlabel('Category label')
plt.ylabel('Distance')
plt.title('Ward linkage (sum of squared deviations)')
plt.show()

kmeans_cent = vq.kmeans(data1, 5)
print('Cluster centers:\n', kmeans_cent[0])

p = plt.figure(figsize=(16, 16))
plt.title('Scatter plot of cluster centers')
for i in range(8):
    for j in range(8):
        ax = p.add_subplot(8, 8, i * 8 + 1 + j)
        plt.scatter(data1[:, j], data1[:, i])
        plt.scatter(kmeans_cent[0][:, j], kmeans_cent[0][:, i], c='r')

plt.show()
Example #34
    d = 0
    for i in Data['Creatinin'].values:
        s = CU_SUM[t][-1] + (i - mean)
        CU_SUM[t].append(s)
        res = pd.DataFrame([Data.index[d], c, s]).T
        res.columns = columns_C
        CU_SUM_Mid_C = pd.concat([CU_SUM_Mid_C, res])  # row-wise append (DataFrame.append is deprecated)
        d = d + 1
    t = t + 1

CU_SUM_Mid_C.to_csv("D:\\Simulation Model\\Change Point Analysis\\Data\\Creatinin_CUSUM.csv")

#Troponin and Creatinin
Troponin_Creatnin = pd.read_csv("D:\\Simulation Model\\Change Point Analysis\\Data\\Troponin_CreatninFinal.csv")
Case = Troponin_Creatnin[Troponin_Creatnin.Patient_ID == 439505]
plt.scatter(x = np.log(Troponin_Creatnin.Troponin), y = np.log(Troponin_Creatnin.Creatinin))
markers = ['.','o','v','^', '<', '>', '1', '2','3','4','8','s','p','*','h','H','+','x','D','d','|','_']
color_list = []
for name in matplotlib.colors.cnames:
    color_list.append(name)
plt.figure(figsize=(14, 8))
plt.xticks(fontsize=14)
plt.yticks(rotation = 'vertical', fontsize=14)
plt.xlabel('Time', fontsize=14)
plt.ylabel('CUMSUM', fontsize=14)



#Patient Information
Patient_Info = pd.read_csv("D:\\Simulation Model\\Change Point Analysis\\Data\\Patient Information2.csv")
ACS_ID = Patient_Info[Patient_Info.patient_id.isin(CU_SUM_Mid_T.Patient_ID)]
Example #35
        cost_1.append(cost(X, y, theta))
    return theta, cost_1


if __name__ == '__main__':
    '''Scatter plot'''
    data = pd.read_csv('ex2data1.txt',
                       header=None,
                       names=['exam1', 'exam2', 'admitted'])
    data.insert(0, 'Ones', 1)
    print(data.describe())
    positive = data[data['admitted'].isin(['1'])]
    negative = data[data['admitted'].isin(['0'])]
    plt.scatter(positive['exam1'],
                positive['exam2'],
                marker='x',
                c='black',
                label='admitted')
    plt.scatter(negative['exam1'],
                negative['exam2'],
                marker='o',
                c='red',
                label='not admitted')
    plt.legend(loc='upper right')  # legend position
    # plt.show()
    '''Loss function and gradient descent'''
    X, y = np.array(data.iloc[:, :-1]), np.array(data.iloc[:, -1:])
    theta = np.zeros((1, 3))
    print(X.shape, y.shape, theta.shape)
    alpha, epoch = 0.001, 500000
    theta, cost_1 = gradientDescent(X, y, theta, alpha, epoch)
Example #36
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import seaborn as sns
from math import cos, sin, log, tan, gamma, pi, exp, sqrt

p = plt.figure(figsize=(14, 14), facecolor='black', dpi=600)
p = plt.axis('off')


def iterator(r, x0, n):
    y = [x0]
    for z in range(n):
        y.append(r * (y[-1]) * (1 - y[-1]))
    return (y)


n = 3000
for z, i in zip(np.linspace(1, 4, n), range(n)):
    lista = iterator(z, 0.8, 100)
    plt.scatter(np.linspace(0.5, 4, 10 * n)[i * 10:(i + 1) * 10],
                lista[-10:],
                s=0.7,
                color='w')
plt.savefig('C:/Users/Alejandro/Pictures/RandomPlots/29012020.png',
            facecolor='black')
Example #37
def plot_ifa_parameters_and_ppc(estimated_parameters, true_parameters, sess):
    map_estimates = dict(estimated_parameters)
    #map_estimates.pop('sources')

    true_parameters_vars = true_parameters.copy()
    map_sources = map_estimates.pop('sources')
    #map_estimates = sess.run(map_estimates)

    n_observations = true_parameters['data'].shape[0]
    #testmodel,source = centeredIndependentFactorAnalysisTest2(n_observations=n_observations, **map_estimates)
    testmodel, source, data_mean = centeredIndependentFactorAnalysisTest(
        n_observations=n_observations, mc_samples=1, **map_estimates)
    #print(sess.run(map_estimates['data_var']))
    #print(sess.run(source.distribution.sample((5000))).var(0))
    ppc = sess.run(testmodel.distribution.sample())

    #plt.title('True source distributions')
    plot_source_distributions(true_parameters['mixture_component_var'],
                              true_parameters['mixture_weights'], sess)
    #plt.title('Estimated source distributions')
    plot_source_distributions(map_estimates['mixture_component_var'],
                              map_estimates['mixture_weights'], sess)

    fig, ax = plt.subplots()
    plt.title('Variance of sample is {}'.format(true_parameters['data'].var()))
    plt.scatter(*true_parameters['data'].T, alpha=.5)
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()
    plt.show()

    #fig, ax = plt.subplots()
    #plt.title('Variance of sample is {}'.format(ppc.var()))
    #plt.scatter(*ppc.T, alpha=.5,c='orange')
    #ax.set_xlim(xlim)
    #ax.set_ylim(ylim)
    #plt.show()

    fig, ax = plt.subplots()
    plt.title('Variance of sample is {}'.format(ppc.var()))
    plt.scatter(*true_parameters['data'].T, alpha=.5)
    plt.scatter(*ppc.T, alpha=.5)
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    plt.show()

    fig, ax = plt.subplots()
    plt.title('True and estimated sources')
    plt.scatter(*true_parameters['sources'].T)
    #map_sources = sess.run(tf.squeeze(source.distribution.sample((n_observations))))
    n_sources = map_sources.shape[1]
    plt.scatter(*map_sources.T)
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    plt.show()

    fig, ax = plt.subplots()
    plt.title('True and estimated data_mean')
    data_mean_s = sess.run(tf.squeeze(data_mean))
    true_data_mean = np.einsum(
        'ik,kj->ij', true_parameters['sources'],
        true_parameters['factor_loadings'] / np.linalg.norm(
            true_parameters['factor_loadings'], axis=1, keepdims=True))
    plt.scatter(*true_data_mean.T)
    plt.scatter(*data_mean_s.T)
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    plt.show()

    n_components_in_mixture = true_parameters['mixture_component_var'].shape[1]

    for source in range(n_sources):
        plt.title('mixture component variances, true and estimated')
        plt.bar(np.arange(n_components_in_mixture),
                true_parameters_vars['mixture_component_var'][source, :])
        plt.bar(np.arange(n_components_in_mixture),
                map_estimates['mixture_component_var'][source, :],
                alpha=.5)
        plt.show()

    for source in range(n_sources):
        plt.title('mixing weights, true and estimated')
        plt.bar(np.arange(n_components_in_mixture),
                true_parameters_vars['mixture_weights'][source, :])
        plt.bar(np.arange(n_components_in_mixture),
                map_estimates['mixture_weights'][source, :],
                alpha=.5)
        plt.show()

    fgen = true_parameters['factor_loadings']
    fpred = map_estimates['factor_loadings']
    fig, ax = plt.subplots()
    ax.scatter(*true_parameters['data'].T, alpha=0.3)
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()

    for fg in fgen:
        plt.plot(fg[0] * np.array([1, -1]),
                 fg[1] * np.array([1, -1]),
                 color='y',
                 label='true')

    for fg in fpred:
        plt.plot(fg[0] * np.array([1, -1]),
                 fg[1] * np.array([1, -1]),
                 color='r',
                 linestyle='-.',
                 label='predicted')

    #for fg in fica_n:
    #    plt.plot(fg[0]*np.array([1,-1]),fg[1]*np.array([1,-1]),color='k',linestyle='--',label='initial')

    plt.legend()
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
Example #38
def visualize(trainX, trainY):
    color=[colors[int(i)] for i in trainY]
    plb.scatter(trainX[:,0],trainX[:,1],c=color)
    plb.xlabel('x')
    plb.ylabel('y')
    plb.show()
Example #39
        elem += 1

fraction = np.zeros(max_speed, dtype=np.float32)
for i in range(start_speed, len(fraction)):
    if (np.float32(num_below_thres[i - start_speed] +
                   num_over_thres[i - start_speed]) > 0):
        fraction[i] = np.float32(num_below_thres[i - start_speed]) / (
            num_below_thres[i - start_speed] + num_over_thres[i - start_speed])
    else:
        fraction[i] = -1
print(fraction)

figure = plt.figure(figsize=(15, 10))
plot_abs = plt.subplot(2, 2, 1)
plt.title("Measurements")
plt.scatter(speed, score, color="b")
plt.xlim([-1, max_speed])
plt.xlabel("Windspeed [m/s]")
plt.ylim([-2, 32])
plt.ylabel("Correct Score [MW]")

plot_scatter = plt.subplot(2, 2, 2)
plt.title("KNN Interpolation of the Response Curve")
plt.plot(T, Y_hat, color='b')
plt.scatter(speed, score, color="#CCCCCC")
plt.xlim([-1, max_speed])
plt.xlabel("Windspeed [m/s]")
plt.ylim([-2, 32])
plt.ylabel("Correct Score [MW]")

plot_abs = plt.subplot(2, 2, 3)

dummies = np.empty(739)

dummies[0:100] = 9
dummies[100:200] = 8
dummies[200:300] = 7
dummies[300:400] = 6
dummies[400:500] = 5
dummies[500:600] = 4
dummies[600:700] = 3
dummies[700:739] = 2

#print(cols.index("red"))

plt.scatter(x[0:100], dummies[0:100], s=10, c=cols, label='Monosyllables')
plt.scatter(x[0:100], dummies[100:200], s=10, c=cols[100:200])
plt.scatter(x[0:100], dummies[200:300], s=10, c=cols[200:300])
plt.scatter(x[0:100], dummies[300:400], s=10, c=cols[300:400])
plt.scatter(x[0:100], dummies[400:500], s=10, c=cols[400:500])
plt.scatter(x[0:100], dummies[500:600], s=10, c=cols[500:600])
plt.scatter(x[0:100], dummies[600:700], s=10, c=cols[600:700])
plt.scatter(x[0:39], dummies[700:739], s=10, c=cols[400:500])

plt.scatter(x[0], dummies[500], s=10, color='red', label='Polysyllables')

frame1 = plt.gca()
# frame1.legend(('monosyllables', 'polysyllables'))

frame1.axes.get_yaxis().set_visible(False)
frame1.axes.get_xaxis().set_visible(False)
Example #41
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
from scipy.stats import norm
import seaborn as sns
from math import cos, sin, log, tan, gamma, pi, exp, sqrt, cosh, sinh

p = plt.figure(figsize=(14, 14), facecolor='black', dpi=400)
p = plt.axis('off')
n = 80
for z in list(np.linspace(0, pi, n)):
    plt.scatter([cos(x * z) for x in np.linspace(0, 1, n)],
                [x for x in np.linspace(0, 10, n)],
                alpha=0.8,
                color=[
                    plt.cm.rainbow(np.random.uniform(0, z / (pi)))
                    for _ in range(n)
                ],
                s=17)
    plt.scatter([-cos(x * z) for x in np.linspace(0, 1, n)],
                [x + 10 for x in -np.linspace(0, 10, n)],
                alpha=0.8,
                color=[
                    plt.cm.rainbow(np.random.uniform(0, z / (pi)))
                    for _ in range(n)
                ],
                s=17)
plt.savefig('C:/Users/Alejandro/Pictures/RandomPlots/19022020.png',
            facecolor='black')
Example #42
cols = (
    ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
     'Loan_Amount_Term'])  # numeric features
#for c in cols:
#    train.hist(column=c, bins=50)
#    # train.boxplot(column=c, by = 'Gender')

pd.crosstab(train['Education'],
            train['Gender'],
            margins=True,
            normalize='columns')
pd.crosstab(train['Credit_History'],
            train['Property_Area'],
            margins=True,
            normalize='columns')
plt.scatter(train['LoanAmount'], train['Credit_History'])
plt.show()

# Impute missing values
# ---------------------
train_mod = train.copy()
test_mod = test.copy()

# Exclude observations with missing 'Credit_History'
train_mod = train_mod.dropna(subset=['Credit_History']).reset_index()

# Impute 'LoanAmount' with median values
train_mod['LoanAmount'] = train_mod['LoanAmount'].fillna(
    train_mod['LoanAmount'].median())
test_mod['LoanAmount'] = test_mod['LoanAmount'].fillna(
    test_mod['LoanAmount'].median())
Example #43
def biplot(objects, eigenvectors=None, eigenvalues=None,
           vector_labels=None, object_labels=None, scaling=1, xpc=0, ypc=1,
           show_arrows=True,
           group=None, plot_ellipses=False, confidense_level=0.95,
           axis_label='PC',
           arrow_head_width=None):

    """
    Creates a biplot with:

    Parameters:
        objects: 2D numpy array of scores
        eigenvectors: 2D numpy array of loadings
        eigenvalues: 1D numpy array of eigenvalues, necessary to compute correlation biplot_scores
        vector_labels: 1D numpy array or list of labels for loadings
        object_labels: 1D numpy array or list of labels for objects
        show_arrows: logical
        scaling: either 1 or "distance" for distance biplot, either 2 or "correlation" for correlation biplot
        xpc, ypc: integers, index of the axis to plot. generally xpc=0 and ypc=1 to plot the first and second components
        group: 1D numpy array of categories to color scores
        plot_ellipses: 2D numpy array of error (mean) and deviation (samples) ellipses around groups
        confidense_level: confidense level for the ellipses
        axis_label: string, the text describing the axes
    Returns:
         biplot as matplotlib object
    """
    # select scaling
    if scaling == 1 or scaling == 'distance':
        scores = objects
        loadings = eigenvectors
    elif scaling == 2 or scaling == 'correlation':
        scores = objects.dot(np.diag(eigenvalues**(-0.5)))
        loadings = eigenvectors.dot(np.diag(eigenvalues**0.5))
    else:
        raise ValueError("No such scaling")

    if eigenvectors is None:
        loadings=np.array([[0, 0]]) # to include in the computation of plot limits

    # draw the cross
    plt.axvline(0, ls='solid', c='grey', linewidth=0.5)
    plt.axhline(0, ls='solid', c='grey', linewidth=0.5)

    # draw the ellipses
    if group is not None and plot_ellipses:
        groups = np.unique(group)
        for i in range(len(groups)):
            mean = np.mean(scores[group==groups[i], :], axis=0)
            plt.text(mean[0], mean[1], groups[i],
                     ha='center', va='center', color='k', size=15)
            ell_dev = ellipse(X=scores[group==groups[i], :], level=confidense_level, method='deviation')
            ell_err = ellipse(X=scores[group==groups[i], :], level=confidense_level, method='error')
            plt.fill(ell_err[:,0], ell_err[:,1], alpha=0.6, color='grey')
            plt.fill(ell_dev[:,0], ell_dev[:,1], alpha=0.2, color='grey')

    # plot scores
    if group is None:
        if object_labels is None:
            plt.scatter(scores[:,xpc], scores[:,ypc])
        else:
            for i in range(scores.shape[0]):
                #print('i=', i)
                #print(scores[i,xpc], scores[i,ypc])
                plt.text(scores[i, xpc], scores[i, ypc], object_labels[i],
                         color = 'blue', ha = 'center', va = 'center')
    else:
        if object_labels is None:
            for i in range(len(np.unique(group))):
                cond = group == np.unique(group)[i]
                plt.plot(scores[cond, 0], scores[cond, 1], 'o')
        else:
            for i in range(len(np.unique(group))):
                cond = group == np.unique(group)[i]
                scores_gr = scores[cond, :]
                labels_gr = np.asarray(object_labels)[cond]
                for j in range(scores_gr.shape[0]):
                    plt.text(scores_gr[j, xpc], scores_gr[j, ypc], labels_gr[j],
                             ha = 'center', va = 'center')

    # plot loadings
    if eigenvectors is not None:
        if show_arrows:
            if arrow_head_width is None:
                arrow_head_width = np.ptp(objects)/100
            for i in range(loadings.shape[0]):
                plt.arrow(0, 0, loadings[i, xpc], loadings[i, ypc],
                          color = 'black', head_width=arrow_head_width)

        # plot loading labels
        if vector_labels is None:
            plt.plot(loadings[:, xpc], loadings[:, ypc], marker='+', color='red', ls='None')
        else:
            if show_arrows:
                expand_load_text = 1.15
            else:
                expand_load_text = 1
            for i in range(loadings.shape[0]):  # one label per loading row
                plt.text(loadings[i, xpc]*expand_load_text, loadings[i, ypc]*expand_load_text, vector_labels[i],
                         color = 'black', ha = 'center', va = 'center') # , fontsize=20

    # axis labels
    plt.xlabel(axis_label + str(xpc+1))
    plt.ylabel(axis_label + str(ypc+1))

    # axis limit
    xlim = [np.hstack((loadings[:, xpc], scores[:,xpc])).min(),
            np.hstack((loadings[:, xpc], scores[:,xpc])).max()]
    margin_x = 0.05*(xlim[1]-xlim[0])
    xlim[0]=xlim[0]-margin_x
    xlim[1]=xlim[1]+margin_x

    ylim = [np.hstack((loadings[:, ypc], scores[:,ypc])).min(),
            np.hstack((loadings[:, ypc], scores[:,ypc])).max()]
    margin_y = 0.05*(ylim[1]-ylim[0])
    ylim[0]=ylim[0]-margin_y
    ylim[1]=ylim[1]+margin_y
    plt.xlim(xlim)
    plt.ylim(ylim)
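# Usage sketch (illustrative, not from the original project): pass PCA scores
# and loadings computed elsewhere, e.g. via numpy's SVD.
#
#   import numpy as np
#   X = np.random.randn(50, 4)
#   Xc = X - X.mean(axis=0)
#   U, S, Vt = np.linalg.svd(Xc, full_matrices=False)
#   eigenvalues = S**2 / (X.shape[0] - 1)
#   biplot(U * S, eigenvectors=Vt.T, eigenvalues=eigenvalues,
#          vector_labels=['v1', 'v2', 'v3', 'v4'], scaling='distance')
#   plt.show()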
Example #44
plt.xlabel('Number of clusters')
plt.ylabel('Average distance')
plt.title('Selecting k with the Elbow Method')

# Interpret 3 cluster solution
model3 = KMeans(n_clusters=3)
model3.fit(clus_train)
clusassign = model3.predict(clus_train)
# plot clusters

from sklearn.decomposition import PCA
pca_2 = PCA(2)
plot_columns = pca_2.fit_transform(clus_train)
plt.scatter(
    x=plot_columns[:, 0],
    y=plot_columns[:, 1],
    c=model3.labels_,
)
plt.xlabel('Canonical variable 1')
plt.ylabel('Canonical variable 2')
plt.title('Scatterplot of Canonical Variables for 3 Clusters')
plt.show()

clus_train.reset_index(level=0, inplace=True)
cluslist = list(clus_train['index'])
labels = list(model3.labels_)
newlist = dict(zip(cluslist, labels))
newlist
newclus = DataFrame.from_dict(newlist, orient='index')
newclus
newclus.columns = ['cluster']
Example #45
def plot(data='2018', errorbars=True, nonEssentials=False, \
         c404=0., cneib=0., ccontam=0.):
    if data == '2017':
        tbl = load17table()
    elif data == '2018':
        tbl = load18table()
    else:
        print "The value for 'data' must be either '2017' or '2018'."

    #print tbl

    # Defining Variables

    time = tbl['time']
    fluxV = tbl['fluxV404']
    fluxC = tbl['fluxContam']
    fluxC4 = tbl['fluxC4']
    fluxN = tbl['fluxNeighbor']
    dyV = tbl['fluxErrV404']
    dyC = tbl['fluxErrContam']
    dyC4 = tbl['fluxErrC4']
    dyN = tbl['fluxErrNeighbor']
    fluxR = tbl['fluxRandomStar']
    dyR = tbl['fluxErrRandomStar']

    # Plot of time vs. flux(V404)

    fig1 = plt.figure(1)
    fig1.clf()
    plt.scatter(time, fluxV, alpha=0.4, s=16, zorder=25)
    if errorbars:
        plt.errorbar(time,
                     fluxV,
                     yerr=dyV,
                     fmt='o',
                     ms=1,
                     ecolor='0.3',
                     alpha=0.2,
                     zorder=10)
    plt.xlabel('Time')
    plt.ylabel('Flux (V404)')

    # Plot of time vs. flux(Contaminant)

    fig2 = plt.figure(2)
    fig2.clf()
    plt.scatter(time, fluxC, alpha=0.4, s=16, zorder=25)
    if errorbars:
        plt.errorbar(time,
                     fluxC,
                     yerr=dyC,
                     fmt='o',
                     ms=1,
                     ecolor='0.3',
                     alpha=0.2,
                     zorder=10)
    plt.xlabel('Time')
    plt.ylabel('Flux (Contaminant)')

    # Plot of time vs. flux(C4)

    fig3 = plt.figure(3)
    fig3.clf()
    plt.scatter(time, fluxC4, alpha=0.4, s=16, zorder=25)
    if errorbars:
        plt.errorbar(time,
                     fluxC4,
                     yerr=dyC4,
                     fmt='o',
                     ms=1,
                     ecolor='0.3',
                     alpha=0.2,
                     zorder=10)
    plt.xlabel('Time')
    plt.ylabel('Flux (C4)')

    # Plot of time vs. flux(Neighbor)

    fig4 = plt.figure(4)
    fig4.clf()
    plt.scatter(time, fluxN, alpha=0.4, s=16, zorder=25)
    if errorbars:
        plt.errorbar(time,
                     fluxN,
                     yerr=dyN,
                     fmt='o',
                     ms=1,
                     ecolor='0.3',
                     alpha=0.2,
                     zorder=10)
    plt.xlabel('Time')
    plt.ylabel('Flux (Neighbor)')

    # Plot of Flux(V404) vs. Flux(Contaminant)

    fig5 = plt.figure(5)
    fig5.clf()
    dum5 = plt.scatter(fluxV + c404,
                       fluxC + ccontam,
                       alpha=0.4,
                       s=16,
                       zorder=25,
                       c=time,
                       cmap='hsv')
    if errorbars:
        plt.errorbar(fluxV + c404,
                     fluxC + ccontam,
                     xerr=dyV,
                     yerr=dyC,
                     fmt='o',
                     ms=1,
                     ecolor='0.3',
                     alpha=0.2,
                     zorder=10)
    plt.xlabel('Flux (V404)')
    plt.ylabel('Flux (Contaminant)')
    plt.colorbar(dum5)

    # let's do a ratio plot
    if ccontam > 0. and c404 > 0.:
        absV = fluxV + c404
        absC = fluxC + ccontam

        ratioContam = absC / (absV + absC)
        unctyRatioSquared = (dyV**2 + dyC**2)/(absV + absC)**2 + \
                            (dyC/absC)**2
        unctyOfRatio = ratioContam * np.sqrt(unctyRatioSquared)

        # now let's plot this
        fig55 = plt.figure(55)
        fig55.clf()
        dum55 = plt.scatter(absV, ratioContam, alpha=0.4, s=16, \
                            zorder=25, c=time, cmap='hsv')

        if errorbars:
            dum55b = plt.errorbar(absV, ratioContam, xerr=dyV, \
                                  yerr=unctyOfRatio, fmt='o', ms=1, \
                                  ecolor='0.3', alpha=0.2, zorder=10)
        plt.xlabel('Flux (V404)')
        plt.ylabel('(Contaminant / (V404 + contaminant))')
        plt.colorbar(dum55)

    # Plot of Flux(V404) vs. Flux(Neighbor)

    fig6 = plt.figure(6)
    fig6.clf()
    dum6 = plt.scatter(fluxV,
                       fluxN,
                       alpha=0.4,
                       s=16,
                       zorder=25,
                       c=time,
                       cmap='hsv')
    if errorbars:
        plt.errorbar(fluxV,
                     fluxN,
                     xerr=dyV,
                     yerr=dyN,
                     fmt='o',
                     ms=1,
                     ecolor='0.3',
                     alpha=0.2,
                     zorder=10)
    plt.xlabel('Flux (V404)')
    plt.ylabel('Flux (Neighbor)')
    plt.colorbar(dum6)

    # Plot of Flux(V404) vs. Flux(C4)

    fig7 = plt.figure(7)
    fig7.clf()
    dum7 = plt.scatter(fluxV,
                       fluxC4,
                       alpha=0.4,
                       s=16,
                       zorder=25,
                       c=time,
                       cmap='hsv')
    if errorbars:
        plt.errorbar(fluxV,
                     fluxC4,
                     xerr=dyV,
                     yerr=dyC4,
                     fmt='o',
                     ms=1,
                     ecolor='0.3',
                     alpha=0.2,
                     zorder=10)
    plt.xlabel('Flux (V404)')
    plt.ylabel('Flux (C4)')
    plt.colorbar(dum7)

    # Plot of Flux(Contaminant) vs. Flux(C4)

    if nonEssentials:
        fig8 = plt.figure(8)
        fig8.clf()
        dum8 = plt.scatter(fluxC,
                           fluxC4,
                           alpha=0.4,
                           s=16,
                           zorder=25,
                           c=time,
                           cmap='hsv')
        if errorbars:
            plt.errorbar(fluxC,
                         fluxC4,
                         xerr=dyC,
                         yerr=dyC4,
                         fmt='o',
                         ms=1,
                         ecolor='0.3',
                         alpha=0.2,
                         zorder=10)
        plt.xlabel('Flux (Contaminant)')
        plt.ylabel('Flux (C4)')
        plt.colorbar(dum8)

        # Plot of Flux(Contaminant) vs. Flux(Neighbor)

        fig9 = plt.figure(9)
        fig9.clf()
        dum9 = plt.scatter(fluxC,
                           fluxN,
                           alpha=0.4,
                           s=16,
                           zorder=25,
                           c=time,
                           cmap='hsv')
        if errorbars:
            plt.errorbar(fluxC,
                         fluxN,
                         xerr=dyC,
                         yerr=dyN,
                         fmt='o',
                         ms=1,
                         ecolor='0.3',
                         alpha=0.2,
                         zorder=10)
        plt.xlabel('Flux (Contaminant)')
        plt.ylabel('Flux (Neighbor)')
        plt.colorbar(dum9)

        # Plot of Flux(Neighbor) vs. Flux(C4)

        fig10 = plt.figure(10)
        fig10.clf()
        dum10 = plt.scatter(fluxN,
                            fluxC4,
                            alpha=0.4,
                            s=16,
                            zorder=25,
                            c=time,
                            cmap='hsv')
        if errorbars:
            plt.errorbar(fluxN,
                         fluxC4,
                         xerr=dyN,
                         yerr=dyC4,
                         fmt='o',
                         ms=1,
                         ecolor='0.3',
                         alpha=0.2,
                         zorder=10)
        plt.xlabel('Flux (Neighbor)')
        plt.ylabel('Flux (C4)')
        plt.colorbar(dum10)

        # Plot of Flux(Random Star) vs. Flux(C4)

        fig11 = plt.figure(11)
        fig11.clf()
        dum11 = plt.scatter(fluxR,
                            fluxC4,
                            alpha=0.4,
                            s=16,
                            zorder=25,
                            c=time,
                            cmap='hsv')
        if errorbars:
            plt.errorbar(fluxR,
                         fluxC4,
                         xerr=dyR,
                         yerr=dyC4,
                         fmt='o',
                         ms=1,
                         ecolor='0.3',
                         alpha=0.2,
                         zorder=10)
        plt.xlabel('Flux (Random Star (#6 on Map))')
        plt.ylabel('Flux (C4)')
        plt.colorbar(dum11)
Example #46
def start_anon():
    names = (
        'ID',
        'age',
        'gender',
        'native-country',
        'race',
        'marital-status',
        'workclass',
        'occupation',
        'income',
        'People_Family',
        'education',
        'GlycoHemoglobin',
        'ArmCircum',
        'SaggitalAbdominal',
        'GripStrength',
        'Taking_Insulin',
        'Taking_Oral_Agents',
        'Eyes_Affected',
        'Recent_BP',
        'Diabetes',
    )

    categorical = set((
        'gender',
        'native-country',
        'race',
        'marital-status',
        'workclass',
        'occupation',
        'income',
        'education',
    ))

    df = pd.read_csv("./static/finaldata_pakka.txt")

    #get the span (range) of values each column takes within the partition
    def get_spans(df, partition, scale=None):
        spans = {}
        for column in df.columns:
            if column in categorical:
                span = len(df[column][partition].unique())
            else:
                span = df[column][partition].max() - df[column][partition].min(
                )
            if scale is not None:
                span = span / scale[column]
            spans[column] = span
        return spans

    #split the partition into two halves along the given column
    def split(df, partition, column):
        dfp = df[column][partition]
        if column in categorical:
            values = dfp.unique()
            lv = set(values[:len(values) // 2])
            rv = set(values[len(values) // 2:])
            return dfp.index[dfp.isin(lv)], dfp.index[dfp.isin(rv)]
        else:
            median = dfp.median()
            dfl = dfp.index[dfp < median]
            dfr = dfp.index[dfp >= median]
            return (dfl, dfr)

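    #a partition is k-anonymous when it holds at least k rows, so each record
    #is indistinguishable from at least k - 1 others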
    def is_k_anonymous(df, partition, sensitive_column, k=3):
        return len(partition) >= k

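    #greedy Mondrian-style partitioning: always split the column with the
    #widest (scaled) span, and keep a split only if both halves stay valid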
    def partition_dataset(df, feature_columns, sensitive_column, scale,
                          is_valid):
        finished_partitions = []
        partitions = [df.index]
        while partitions:
            partition = partitions.pop(0)
            spans = get_spans(df[feature_columns], partition, scale)
            for column, span in sorted(spans.items(), key=lambda x: -x[1]):
                lp, rp = split(df, partition, column)
                if not is_valid(df, lp, sensitive_column) or not is_valid(
                        df, rp, sensitive_column):
                    continue
                partitions.extend((lp, rp))
                break
            else:
                finished_partitions.append(partition)
        return finished_partitions

    def build_indexes(df):
        indexes = {}
        for column in categorical:
            values = sorted(df[column].unique())
            indexes[column] = {
                x: y
                for x, y in zip(values, range(len(values)))
            }
        return indexes

    def get_coords(df, column, partition, indexes, offset=0.1):
        if column in categorical:
            sv = df[column][partition].sort_values()
            l, r = indexes[column][sv[
                sv.index[0]]], indexes[column][sv[sv.index[-1]]] + 1.0
        else:
            sv = df[column][partition].sort_values()
            next_value = sv[sv.index[-1]]
            larger_values = df[df[column] > next_value][column]
            if len(larger_values) > 0:
                next_value = larger_values.min()
            l = sv[sv.index[0]]
            r = next_value
        l -= offset
        r += offset
        return l, r

    def get_partition_rects(df,
                            partitions,
                            column_x,
                            column_y,
                            indexes,
                            offsets=[0.1, 0.1]):
        rects = []
        for partition in partitions:
            xl, xr = get_coords(df,
                                column_x,
                                partition,
                                indexes,
                                offset=offsets[0])
            yl, yr = get_coords(df,
                                column_y,
                                partition,
                                indexes,
                                offset=offsets[1])
            rects.append(((xl, yl), (xr, yr)))
        return rects

    def get_bounds(df, column, indexes, offset=1.0):
        if column in categorical:
            return 0 - offset, len(indexes[column]) + offset
        return df[column].min() - offset, df[column].max() + offset

    def plot_rects(df,
                   ax,
                   rects,
                   column_x,
                   column_y,
                   edgecolor='black',
                   facecolor='none'):
        for (xl, yl), (xr, yr) in rects:
            ax.add_patch(
                patches.Rectangle((xl, yl),
                                  xr - xl,
                                  yr - yl,
                                  linewidth=1,
                                  edgecolor=edgecolor,
                                  facecolor=facecolor,
                                  alpha=0.5))
        ax.set_xlim(*get_bounds(df, column_x, indexes))
        ax.set_ylim(*get_bounds(df, column_y, indexes))
        ax.set_xlabel(column_x)
        ax.set_ylabel(column_y)

    def agg_categorical_column(series):
        return [','.join(set(series))]

    def agg_numerical_column(series):
        return [series.mean()]

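    #each finished partition collapses into one generalized row per sensitive
    #value, with a 'count' column recording how many records share that value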
    def build_anonymized_dataset(df,
                                 partitions,
                                 feature_columns,
                                 sensitive_column,
                                 max_partitions=None):
        aggregations = {}
        for column in feature_columns:
            if column in categorical:
                aggregations[column] = agg_categorical_column
            else:
                aggregations[column] = agg_numerical_column
        rows = []
        for i, partition in enumerate(partitions):
            if i % 100 == 1:
                print("Finished {} partitions...".format(i))
            if max_partitions is not None and i > max_partitions:
                break
            grouped_columns = df.loc[partition].agg(aggregations,
                                                    squeeze=False)
            sensitive_counts = df.loc[partition].groupby(sensitive_column).agg(
                {sensitive_column: 'count'})
            values = grouped_columns.iloc[0].to_dict()
            for sensitive_value, count in sensitive_counts[
                    sensitive_column].items():
                if count == 0:
                    continue
                values.update({
                    sensitive_column: sensitive_value,
                    'count': count,
                })
                rows.append(values.copy())
        return pd.DataFrame(rows)

    def diversity(df, partition, column):
        return len(df[column][partition].unique())

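    #distinct l-diversity: a partition must contain at least l different
    #values of the sensitive column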
    def is_l_diverse(df, partition, sensitive_column, l=2):
        return diversity(df, partition, sensitive_column) >= l

    def t_closeness(df, partition, column, global_freqs):
        total_count = float(len(partition))
        d_max = None
        group_counts = df.loc[partition].groupby(column)[column].agg('count')
        for value, count in group_counts.to_dict().items():
            p = count / total_count
            d = abs(p - global_freqs[value])
            if d_max is None or d > d_max:
                d_max = d
        return d_max

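    #t-closeness is measured here as the largest absolute difference between
    #partition and global value frequencies; the formal definition uses the
    #Earth Mover's Distance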
    def is_t_close(df, partition, sensitive_column, global_freqs, p=0.2):
        if sensitive_column not in categorical:
            raise ValueError("this method only works for categorical values")
        return t_closeness(df, partition, sensitive_column, global_freqs) <= p

    #print the loaded data
    print(df.head())

    #cast all non-numerical columns to the pandas 'category' dtype
    for name in categorical:
        # print(df[name])
        df[name] = df[name].astype('category')

    #print the spans in the columns
    full_spans = get_spans(df, df.index)
    print(full_spans)

    #these columns will be shown in the generated data and in the graph too
    feature_columns = ['age', 'Diabetes']
    sensitive_column = 'income'
    finished_partitions = partition_dataset(df, feature_columns,
                                            sensitive_column, full_spans,
                                            is_k_anonymous)

    print(len(finished_partitions))
    print("++++++")

    indexes = build_indexes(df)
    column_x, column_y = feature_columns[:2]
    rects = get_partition_rects(df,
                                finished_partitions,
                                column_x,
                                column_y,
                                indexes,
                                offsets=[0.0, 0.0])

    #print the first few partition rectangles
    print(rects[:10])
    print("==========================")

    #show graph for k-anonymization
    pl.figure(figsize=(20, 20))
    ax = pl.subplot(111)
    plot_rects(df, ax, rects, column_x, column_y, facecolor='r')
    pl.scatter(df[column_x], df[column_y])
    pl.show()

    dfn = build_anonymized_dataset(df, finished_partitions, feature_columns,
                                   sensitive_column)

    print("start-------------")
    #this prints the k anonymized data
    print(dfn.sort_values(feature_columns + [sensitive_column]))
    with open("k-anonimized.txt", "w") as file:
        file.write(
            dfn.sort_values(feature_columns + [sensitive_column]).to_string())
    print("end--------")

    finished_l_diverse_partitions = partition_dataset(
        df, feature_columns, sensitive_column, full_spans,
        lambda *args: is_k_anonymous(*args) and is_l_diverse(*args))

    print(len(finished_l_diverse_partitions))

    column_x, column_y = feature_columns[:2]
    l_diverse_rects = get_partition_rects(df,
                                          finished_l_diverse_partitions,
                                          column_x,
                                          column_y,
                                          indexes,
                                          offsets=[0.0, 0.0])

    #show graph for l-diverse partitions (blue) over k-anonymous ones (red)
    pl.figure(figsize=(20, 20))
    ax = pl.subplot(111)
    plot_rects(df,
               ax,
               l_diverse_rects,
               column_x,
               column_y,
               edgecolor='b',
               facecolor='b')
    plot_rects(df, ax, rects, column_x, column_y, facecolor='r')
    pl.scatter(df[column_x], df[column_y])
    pl.show()

    dfl = build_anonymized_dataset(df, finished_l_diverse_partitions,
                                   feature_columns, sensitive_column)

    print("start**************")
    #prints the l-diverse anonymized data
    print(dfl.sort_values([column_x, column_y, sensitive_column]))
    with open("l-anonimized.txt", "w") as file:
        file.write(
            dfl.sort_values([column_x, column_y,
                             sensitive_column]).to_string())
    print("end****************")

    global_freqs = {}
    total_count = float(len(df))
    group_counts = df.groupby(sensitive_column)[sensitive_column].agg('count')
    for value, count in group_counts.to_dict().items():
        p = count / total_count
        global_freqs[value] = p

    print(global_freqs)
    print("###############")

    finished_t_close_partitions = partition_dataset(
        df, feature_columns, sensitive_column, full_spans,
        lambda *args: is_k_anonymous(*args) and is_t_close(
            *args, global_freqs))

    print("&&&&&&&")
    print(len(finished_t_close_partitions))
    print("&&&&&&&")

    dft = build_anonymized_dataset(df, finished_t_close_partitions,
                                   feature_columns, sensitive_column)

    print("start!!!!!!!!!!!!!!!!!!")
    #prints the t-close anonymized data
    print(dft.sort_values([column_x, column_y, sensitive_column]))
    with open("t-anonimized.txt", "w") as file:
        file.write(
            dft.sort_values([column_x, column_y,
                             sensitive_column]).to_string())
    print("end!!!!!!!!!!!!!!!!!!!!")

    column_x, column_y = feature_columns[:2]
    t_close_rects = get_partition_rects(df,
                                        finished_t_close_partitions,
                                        column_x,
                                        column_y,
                                        indexes,
                                        offsets=[0.0, 0.0])

    #show graph for t-close partitions
    pl.figure(figsize=(20, 20))
    ax = pl.subplot(111)
    plot_rects(df,
               ax,
               t_close_rects,
               column_x,
               column_y,
               edgecolor='b',
               facecolor='b')
    pl.scatter(df[column_x], df[column_y])
    pl.show()
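Example #47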
    def plot(self):
        if self.name == 'regression':
            """Evaluate the trained regression model.

            Plots the noisy data, the predictive mean, the ground truth,
            and an uncertainty band derived from the predictive variance.
            """
            self.bayes_nn.load_state_dict(
                torch.load("test.pt", map_location='cpu'))
            self.bayes_nn.eval()

            x = np.linspace(-0.5, 0.5, 100).reshape(-1, 1)
            y = self._f(x, sigma=self.noise_data)
            ytrue = self._f(x, sigma=0.0)
            xt, yt = torch.Tensor(x), torch.Tensor(y)
            xt, yt = xt.to(device), yt.to(device)
            with torch.no_grad():
                y_pred_mean, y_pred_var = self.bayes_nn.predict(xt)

            pred = y_pred_mean.cpu().detach().numpy().ravel()
            var = y_pred_var.cpu().detach().numpy().ravel()
            plt.figure()
            plt.plot(x.ravel(), pred, label='Prediction')
            plt.scatter(x, y, label='Data')
            plt.plot(x, ytrue, label='Truth')
            plt.fill_between(x.ravel(),
                             pred + var,
                             pred - var,
                             alpha=0.5,
                             label='Uncertainty')
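            # note: y_pred_var is a variance, so the band above is mean +/- var;
            # a two-sigma band would use 2 * np.sqrt(var) instead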
            plt.legend()
            plt.show()

        elif self.name == 'stenosis_hard':
            #self.bayes_nn.load_state_dict(torch.load("test1e-2_1p.pt",map_location = 'cpu'))
            self.bayes_nn.load_state_dict(
                torch.load("test1500.pt", map_location='cpu'))

            self.bayes_nn.eval()
            Data = np.load('stenosis_hard_coord.npz')
            x = Data['x']
            y = Data['y']
            u = Data['u']
            v = Data['v']
            P = Data['P']
            u_CFD = u
            v_CFD = v
            P_CFD = P
            print('u_CFD is', u_CFD)
            print('v_CFD is', v_CFD)
            yUp = Data['yUp']
            xt, yt = torch.Tensor(x), torch.Tensor(y)
            Rt = torch.Tensor(yUp).to(device)
            print('Rt.requires_grad is', Rt.requires_grad)
            xt, yt = xt.view(len(xt), -1), yt.view(len(yt), -1)
            xt.requires_grad = True
            yt.requires_grad = True
            xt, yt = xt.to(device), yt.to(device)
            inputs = torch.cat((xt, yt), 1)
            #with torch.no_grad():
            print('inputs is', inputs)
            y_pred_mean = self.bayes_nn.forward(inputs)
            #pred = y_pred_mean.cpu().detach().numpy()
            pred = y_pred_mean

            for i in range(0, pred.shape[0]):
                # hard constraint u
                #pred[i,:,0] *= (Rt[:,0]**2 - yt[:,0]**2)
                # hard constraint v
                #pred[i,:,1] *= (Rt[:,0]**2 -yt[:,0]**2)
                # hard constraint P
                pred[i, :, 2] = (args.xStart - xt[:, 0]) * 0 + args.dP * (
                    args.xEnd - xt[:, 0]) / args.L + 0 * yt[:, 0] + (
                        args.xStart - xt[:, 0]) * (args.xEnd -
                                                   xt[:, 0]) * pred[i, :, 2]
            print('pred.shape is', pred.shape)
            mean = pred.mean(0)
            EyyT = (pred**2).mean(0)
            EyEyT = mean**2
            beta_inv = (-self.bayes_nn.log_beta).exp()
            print('beta_inv.mean', beta_inv.mean())
            var = beta_inv.mean() + EyyT - EyEyT
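            # law of total variance: predictive variance = mean noise variance
            # beta^-1 plus the sample variance E[y^2] - (E[y])^2 across
            # posterior samples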

            #var = (pred.std(0))**2
            print('mean.shape', mean.shape)
            print('var.shape', var.shape)
            u_hard = mean[:, 0]
            v_hard = mean[:, 1]
            P_hard = mean[:, 2]
            u_hard = u_hard.view(len(u_hard), -1)
            v_hard = v_hard.view(len(v_hard), -1)
            P_hard = P_hard.view(len(P_hard), -1)
            u_hard = u_hard.cpu().detach().numpy()
            v_hard = v_hard.cpu().detach().numpy()
            P_hard = P_hard.cpu().detach().numpy()
            var_u = var[:, 0]
            var_v = var[:, 1]
            var_P = var[:, 2]
            var_u = var_u.view(len(var_u), -1)
            var_v = var_v.view(len(var_v), -1)
            var_P = var_P.view(len(var_P), -1)
            var_u = var_u.cpu().detach().numpy()
            var_v = var_v.cpu().detach().numpy()
            var_P = var_P.cpu().detach().numpy()

            #plot_x = 0.4
            #plot_y = 0.045
            plot_x = 0.4 * np.max(x)
            plot_y = 0.95 * np.max(y)
            fontsize = 18
            #axis_limit = [-0.5, 0.5, -0.5, 0.2]
            noise_lv = 0.05
            print('shape of u is', u.shape)
            print('shape of v is', v.shape)
            print('shape of P is', P.shape)
            u_noiseCFD = np.zeros_like(u)
            v_noiseCFD = np.zeros_like(v)
            P_noiseCFD = np.zeros_like(P)
            for i in range(0, len(u)):
                u_error = np.random.normal(0, noise_lv * np.abs(u[i]), 1)
                #print('std is',noise_lv*np.abs(sparse_udom[i]))
                #print('np.random.normal(0, noise_lv*np.abs(sparse_udom[i]), 1)',np.random.normal(0, noise_lv*np.abs(sparse_udom[i]), 1))
                v_error = np.random.normal(0, noise_lv * np.abs(v[i]), 1)
                p_error = np.random.normal(0, noise_lv * np.abs(P[i]), 1)
                u_noiseCFD[i] = u[i] + u_error
                v_noiseCFD[i] = v[i] + v_error
                P_noiseCFD[i] = P[i] + p_error

            Data_sparse = np.load('xyuvp_sparse_separate_3sec.npz')
            sparse_x = Data_sparse['xdom']
            print('x_size is', sparse_x.shape)
            sparse_y = Data_sparse['ydom']
            sparse_u = Data_sparse['udom']
            sparse_v = Data_sparse['vdom']
            xinlet = Data_sparse['xinlet']
            yinlet = Data_sparse['yinlet']
            uinlet = Data_sparse['uinlet']
            xoutlet = Data_sparse['xoutlet']
            youtlet = Data_sparse['youtlet']
            uoutlet = Data_sparse['uoutlet']
            xb = Data_sparse['xb']
            yb = Data_sparse['yb']
            ub = Data_sparse['ub']
            xb_full = Data_sparse['xb_full']
            yb_full = Data_sparse['yb_full']

            xtrain = np.concatenate((xinlet, xoutlet, sparse_x), 0)
            ytrain = np.concatenate((yinlet, youtlet, sparse_y), 0)

            ##
            loss_f = nn.MSELoss()
            print('u_hard is', u_hard)
            print('u_CFD is', u_CFD)

            # accuracy of u
            error_u = loss_f(torch.Tensor(u_hard), torch.Tensor(u_CFD)).item()
            # accuracy of v
            error_v = loss_f(torch.Tensor(v_hard), torch.Tensor(v_CFD)).item()
            # accuracy of P
            error_P = loss_f(torch.Tensor(P_hard), torch.Tensor(P_CFD)).item()

            ## relative norm
            ut = torch.Tensor(u_CFD)
            vt = torch.Tensor(v_CFD)
            pt = torch.Tensor(P_CFD)

            u_CFDnorm = loss_f(ut, torch.zeros_like(ut)).item()
            v_CFDnorm = loss_f(vt, torch.zeros_like(vt)).item()
            P_CFDnorm = loss_f(pt, torch.zeros_like(pt)).item()

            print('u_CFDnorm is', np.sqrt(u_CFDnorm))
            print('v_CFDnorm is', np.sqrt(v_CFDnorm))
            print('P_CFDnorm is', np.sqrt(P_CFDnorm))

            np.savetxt('u_CFDnorm.csv', np.array([np.sqrt(u_CFDnorm)]))
            np.savetxt('v_CFDnorm.csv', np.array([np.sqrt(v_CFDnorm)]))
            np.savetxt('P_CFDnorm.csv', np.array([np.sqrt(P_CFDnorm)]))

            relative_error_u = np.sqrt(error_u / u_CFDnorm)
            relative_error_v = np.sqrt(error_v / v_CFDnorm)
            relative_error_P = np.sqrt(error_P / P_CFDnorm)

            print('relative norm |u - u_CFD|/|u_CFD|', relative_error_u)
            print('relative norm |v - v_CFD|/|v_CFD|', relative_error_v)
            print('relative norm |P - P_CFD|/|P_CFD|', relative_error_P)

            np.savetxt('Relative_error_u.csv', np.array([relative_error_u]))
            np.savetxt('Relative_error_v.csv', np.array([relative_error_v]))
            np.savetxt('Relative_error_P.csv', np.array([relative_error_P]))

            ###

            ## Std u mean
            uq_u_mean = np.sqrt(var_u).mean()
            ## Std v mean
            uq_v_mean = np.sqrt(var_v).mean()
            ## Std P mean
            uq_P_mean = np.sqrt(var_P).mean()

            ## Std u max
            uq_u_max = np.sqrt(var_u).max()
            ## Std v max
            uq_v_max = np.sqrt(var_v).max()
            ## Std P max
            uq_P_max = np.sqrt(var_P).max()
            #
            #print('uq_u.shape is', uq_u.shape)
            np.savetxt('error_u.csv', np.array([error_u]))
            np.savetxt('error_v.csv', np.array([error_v]))
            np.savetxt('error_P.csv', np.array([error_P]))

            np.savetxt('uq_umean.csv', np.array([uq_u_mean]))
            np.savetxt('uq_vmean.csv', np.array([uq_v_mean]))
            np.savetxt('uq_Pmean.csv', np.array([uq_P_mean]))

            np.savetxt('uq_umax.csv', np.array([uq_u_max]))
            np.savetxt('uq_vmax.csv', np.array([uq_v_max]))
            np.savetxt('uq_Pmax.csv', np.array([uq_P_max]))

            print('test loss u is', error_u)
            print('test loss v is', error_v)
            print('test loss P is', error_P)

            print('mean uq u is', uq_u_mean)
            print('mean uq v is', uq_v_mean)
            print('mean uq P is', uq_P_mean)

            print('max uq u is', uq_u_max)
            print('max uq v is', uq_v_max)
            print('max uq P is', uq_P_max)

            plt.figure()
            plt.subplot(2, 1, 1)
            #plt.scatter(x, y, c= np.sqrt(var_u)/u_hard, label = 'u_hard_var')
            plt.scatter(x,
                        y,
                        c=np.sqrt(var_u),
                        label='u_hard_std',
                        cmap='coolwarm')
            plt.text(plot_x, plot_y, r'u Std', {
                'color': 'b',
                'fontsize': fontsize
            })
            #plt.axis('equal')
            plt.colorbar()
            plt.savefig('softuNN_std_noise15.png', bbox_inches='tight')

            plt.figure()
            plt.subplot(2, 1, 1)
            plt.scatter(x,
                        y,
                        c=u_hard,
                        label='uhard',
                        cmap='coolwarm',
                        vmin=min(u_CFD),
                        vmax=max(u_CFD))
            plt.text(plot_x, plot_y, r'u Mean', {
                'color': 'b',
                'fontsize': fontsize
            })
            plt.colorbar()
            #plt.axis('equal')
            plt.savefig('softuNN_mean_noise15.png', bbox_inches='tight')

            plt.figure()
            plt.subplot(2, 1, 1)
            plt.scatter(x,
                        y,
                        c=u_noiseCFD,
                        label='u CFD',
                        cmap='coolwarm',
                        vmin=min(u_CFD),
                        vmax=max(u_CFD))
            plt.colorbar()
            plt.scatter(xtrain, ytrain, marker='x', c='black')
            plt.text(plot_x, plot_y, r'u CFD', {
                'color': 'b',
                'fontsize': fontsize
            })
            #plt.scatter(x, y, c= np.sqrt(var_v), label = 'v_hard_std')
            #plt.scatter(x,y, c = np.sqrt(var_v)/v_hard, label = 'v_hard_std')
            #plt.axis('equal')

            plt.savefig('u_CFD_noise15.png', bbox_inches='tight')

            plt.figure()
            plt.subplot(2, 1, 1)
            #plt.scatter(x, y, c= np.sqrt(var_u)/u_hard, label = 'u_hard_var')
            plt.scatter(x,
                        y,
                        c=np.sqrt(var_v),
                        label='v_hard_std',
                        vmin=0.001,
                        cmap='coolwarm')
            plt.text(plot_x, plot_y, r'v Std', {
                'color': 'b',
                'fontsize': fontsize
            })
            plt.colorbar()
            #plt.savefig('u_hard_var.png',bbox_inches = 'tight')
            #plt.figure()
            plt.subplot(2, 1, 2)
            plt.scatter(x, y, c=v_hard, label='vhard', cmap='coolwarm')
            plt.text(plot_x, plot_y, r'v Mean', {
                'color': 'b',
                'fontsize': fontsize
            })
            #plt.scatter(x, y, c= np.sqrt(var_v), label = 'v_hard_std')
            #plt.scatter(x,y, c = np.sqrt(var_v)/v_hard, label = 'v_hard_std')
            plt.colorbar()
            plt.savefig('v_hard_var.png', bbox_inches='tight')
            plt.figure()
            plt.scatter(x, y, c=P_hard, label='P_hard_std', cmap='coolwarm')
            plt.colorbar()
            plt.savefig('P_hard_var.png', bbox_inches='tight')
            #plt.scatter(x,y,label  ='Data')
            plt.show()

            print('mean of stdvar_u', np.mean(np.sqrt(var_u)))
            print('mean of std var_v', np.mean(np.sqrt(var_v)))

        else:
            raise Exception("error: no such model")
Example #48
std = StandardScaler()
transformed = std.fit_transform(x)
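# `convers_pca` is not defined in this excerpt; a minimal sketch (an
# assumption, not the original class) consistent with the attributes used
# below -- eigen_values, eigen_vectors, sorted_components, fit, transform:
import numpy as np

class convers_pca:
    def __init__(self, no_of_components):
        self.no_of_components = no_of_components

    def fit(self, x):
        # covariance of the features (columns), hence the transpose
        cov = np.cov(x.T)
        self.eigen_values, self.eigen_vectors = np.linalg.eigh(cov)
        # reorder eigenpairs by decreasing eigenvalue
        order = np.argsort(self.eigen_values)[::-1]
        self.eigen_values = self.eigen_values[order]
        self.sorted_components = self.eigen_vectors[:, order]

    def transform(self, x):
        # project onto the leading no_of_components principal axes
        return x @ self.sorted_components[:, :self.no_of_components]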

cov_pca = convers_pca(no_of_components=2)
cov_pca.fit(transformed)

print(cov_pca.eigen_vectors)

print(cov_pca.eigen_values)

print(cov_pca.sorted_components)

x_std = cov_pca.transform(transformed)

plt.figure()
plt.scatter(x_std[:, 0], x_std[:, 1], c=y)

plt.show(block=False)

from pandas.plotting import scatter_matrix

iris = datasets.load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
colors = np.array(50 * ['r'] + 50 * ['g'] + 50 * ['b'])  # 150 samples total, 50 per class in order

scatter_matrix(df, alpha=0.7, figsize=(10, 10), color=colors)

plt.show()
iris.data.shape
iris_target = iris.target
Example #49
    def visualize_decision_tree_classes(self,
                                        best_nodes,
                                        classes_list=None,
                                        restrict_to_pcateg=False,
                                        exclude_leafs=None,
                                        savedir=None,
                                        postfix=''):
        """Visualize embeddings, colors by class predicted by decision tree."""

        classes_list = classes_list or self.classes_list
        savedir = savedir or self.savedir

        init_point_size = 10.
        point_size_ds = 1.
        alphas = [0.8, 0.5]
        _, y = self._getxy()

        plt.figure(figsize=(7, 7))

        point_size = init_point_size
        alphas = np.linspace(alphas[0], alphas[1], len(classes_list))

        # keep track of plotted indices so that downstream nodes can be
        # excluded when plotting upstream ones where relevant
        kept_idxs = []

        for clno, cls in enumerate(classes_list):

            # maybe restrict to leafs predicted as a particular class by NuCLS
            keep1 = None
            if restrict_to_pcateg:
                keep1 = (self.clusts.loc[:, 'pred_categ'] == cls).values

            # restrict to downstream leafs to node of interest
            keep2 = np.in1d(self.pred_y_leafs,
                            self.node_leafs[best_nodes[cls]])  # noqa
            if keep1 is None:
                keep = keep2
            else:
                keep = keep1 & keep2

            # maybe exclude certain leafs
            if exclude_leafs is not None:
                keep[exclude_leafs] = False

            # keep track of kept idxes
            kept_idxs.extend(np.argwhere(keep)[:, 0].tolist())

            # now restrict to leaves of interest
            y_subset = y[keep, :]

            # plot
            plt.scatter(y_subset[:, 0],
                        y_subset[:, 1],
                        c=np.array(VisConfigs.CATEG_COLORS[cls])[None, :] /
                        255.,
                        alpha=alphas[clno],
                        s=point_size,
                        edgecolors='none')

            point_size = point_size_ds * point_size

        plt.xlim(self._e0min, self._e0max)
        plt.ylim(self._e1min, self._e1max)
        plt.title(f'DTALE decisions ({postfix})',
                  fontsize=14,
                  fontweight='bold')
        # plt.show()
        # plt.savefig(opj(savedir, f'dectreeCol{postfix}.svg'))
        plt.savefig(opj(savedir, f'dectreeCol{postfix}.png'))

        return kept_idxs
Example #50
# N = 6000

known_labels_ratio = 0.1
X, y = load_anomaly('../data/Animal_Data_prey_predator.csv')
N = X.shape[0]
rp = np.random.permutation(int(N/10))

# data_P = X[y==1][rp[:int(len(rp)*known_labels_ratio)]]
data_P = X[y == 1]
data_N = X[y == 0]
# data_U = np.concatenate((X[y==1][rp[int(len(rp)*known_labels_ratio):]], X[y==0]), axis=0)

print("Number of positive samples: %d" % (data_P.shape[0]))
print("Number of negative samples: %d" % (data_N.shape[0]))
plt.figure(figsize=(8, 4.5))
plt.scatter(data_N[:, 0], data_N[:, 1], c='k', marker='.', linewidth=1, s=1, alpha=0.5, label='Negative')
plt.scatter(data_P[:, 0], data_P[:, 1], c='b', marker='o', linewidth=0, s=20, alpha=0.5, label='Positive')
plt.grid()
plt.legend()

# model = DecisionTreeClassifier(max_depth=None, max_features=None,
#                                    criterion='gini', class_weight='balanced')

baggingPU = svm.SVC(kernel='linear', probability=True)

# true_labels = np.zeros(shape=(data_U.shape[0]))
# true_labels[:int(len(rp)*(1.0-known_labels_ratio))] = 1.0


# With different interactions
training_set = []
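# hypothetical continuation (not in the source): fit the probabilistic SVM on
# the labelled positives and negatives, then score the training points
train_X = np.concatenate((data_P, data_N), axis=0)
train_y = np.concatenate((np.ones(len(data_P)), np.zeros(len(data_N))))
baggingPU.fit(train_X, train_y)
scores = baggingPU.predict_proba(train_X)[:, 1]  # estimated P(positive)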
Example #51
    def scatter(self, x, y, c):
        plt.scatter(x, y, color=c, label='point')
Example #52
    def visualize_decision_tree_nodes(self, best_nodes, postfix=''):
        """Visualize the learned decision tree nodes."""

        plt.figure(figsize=(7, 7))

        # scatter actual points from NuCLS model in background
        _, y = self._getxy()
        plt.scatter(y[:, 0],
                    y[:, 1],
                    c='beige',
                    alpha=0.6,
                    s=4,
                    edgecolors='none')

        # trace the learned decision tree
        for node in range(self.tree.node_count):
            if self.tree.children_left[node] == -1:
                continue
            me = self.tree.value[node, :, 0]
            clt = self.tree.value[self.tree.children_left[node], :, 0]
            crt = self.tree.value[self.tree.children_right[node], :, 0]
            plt.plot(
                [clt[0], me[0], crt[0]],
                [clt[1], me[1], crt[1]],
                color='gray',
                marker='.',
                linestyle='-',
                linewidth=0.5,
                markersize=3,
                alpha=0.5,
            )

        # highlight root node
        me = self.tree.value[0, :, 0]
        plt.scatter([me[0]], [me[1]],
                    color='k',
                    s=30,
                    alpha=1.,
                    edgecolors='k')

        # color best (class-representative) nodes by class
        for cls, node in best_nodes.items():

            me = self.tree.value[node, :, 0]

            # color the trace along the decision tree till best node
            trace, _ = self._trace_from_node_to_root(node)
            for ndi in range(len(trace) - 1):
                clt = self.tree.value[trace[ndi], :, 0]
                crt = self.tree.value[trace[ndi + 1], :, 0]
                plt.plot(
                    [clt[0], crt[0]],
                    [clt[1], crt[1]],
                    color='k',
                    alpha=1.,
                    marker='o',
                    markersize=2.5,
                    linestyle='-',
                    linewidth=1.3,
                )

            # highlight actual chosen best node
            color = np.array(VisConfigs.CATEG_COLORS[cls])[None, :] / 255.
            plt.scatter([me[0]], [me[1]],
                        color=color,
                        s=150,
                        alpha=1.,
                        edgecolors='none')

        plt.xlim(self._e0min, self._e0max)
        plt.ylim(self._e1min, self._e1max)
        plt.title(f'DTALE nodes ({postfix})', fontsize=14, fontweight='bold')
        # plt.show()
        # plt.savefig(opj(self.savedir, f'dectree{postfix}.svg'))
        plt.savefig(opj(self.savedir, f'dectree{postfix}.png'))
Example #53
    b_i = np.random.uniform(0., 1., (n_i,))
    b_i = b_i / np.sum(b_i)  # Dirac weights

    measures_locations.append(x_i)
    measures_weights.append(b_i)


##############################################################################
# Compute free support barycenter
# -------------

k = 10  # number of Diracs of the barycenter
X_init = np.random.normal(0., 1., (k, d))  # initial Dirac locations
b = np.ones((k,)) / k  # weights of the barycenter (it will not be optimized, only the locations are optimized)

X = ot.lp.free_support_barycenter(measures_locations, measures_weights, X_init, b)


##############################################################################
# Plot data
# ---------

pl.figure(1)
for (x_i, b_i) in zip(measures_locations, measures_weights):
    color = np.random.randint(low=1, high=10 * N)
    pl.scatter(x_i[:, 0], x_i[:, 1], s=b_i * 1000, label='input measure')  # b_i matches x_i; b has k entries for the barycenter
pl.scatter(X[:, 0], X[:, 1], s=b * 1000, c='black', marker='^', label='2-Wasserstein barycenter')
pl.title('Data measures and their barycenter')
pl.legend(loc=0)
pl.show()
Example #54
om_net = one_mode_network(om_net_data)

x = collections.OrderedDict()
y = collections.OrderedDict()

om_degree = []
tm_degree = []

for i in range(len(om_net.adj)):

    x[i] = tm_net.get_effective_size(i, True)
    #x[i] = tm_net.get_effective_size(i, False)
    y[i] = om_net.get_effective_size(i)
    om_degree.append(len(om_net.get_contacts(i)))
    tm_degree.append(len(tm_net.get_contacts(i, True)))
    #tm_degree.append(len(tm_net.get_contacts(i, False)))

# plot by effective size stuff
x_items = x.items()
x_list = [i[1] for i in x_items]

y_items = y.items()
y_list = [j[1] for j in y_items]

plt.scatter(x_list, y_list, s=1)
plt.xlabel("Two-mode effective size")
plt.ylabel("One-mode effective size")

plt.show()
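Example #55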
    base = path.basename(datdir)
    if (path.isdir(datdir) and len(
            glob.glob("{0}/../../coord/TKR4p173/{1}/*.xy".format(datdir,
                                                                 base))) > 0
            and len(
                glob.glob("{0}/../../coord/TKR4p173/diffusion_{1}.xc".format(
                    datdir, base))) > 0):
        # Plot phase diagram
        plt.figure(0, figsize=(10, 7.5))  # inches
        plt.plot(XS, YS, "-k")
        plt.plot(X0, Y0, "-k", zorder=1)
        plt.title("Cr-Nb-Ni at %.0f K" % temp, fontsize=18)
        plt.xlabel(r"$x_\mathrm{Nb}$", fontsize=18)
        plt.ylabel(r"$x_\mathrm{Cr}$", fontsize=18)
        plt.xticks(np.linspace(0, 1, 21))
        plt.scatter(Xtick, Ytick, color="black", s=3)
        gann = plt.text(simX(0.010, 0.495),
                        simY(0.495),
                        r"$\gamma$",
                        fontsize=14)
        dann = plt.text(simX(0.230, 0.010),
                        simY(0.010),
                        r"$\delta$",
                        fontsize=14)
        lann = plt.text(simX(0.340, 0.275), simY(0.275), r"L", fontsize=14)

        # Add composition pathways
        fnames = sorted(
            glob.glob("{0}/../../coord/TKR4p173/{1}/*.xy".format(datdir,
                                                                 base)))
        for file in fnames[::10]:
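Example #56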
    def dataloader(self):
        if self.name == 'regression':
            train_size = args.batch_size

            X = np.linspace(-0.5, 0.5, train_size).reshape(-1, 1)
            y = self._f(X, sigma=self.noise_data)
            y_true = self._f(X, sigma=0.0)
            plt.scatter(X, y, marker='+', label='Training data')
            plt.plot(X, y_true, label='Truth')
            plt.title('Noisy training data and ground truth')
            plt.legend()
            plt.show()

            X_train, Y_train = torch.Tensor(X), torch.Tensor(y)
            X_test, Y_test = torch.Tensor(X), torch.Tensor(y)

            data = torch.utils.data.TensorDataset(X_train, Y_train)

            train_loader = torch.utils.data.DataLoader(data,
                                                       batch_size=train_size,
                                                       shuffle=True)
            return train_loader, train_size

        elif self.name == 'stenosis_hard':
            train_size = args.batch_size
            N_y = 30
            L = 1
            xStart = 0
            xEnd = xStart + L
            rInlet = 0.05

            nPt = 100
            unique_x = np.linspace(xStart, xEnd, nPt)
            sigma = 0.1
            scale = 0.005
            mu = 0.5 * (xEnd - xStart)
            x_2d = np.tile(unique_x, N_y)
            x = x_2d
            x = np.reshape(x, (len(x), 1))

            Data = np.load('xyuvp_uinlet.npz')
            x = Data['x']
            y = Data['y']
            u = Data['u']
            v = Data['v']
            P = Data['p']
            x = x[..., None]
            y = y[..., None]
            u = u[..., None]
            v = v[..., None]
            P = P[..., None]
            #print('x.shape is',x.shape)
            R = scale * 1 / np.sqrt(2 * np.pi * sigma**2) * np.exp(
                -(x - mu)**2 / (2 * sigma**2))
            nu = 1e-3
            yUp = rInlet - R
            yDown = -rInlet + R
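            # R is a Gaussian bump centred at x = mu, so yUp and yDown trace a
            # smooth symmetric constriction (the stenosis) in a channel of
            # half-width rInlet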
            plt.scatter(x, yUp)
            plt.scatter(x, yDown)
            plt.scatter(x, y)
            plt.axis('equal')
            plt.show()
            ############################

            np.savez('stenosis_hard_coord', x=x, y=y, yUp=yUp, u=u, v=v, P=P)
            ################
            data = torch.utils.data.TensorDataset(torch.FloatTensor(x),
                                                  torch.FloatTensor(y))

            train_loader = torch.utils.data.DataLoader(data,
                                                       batch_size=train_size,
                                                       shuffle=True)
            print('len(data) is', len(data))
            print('len(train_loader) is', len(train_loader))
            return train_loader, train_size
        else:
            raise Exception("error: no such model")
Example #57
def draw_world(occupancy_grid,
               robot_locations,
               assignments,
               lines_plot={},
               poses=[],
               line_multiplier=1):
    fig, ax = plt.subplots()
    occupancy_grid.draw()

    colours = [(1, 0, 0), (0, 1, 0), (0, 0, 1), (1, 1, 0), (1, 0, 1),
               (0, 1, 1), (0.5, 0, 0), (0, 0.5, 0), (0, 0, 0.5), (0.5, 0.5, 0),
               (0.5, 0, 0.5), (0, 0.5, 0.5), (0.25, 0, 0), (0, 0.25, 0),
               (0, 0, 0.25), (0.25, 0.25, 0), (0.25, 0, 0.25), (0, 0.25, 0.25)]

    for (i, j), v in np.ndenumerate(assignments):
        pos = occupancy_grid.get_position(i, j)
        from_origin = pos - occupancy_grid.origin
        position = occupancy_grid.origin + line_multiplier * from_origin
        if occupancy_grid.is_free(position):
            if v == 0:
                continue
            rectangle = plt.Rectangle(
                position,
                occupancy_grid.resolution * line_multiplier,
                occupancy_grid.resolution * line_multiplier,
                fc=colours[v])
            plt.gca().add_patch(rectangle)
            #plt.show()

    for pose in poses:
        x, y, angle = pose
        x, y = occupancy_grid.get_position(x, y)
        if angle == 0:
            plt.arrow(x, y - occupancy_grid.resolution / 8, 0,
                      occupancy_grid.resolution / 4)
        elif angle == np.pi / 2:
            plt.arrow(x + occupancy_grid.resolution / 8, y,
                      -occupancy_grid.resolution / 4, 0)
        elif angle == np.pi:
            plt.arrow(x, y + occupancy_grid.resolution / 8, 0,
                      -occupancy_grid.resolution / 4)
        elif angle == -np.pi / 2:
            plt.arrow(x - occupancy_grid.resolution / 8, y,
                      occupancy_grid.resolution / 4, 0)
        else:
            print("Unable to plot")
            sys.exit()

    for robot in robot_locations:
        plot_position = occupancy_grid.get_position(robot[0], robot[1])
        plt.scatter(plot_position[0],
                    plot_position[1],
                    s=10,
                    marker='o',
                    color='black',
                    zorder=1000)

    plt.axis('equal')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()
Example #58
import sklearn.discriminant_analysis as LDA
import matplotlib.pylab as plt
import numpy as np

X_train = np.load('./data/wtf_X_train.npy')
X_test = np.load('./data/wtf_X_test.npy')

y_train = np.load('./data/wtf_y_train.npy')
y_test = np.load('./data/wtf_y_test.npy')

l_train = np.nonzero(y_train)[1]
l_test = np.nonzero(y_test)[1]

lda = LDA.LinearDiscriminantAnalysis(n_components=2)
X_train_lda = lda.fit_transform(X_train, l_train)
X_test_lda = lda.transform(X_test)  # project with the already-fitted LDA; refitting on test labels would leak

plt.figure()
plt.scatter(X_train_lda[:, 0], X_train_lda[:, 1], s=2, c=l_train, cmap='hsv')
plt.figure()
plt.scatter(X_test_lda[:, 0], X_test_lda[:, 1], s=2, c=l_test, cmap='hsv')
plt.show()
Example #59
def phaseToMJD(tPho=Table(), MJD0=51000.750, \
                   per=6.4714, tZer=48813.873, \
                   trimWeird=True, \
                   plotDBG=False):
    """Unpacks phase to MJD, with various assumptions about the
    observation date and row order. MJD0 is the MJD at the start of
    the run (will be used to find the nearest whole-number number of
    cycles for the ephemeris). Defaults to 1998 July 6th at 6pm UT.

    trimWeird: Two of the Zurita points appear to be
    mis-ordered. Remove them if trimWeird is set to "True."

    """
    if 'phase' not in tPho.colnames:
        return tPho  # return unchanged

    phase = tPho['phase']

    # WATCHOUT - The Casares et al. ephemeris expresses tZer as JD - 2
    # 400 000.0 which means MJD + 0.5. For ease of reading, we add
    # that 0.5 days back on here.
    tZ = tZer - 0.5

    N_start = np.floor((MJD0 - tZ) / per)
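    # N_start is the number of whole orbital cycles elapsed between the
    # ephemeris zero point tZ and the start of the run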

    # which means the next lowest phase zero must occur at MJD...
    mjdPrevZero = tZ + N_start * per
    # print "INFO - nearest phase zero is", mjdPrevZero

    # it will be handy to have an array giving "weird" points for
    # which the time appears to have moved backwards...
    bWeird = np.repeat(False, np.size(phase))

    # we loop through these since it's the row number that preserves
    # the phase ordering
    mjdCalc = np.zeros(np.size(phase))
    iOrbit = 0.0
    for iRow in range(np.size(phase)):

        # which orbit are we on now?
        if iRow > 0:
            if phase[iRow - 1] - phase[iRow] > 0.5:
                iOrbit = iOrbit + 1.0

        # now compute the mjd calc
        nOrbs = phase[iRow] + iOrbit
        mjdCalc[iRow] = mjdPrevZero + nOrbs * per

        if iRow > 0:
            if mjdCalc[iRow] < mjdCalc[iRow - 1]:
                bWeird[iRow] = True

    # update in-place
    tPho['time'] = Column(mjdCalc)

    if plotDBG:
        plt.figure(1)
        plt.clf()
        lCount = np.arange(np.size(phase))
        plt.plot(lCount, phase, 'bo', ls='-')
        plt.plot(lCount[bWeird], phase[bWeird], 'rx', zorder=25)
        plt.xlabel('Row number')
        plt.ylabel('Phase')

        plt.figure(2)
        plt.clf()
        plt.scatter(tPho['time'], tPho['mag'], c='g', \
                        edgecolor='0.5')
        plt.plot(tPho['time'], tPho['mag'], c='g', lw=1)

        plt.plot(tPho['time'][bWeird], tPho['mag'][bWeird], 'rx', zorder=25)
        plt.xlabel('MJD')
        plt.ylabel('Magn')

    if trimWeird:
        tPho = tPho[~bWeird]
        tPho.meta['trimOoO'] = int(np.sum(bWeird))

    # return the table with MJDCalc
    return tPho
Example #60
def plot_results(result_folder, observed_data, input_model):

	plt.rcParams['xtick.labelsize'] = 15
	plt.rcParams['ytick.labelsize'] = 15
	plt.rcParams['xtick.major.size'] = 6
	plt.rcParams['xtick.minor.size'] = 6
	plt.rcParams['xtick.major.width'] = 2
	plt.rcParams['xtick.minor.width'] = 2

	result_file = result_folder + "/full_list.txt"
	observed_data = result_folder + "/" + observed_data
	input_model = result_folder + "/" + input_model

	lines = open(observed_data).readlines()
	TTobs, EEobs, ssdobs = [],[],[]
	for i in range(1,len(lines)):
		Tobs = float(lines[i].split()[0])
		Eobs = float(lines[i].split()[1])
		sdobs = float(lines[i].split()[2])
		TTobs.append(Tobs)
		EEobs.append(Eobs)
		ssdobs.append(sdobs)


	Zi, VPi, VSi, RHOi = plot_array_from_model(input_model)


	model_list = []
	lines = open(result_file).readlines()
	n_model = 1
	iindex = []
	nmod = 0
	ccost, vvs1, vvs2, vvs3, vvs4, vvs5, vvs6 = [], [],[],[],[],[],[]
	hh1, hh2, hh3 = [],[],[]
	for i in range(0,len(lines)):
		if lines[i].strip() and lines[i].split()[0] == "mft:":
			mft = float(lines[i].split()[1])
			rough = float(lines[i].split()[3])
			cost = float(lines[i].split()[5])
			cost = mft  # rank models by the raw misfit rather than the combined cost

			EE, TT = [], []
			for n in range(3,1000):
				if lines[i+n].split()[0] != "!":
					T = float(lines[i+n].split()[0])
					E = float(lines[i+n].split()[1])
					EE.append(E)
					TT.append(T)
				else:
					break


			tthick, vvs, vvp, rrho = [],[],[],[]
			for m in range(n+2, 1000):
				if lines[i+m][1] !="#":
					#print lines[i+m]
					thick = float(lines[i+m].split()[0])
					vs = float(lines[i+m].split()[2]) 
					vp = float(lines[i+m].split()[1])
					rho = float(lines[i+m].split()[3])
					tthick.append(thick)
					vvs.append(vs)
					vvp.append(vp)
					rrho.append(rho)
				else:
					break

			iindex.append(nmod)
			nmod+=1
			model_list.append([n_model, cost, TT, EE, tthick, vvs, vvp, rrho, i])
			n_model += 1
			vs1 = vvs[0]
			vs2 = vvs[1]
			vs3 = vvs[2]
			vs4 = vvs[3]
			vs5 = vvs[4]
			vs6 = vvs[5]

			h1 = tthick[0]
			h2 = tthick[1]
			h3 = tthick[2]

			ccost.append(cost)
			vvs1.append(vs1)
			vvs2.append(vs2)
			vvs3.append(vs3)
			vvs4.append(vs4)
			vvs5.append(vs5)
			vvs6.append(vs6)

			hh1.append(h1)
			hh2.append(h2)
			hh3.append(h3)


	model_list_sorted = sorted(model_list, key = itemgetter(1), reverse=True)
	min_cost = model_list_sorted[-1][1]
	max_cost = model_list_sorted[0][1]

	perc = 20	#  20%

	threshold = min_cost * (perc + 100)/100.
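	# i.e. keep every model whose misfit is within perc per cent of the best
	# (lowest) value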

	TTbest = model_list_sorted[-1][2]
	EEbest = model_list_sorted[-1][3]
	tthick = model_list_sorted[-1][4]
	vvs = model_list_sorted[-1][5]
	vvp = model_list_sorted[-1][6]
	rrho = model_list_sorted[-1][7]
	line_best = model_list_sorted[-1][8]

	out = open(result_folder+"/best_model.d","w")
	for i in range(line_best, line_best+1000):
	#	print lines[i]
		if lines[i].split()[0] == "model:":
			for j in range(1,11):
				out.write(lines[i+j])
			break
	out.close()

	mean = (vvs[0]*tthick[0] + vvs[1]*tthick[1])/(tthick[0]+tthick[1])

	Zbest, VSbest, VPbest, RHObest = prepare4plot(tthick, vvs, vvp, rrho)


	cmap = cm.hot
	fig = plt.figure(1, figsize=(8.27, 11.69))
	fig.subplots_adjust( wspace=1.)
	ax1 = plt.subplot2grid((2, 4), (0, 0), colspan=4)
	ax2 = plt.subplot2grid((2, 4), (1, 0), colspan=2)
	ax3 = plt.subplot2grid((2, 4), (1, 2), colspan=2)

	for model in  sorted(model_list_sorted, key = itemgetter(1), reverse=True):
		cost = model[1]
		TT = model[2]
		EE = model[3]
		tthick = model[4]
		vvs = model[5]
		vvp = model[6]
		rrho = model[7]
		Z, VS, VP, RHO = prepare4plot(tthick, vvs, vvp, rrho)

		if cost <= threshold:
			colorVal = normalize_misfit(cost, min_cost, threshold, 0.5,1)

			ax2.plot(VS,Z, color=str(colorVal), linewidth=2, zorder=0)
			ax3.plot(VS,Z, color=str(colorVal), linewidth=2, zorder=0)
			ax1.plot(TT,EE, color=str(colorVal), linewidth=2, zorder=9)

	#plt.subplot(223)
	ax2.plot(VSbest,Zbest, color="red", zorder=2, label="Best model", linewidth=2)
	ax2.plot(VSi, Zi, color="black", label= "Litho1.0", linewidth=2, zorder=2, linestyle=":")
	ax2.set_ylim(45,0)
	ax2.set_xlim(0.2,5.0)
	ax2.set_xlabel("Vs (km/s)",size=15)
	ax2.set_ylabel("Depth (km)",size=15)
	ax2.legend(loc=3, fontsize=13)
	ax2.xaxis.set_major_locator(ticker.FixedLocator([1,2,3,4,5]))
	#plt.axhline(6,color="0.5",linestyle="--",linewidth=0.5)
	#ax2.tick_params(labelsize=13)


	#plt.subplot(224)
	ax3.plot(VSbest,Zbest, color="red", zorder=2, label="Best model", linewidth=2)
	ax3.plot(VSi, Zi, color="black", label= "Litho1.0", linewidth=2, zorder=2, linestyle=":")
	ax3.set_ylim(6,0)
	ax3.set_xlim(0., 4)
	ax3.set_xlabel("Vs (km/s)",size=15)
	ax3.set_ylabel("Depth (km)",size=15)
	ax3.legend(loc=3, fontsize=13)

	ax1.errorbar(TTobs, EEobs, yerr=ssdobs, color="black", fmt=" ",zorder=0, alpha=0.3)
	ax1.scatter(TTobs, EEobs, color="black", s=30, label="Observed data", zorder=0)
	ax1.plot(TTbest,EEbest, color="red", label="Theo. ellipticity from best model",zorder=10, linewidth=2)
	ax1.set_xscale("log")
	ax1.set_xlim(0.9*min(TT),max(TT)*1.1)
	ax1.set_ylim(-1,1)
	ax1.set_xlabel("Period (s)",size=15)
	ax1.set_ylabel("Log(H/V)",size=15)
	ax1.legend(loc=4, fontsize=15)
	#ax1.tick_params(labelsize=13)
	ax1.xaxis.set_minor_formatter(FormatStrFormatter("%.0f"))
	ax1.xaxis.set_major_formatter(FormatStrFormatter("%.0f"))
	#ax1.xaxis.set_major_locator(ticker.FixedLocator([2,3,4,5,6,7,8,9,10,20,30,40,50,60,70,80,90]))
	#ax1.set_xticks([9])

	ax1.set_xticklabels([1,1,1,1,1,1,1,1,2,3,4,5,6,7,8,"",20,"",40,"",60,"",80,""],minor=True)


	plt.suptitle("Real data inversion\nStation: CCD\nMisfit threshold = " +str(perc)+"%",size=18)
	plt.savefig(result_folder + "/results_CCD", dpi=200)
	plt.close()


	#----------------------------------------------------------

	fig = plt.figure(1, figsize=(8.27, 11.69))
	fig.subplots_adjust(wspace=0.5, hspace=0.4, top=0.9)
	plt.subplot(9,1,1)
	all_costs = list(zip(*model_list))[1]  # bare zip(...)[1] indexing is Python 2 only
	plt.scatter(iindex, all_costs, color="black", s=1)
	plt.ylabel("Misfit", size=13)
	plt.yscale("log")
	plt.ylim(min(all_costs)*0.5, max(all_costs))

	plt.subplot(9,1,2)
	plt.scatter(iindex, vvs1, color="black", s=1)
	plt.ylabel("Vs 1\n(km/s)", size=13)
	
	plt.subplot(9,1,3)
	plt.scatter(iindex, hh1, color="black", s=1)
	plt.ylabel("H 1\n(km)", size=13)

	plt.subplot(9,1,4)
	plt.scatter(iindex, vvs2, color="black", s=1)
	plt.ylabel("Vs 2\n(km/s)", size=13)
	
	plt.subplot(9,1,5)
	plt.scatter(iindex, hh2, color="black", s=1)
	plt.ylabel("H 2\n(km)", size=13)
	
	plt.subplot(9,1,6)
	plt.scatter(iindex, hh3, color="black", s=1)
	plt.ylabel("H 3\n(km)", size=13)

	plt.subplot(9,1,7)
	plt.scatter(iindex, vvs4, color="black", s=1)
	plt.ylabel("Vs 4\n(km/s)", size=13)

	plt.subplot(9,1,8)
	plt.scatter(iindex, vvs5, color="black", s=1)
	plt.ylabel("Vs 5\n(km/s)", size=13)

	plt.subplot(9,1,9)
	plt.scatter(iindex, vvs6, color="black", s=1)
	plt.ylabel("Vs 6\n(km/s)", size=13)
	
	plt.xlabel("# Model", size=13)


	plt.suptitle("Inversion evolution", size=18)
	plt.savefig(result_folder + "/convergence.png")
	plt.close()


	#======================================================================

	plt.figure(figsize=(15,15))
	plt.subplots_adjust(left=0.1, right = 0.9, top=0.9, bottom=0.1, hspace=0.2, wspace=0.2)

	ccost, vvs1, vvs2, vvs3, vvs4, vvs5, vvs6 = [], [],[],[],[],[],[]
	for model in  sorted(model_list_sorted, key = itemgetter(1), reverse=True):
		cost = model[1]
		TT = model[2]
		EE = model[3]
		tthick = model[4]
		vvs = model[5]
		vvp = model[6]
		rrho = model[7]

		vs1 = vvs[0]
		vs2 = vvs[1]
		vs3 = vvs[2]
		vs4 = vvs[3]
		vs5 = vvs[4]
		vs6 = vvs[5]

		ccost.append(cost)
		vvs1.append(vs1)
		vvs2.append(vs2)
		vvs3.append(vs3)
		vvs4.append(vs4)
		vvs5.append(vs5)
		vvs6.append(vs6)

	#====================================================


	vmin = np.log10(min_cost)
	vmax = np.log10(max_cost)
	cmap = cm.jet
	m="*"
	fc="red"
	eg="black"
	s=400
	plt.subplot(5,5,1)
	cp = plt.scatter(vvs1, vvs2, c=np.log10(ccost),s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap)
	plt.ylabel("Vs2", fontsize=15)
	plt.xlim(min(vvs1), max(vvs1))
	plt.ylim(min(vvs2), max(vvs2))


	plt.subplot(5,5,6)
	cp = plt.scatter(vvs1, vvs3, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap)
	plt.ylabel("Vs3", fontsize=15)
	#plt.xlim(min(vvs1), max(vvs1))
	#plt.ylim(min(vvs3), max(vvs3))

	plt.subplot(5,5,7)
	cp = plt.scatter(vvs2, vvs3, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap)
	#plt.xlim(min(vvs2), max(vvs2))
	#plt.ylim(min(vvs3), max(vvs3))

	plt.subplot(5,5,11)
	cp = plt.scatter(vvs1, vvs4, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap)
	plt.xlim(min(vvs1), max(vvs1))
	plt.ylim(min(vvs4), max(vvs4))
	plt.ylabel("Vs4", fontsize=15)

	plt.subplot(5,5,12)
	cp = plt.scatter(vvs2, vvs4, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap)
	plt.xlim(min(vvs2), max(vvs2))
	plt.ylim(min(vvs4), max(vvs4))

	plt.subplot(5,5,13)
	cp = plt.scatter(vvs3, vvs4, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap)
	#plt.xlim(min(vvs3), max(vvs3))
	#plt.ylim(min(vvs4), max(vvs4))


	plt.subplot(5,5,16)
	cp = plt.scatter(vvs1, vvs5, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap)
	plt.xlim(min(vvs1), max(vvs1))
	plt.ylim(min(vvs5), max(vvs5))
	plt.ylabel("Vs5", fontsize=15)
	

	plt.subplot(5,5,17)
	cp = plt.scatter(vvs2, vvs5, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap)
	plt.xlim(min(vvs2), max(vvs2))
	plt.ylim(min(vvs5), max(vvs5))
	plt.xlabel("Vs2", fontsize=15)

	plt.subplot(5,5,18)
	cp = plt.scatter(vvs3, vvs5, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap)
	#plt.xlim(min(vvs3), max(vvs3))
	#plt.ylim(min(vvs5), max(vvs5))
	plt.xlabel("Vs3", fontsize=15)

	plt.subplot(5,5,19)
	cp = plt.scatter(vvs4, vvs5, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap)
	plt.xlim(min(vvs4), max(vvs4))
	plt.ylim(min(vvs5), max(vvs5))
	plt.xlabel("Vs4", fontsize=15)

	plt.subplot(5,5,21)
	cp = plt.scatter(vvs1, vvs6, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap)
	plt.xlim(min(vvs1), max(vvs1))
	plt.ylim(min(vvs6), max(vvs6))
	plt.xlabel("Vs1", fontsize=15)
	plt.ylabel("Vs6", fontsize=15)

	plt.subplot(5,5,22)
	cp = plt.scatter(vvs2, vvs6, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap)
	plt.xlim(min(vvs2), max(vvs2))
	plt.ylim(min(vvs6), max(vvs6))
	plt.xlabel("Vs2", fontsize=15)

	plt.subplot(5,5,23)
	cp = plt.scatter(vvs3, vvs6, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap)
	#plt.xlim(min(vvs3), max(vvs3))
	#plt.ylim(min(vvs6), max(vvs6))
	plt.xlabel("Vs3", fontsize=15)

	plt.subplot(5,5,24)
	cp = plt.scatter(vvs4, vvs6, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap)
	plt.xlim(min(vvs4), max(vvs4))
	plt.ylim(min(vvs6), max(vvs6))
	plt.xlabel("Vs4", fontsize=15)

	plt.subplot(5,5,25)
	cp = plt.scatter(vvs5, vvs6, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap)
	plt.xlim(min(vvs5), max(vvs5))
	plt.ylim(min(vvs6), max(vvs6))
	plt.xlabel("Vs5", fontsize=15)


	plt.savefig(result_folder + "/correlation.png")
	plt.close()

	return