Example #1
def simplify3(nk):
	result=[]
	nk=np.array(nk)
	xk = nk/float(np.sum(nk))
	#print nk
	
	#X_plot = np.linspace(0, len(nk), 1000)[:, np.newaxis]
	sdiv=1000
	X_plot = np.linspace(0, len(xk), sdiv)[:, np.newaxis]
	custm = stats.rv_discrete(name='custm',a=0,b=7, values=(range(len(xk)), xk))
	yk= custm.rvs(size=100000)
	#yk.flatten()
	#fig, ax = plt.subplots(1, 1)
	#ax.hist(yk, normed=True, histtype='stepfilled', alpha=0.2)
	# gaussian KDE
	X=yk.reshape(-1, 1)
	kde = KernelDensity(kernel='gaussian', bandwidth=0.6).fit(X)
	log_dens = kde.score_samples(X_plot)
	mi, ma = argrelextrema(log_dens, np.less)[0], argrelextrema(log_dens, np.greater)[0]
	mi=np.rint(mi*float(len(xk))/float(sdiv))
	ma=np.rint(ma*float(len(xk))/float(sdiv))
	start=0	
	#print mi
	for i in mi:
		i=int(i)
		if start!=i:
			val=np.average(nk[start:i])
			for j in range(start, i):
				result.append(val)
		start=i	
	val=np.average(nk[start:])
	for j in range(start, len(nk)):
		result.append(val)
	return np.array(result)
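
A minimal usage sketch for simplify3 (illustrative data; assumes the imports the snippet relies on):

import numpy as np
from scipy import stats
from scipy.signal import argrelextrema
from sklearn.neighbors import KernelDensity

counts = [5, 7, 6, 1, 1, 2, 9, 8]   # hypothetical per-bin counts
smoothed = simplify3(counts)         # piecewise-constant approximation, same length as input
print(smoothed)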
Example #2
def xy_kde(xy,bandwidth,N_grid=100,levels=[0.8,0.6,0.4,0.2]):  
    
    x_edges = np.linspace(np.min(xy[:,0]),np.max(xy[:,0]),N_grid+1)
    y_edges = np.linspace(np.min(xy[:,1]),np.max(xy[:,1]),N_grid+1)
    x_centres = np.array([x_edges[b] + (x_edges[b+1]-x_edges[b])/2 
                          for b in range(N_grid)])
    y_centres = np.array([y_edges[b] + (y_edges[b+1]-y_edges[b])/2 
                          for b in range(N_grid)])
    x_grid, y_grid = np.meshgrid(x_centres,y_centres)
    xy_grid = np.array([np.ravel(x_grid),np.ravel(y_grid)]).T
    kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(xy)
    H = np.exp(kde.score_samples(xy_grid).reshape(N_grid,N_grid))
    # this bit is taken from the corner_plot.py method.
    ######################################
    Hflat = H.flatten()
    inds = np.argsort(Hflat)[::-1]
    Hflat = Hflat[inds]
    sm = np.cumsum(Hflat)
    sm /= sm[-1]
    V = np.empty(len(levels))
    for i, v0 in enumerate(levels):
        try:
            V[i] = Hflat[sm <= v0][-1]
        except IndexError:
            V[i] = Hflat[0]
    #####################################
    V = np.sort(V)
    
    return H, V, x_grid, y_grid
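
A usage sketch for xy_kde: draw correlated 2-D samples and plot the mass contours it returns (data and bandwidth are illustrative):

import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KernelDensity

rng = np.random.default_rng(0)
xy = rng.multivariate_normal([0, 0], [[1, 0.6], [0.6, 1]], size=2000)
H, V, x_grid, y_grid = xy_kde(xy, bandwidth=0.3, N_grid=80)
plt.contour(x_grid, y_grid, H, levels=V)   # 0.8/0.6/0.4/0.2 mass levels
plt.show()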
def kdewrap(indata, kernel):
    grid = GridSearchCV(KernelDensity(),
                    {'bandwidth': np.linspace(0.1, 1.0, 30)},
                    cv=10) # 10-fold cross-validation
    grid.fit(indata[:, None])
    kde = KernelDensity(kernel=kernel, bandwidth=grid.best_params_["bandwidth"]).fit(indata[:, np.newaxis])
    return kde.score_samples(indata[:, np.newaxis])
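
kdewrap picks its bandwidth by 10-fold cross-validation before scoring the inputs; a quick sketch of calling it on 1-D data (assumes numpy, GridSearchCV and KernelDensity are imported):

data = np.random.normal(size=200)
log_dens = kdewrap(data, 'gaussian')   # log-density at each input point
print(np.exp(log_dens).max())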
Example #4
def _importance_preprocess_uni(states, rewards, gradients, p_tar, p_gen):
    res = _create_episode_info()

    flat_states = [s for traj in states for s in traj]
    # TODO Pass in as args?
    kde = KernelDensity(kernel='gaussian', bandwidth=0.25)
    kde.fit(flat_states)

    for ss, rs, gs, ps, qs in zip(states, rewards, gradients, p_tar, p_gen):

        state_probs = kde.score_samples(ss)
        traj_p = np.cumsum(ps)  # + np.mean(state_probs)
        traj_q = np.cumsum(qs) + state_probs
        traj_grads = np.cumsum(gs, axis=0)
        r_acc = np.cumsum(rs[::-1])[::-1]
        r_grad = (r_acc * traj_grads.T).T

        res.r_grads.extend(r_grad)
        res.traj_p_tar.extend(traj_p)
        res.traj_p_gen.extend(traj_q)
        res.traj_grads.extend(traj_grads)
        res.traj_r.extend(r_acc)

        # Used for estimating fisher
        res.act_grads.extend(gs)
        res.state_act_p_tar.extend(traj_p)
        res.state_act_p_gen.extend(traj_q)

    return res
Example #5
class OneClassKDE(BaseClassifier):
    _fit_params = ["bandwidth"]
    _predict_params = []
    def __init__(self, *args, **kwargs):
        self.bandwidth = kwargs["bandwidth"]
        self.perc_keep = kwargs["perc_keep"]
    
    def fit(self, data, **kwargs):
        #self.train_data = data
        self.kde = KernelDensity(kernel='gaussian', bandwidth=self.bandwidth)
        
        idx = numpy.random.randint(2, size=len(data)).astype(bool)
        print(idx)
        
        
        self.kde.fit(data[idx, :])
        self.training_score = self.kde.score_samples(data[~idx, :])
        self.direct_thresh = numpy.percentile(self.training_score, 100-self.perc_keep)
        
        print('training', self.training_score.min(), self.training_score.mean(), self.training_score.max(), self.direct_thresh)
        
        print(self.direct_thresh)
    
    def predict(self, data):
        score = self.kde.score_samples(data)
        self.score = score
        res = (score < self.direct_thresh)
        print('test', self.score.min(), self.score.mean(), self.score.max())
        print(res.sum(), "of", len(self.score), 'outliers')
        
        return res.astype(numpy.uint8)*-2+1
    
    def decision_function(self, data=None):
        return self.score
def estimate_distribution(samples, h=0.1, n_points=100):
	kde = KernelDensity(bandwidth=h)
	samples = samples[:, np.newaxis]
	kde.fit(samples)
	xs = np.linspace(-1.0, 1.0, n_points)
	ys = [np.exp(kde.score([[x]])) for x in xs]  # score() expects a 2-D sample
	return xs, ys
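
A short usage sketch (numpy, matplotlib and KernelDensity imports assumed); note the evaluation grid is fixed to [-1, 1]:

samples = np.random.uniform(-1, 1, 500)
xs, ys = estimate_distribution(samples, h=0.15)
plt.plot(xs, ys)
plt.show()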
Example #7
def kernel_estimation(test,train_n,train_p):    
    relevance_score=[]
    result_n=[]
    result_p=[]   

    X_n=np.array(train_n)   
    X_p=np.array(train_p)
    Y=np.array(test)
    
    #params = {'bandwidth': np.logspace(-1, 1, 20)}
    #grid = GridSearchCV(KernelDensity(), params)
    #grid.fit(X_n)
    
    #print("best bandwidth: {0}".format(grid.best_estimator_.bandwidth))    
    
    kde_n = KernelDensity(kernel='gaussian', bandwidth=0.999).fit(X_n)
    kde_p = KernelDensity(kernel='gaussian', bandwidth=4.772).fit(X_p)
    for i in range(len(Y)):  
        result_n.append(np.exp(kde_n.score_samples(Y[i].reshape(1, -1))[0]))
        result_p.append(np.exp(kde_p.score_samples(Y[i].reshape(1, -1))[0]))
        if i%1000==0:
            print(i)
    
    for i in range(len(result_n)): 
        if result_n[i]==0.0:
            relevance_score.append(np.log(result_p[i]/1.8404e-17+1))
        else:
            relevance_score.append(np.log(result_p[i]/result_n[i]+1))

    return relevance_score
def sklearn_kde_plot(dataframe, choose_choice, topic_name, fold_num):
    # print(dataframe)
    N = dataframe.values.size
    X = dataframe.values[:, np.newaxis]

    # X_plot = np.linspace(min(dataframe.values), max(dataframe.values), num=500)[:, np.newaxis]
    X_plot = np.linspace(min(dataframe.values), 10, num=500)[:, np.newaxis]  # SET THIS
    # X_plot = np.linspace(min(dataframe.values), 10, num=500)[:, np.newaxis]
    # print(min(dataframe.values))
    # print(max(dataframe.values))
    # print(dataframe)

    true_dens = (0.3 * norm(0, 1).pdf(X_plot[:, 0]) + 0.7 * norm(5, 1).pdf(X_plot[:, 0]))
    fig, ax = plt.subplots()
    # ax.fill(X_plot, true_dens, fc='black', alpha=0.2, label='input distribution')

    # kde = KernelDensity(kernel='gaussian', bandwidth=0.005).fit(X)  # 'tophat', 'epanechnikov'
    kde = KernelDensity(kernel='gaussian', bandwidth=0.01).fit(X)  # 'tophat', 'epanechnikov'; SET THIS
    log_dens = kde.score_samples(X_plot)
    ax.plot(X_plot[:, 0], np.exp(log_dens), '-', label="kernel = '{0}'".format('gaussian'))

    ax.text(6, 0.38, "N={0} points".format(N))
    ax.legend(loc='upper right')
    # ax.plot(X[:, 0], -0.005 - 0.0005 * np.random.random(X.shape[0]), '+k')
    ax.plot(X[:, 0], -0.005 - 0.005 * np.random.random(X.shape[0]), '+k')

    # ax.set_xlim(min(dataframe.values), max(dataframe.values))
    ax.set_xlim(0, 10)  # SET THIS
    # ax.set_ylim(-0.02, 1)
    ax.set_ylim(-0.02, 1.0)  # SET THIS
    ax.set_xlabel("Delta Follower")
    ax.set_ylabel("Density")
    plt.title('Density - ' + choose_choice + ' (' + topic_name + ', ' + fold_num + ')')
    plt.show()
    return
Example #9
def basic_properties( sequences , axess=None, labl = None, logscale=[False], markr='.', clr='k',offset=0, alfa = 0.8,
                      distir = [False,False,False, False], bandwidths = [3, 0.1,0.01,1], limits = [(1,50),(0,1),(0,1),(1,25)] ):
    if axess is None:
        fig,axess = plt.subplots( 3, len(sequences),False,False, squeeze=False,figsize=(len(sequences)*3,8))#'col'
    plt.subplots_adjust(left=0.12, bottom=0.05, right=0.95, top=0.94,   wspace=0.28, hspace=0.1)
    plt.subplots_adjust(left=0.45, bottom=0.05, right=0.95, top=0.94,   wspace=0.28, hspace=1.2)

    for i in range(0,len(sequences)):
        ax = axess[offset][i]
        seq = sequences[i]
        smax =max(seq)
        smin =min(seq)

        if distir[i]==0:
            #print seq
            freqs , bin_edges = np.histogram(seq,  smax+1 if smax>1 else 100, range = (0,smax+1) if smax>1 else (0,smax))#, normed = True, density=True)
            bin_centers =  (bin_edges[:-1] + bin_edges[1:])/2.
            vals = range(0,smax+1) if smax>1 else bin_centers
            freqs=freqs*1.0/sum(freqs)
            #remove zeros
            y = np.array(freqs)
            nz_indexes = np.nonzero(y)
            y = y[nz_indexes]
            x = np.array(vals)[nz_indexes]
            ax.plot(x, y,':', label=labl, alpha =alfa, color = clr ,  marker ='.')
        else :
            X = np.array(seq)
            X = [ x for x in X if x>=limits[i][0] and x<=limits[i][1]]
    #         X= (np.abs(X))
#             print len(X)
            X = np.random.choice(X, size=min(10000, len(X)))
            X = X[:, np.newaxis]
            kde = KernelDensity(kernel = 'gaussian', bandwidth=bandwidths[i]).fit(X)#,atol=atols[i],kernel = 'tophat'kernel='gaussian'
#             if 'x' in logscale[i] : 
#                 X_plot = np.logspace( limits[i][0],  limits[i][1], 1000)[:, np.newaxis]
#             else :
            X_plot = np.linspace(limits[i][0], limits[i][1], 1000)[:, np.newaxis]
    
            log_dens = kde.score_samples(X_plot) #
    #         ax.fill(X_plot[:, 0], np.exp(log_dens), alpha =0.5, label=labl)
            Y  =  np.exp(log_dens)
            if  distir[i]==2: Y = np.cumsum(Y)
            ax.plot(X_plot[:, 0],Y, '-',label=labl, alpha =alfa, color = clr ,markersize=2,  marker ='')
    
            verts = [(limits[i][0]-1e-6, 0)] + list(zip(X_plot[:, 0],Y)) + [(limits[i][1]+1e-6, 0)]
            poly = Polygon(verts, facecolor=clr,  alpha =alfa ) #, edgecolor='0.5')
            ax.add_patch(poly)
    #         ax.set_yticks([])
    #         ax.set_ylim(bottom=-0.02)
            ax.set_xlim(limits[i][0],limits[i][1])
            
        if len(logscale)==len(sequences): 
            if 'x' in logscale[i] : 
                ax.set_xscale('log')
            if 'y' in logscale[i] : 
                ax.set_yscale('log')
                if i<3: ax.set_ylim(bottom=0.001)
#         ax.legend()
#         plt.show(block=False)
    return axess
Example #10
def pdf(data: list):
    # hist, bin = np.histogram(data, bins=50)
    # return hist
    kde = KernelDensity(kernel='gaussian', bandwidth=0.1).fit([[x] for x in data])
    b = [[x] for x in np.linspace(min(data), max(data), 100)]
    a = np.exp(kde.score_samples(b))

    return a
Example #11
def find_max_density_point(point_list):
    point_list, _ = remove_nan(point_list)
    if point_list.shape[0] == 0:
        return [float('nan'),float('nan'),float('nan')]
    kde = KernelDensity(kernel='gaussian', bandwidth=0.01).fit(point_list)
    prob_list = kde.score_samples(point_list)
    max_point = point_list[np.argmax(prob_list)]
    # print "max", max_point
    return max_point
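
A usage sketch, assuming the remove_nan helper the snippet calls is defined; with a narrow bandwidth the result is essentially the sample sitting in the densest region:

pts = np.random.normal(0, 0.05, size=(500, 3))
peak = find_max_density_point(pts)
print(peak)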
Example #12
def histLine(axes, data, minmax, color):
	(xmin, xmax) = minmax
	data = data.reshape(-1, 1)
	kde = KernelDensity(bandwidth=(xmax-xmin)/100.0).fit(data)
	x = np.linspace(xmin, xmax, 100).reshape(-1, 1)
	foo = kde.score_samples(x)
	density = np.exp(foo)

	axes.plot(x, density, color=color)
def createfeatmat(N):
    grid = getgridcoords(N).T
    featmat = np.zeros((len(vals), N ** 2))
    for i in range(len(vals)):
        m = np.array([vals[i][0], vals[i][1]]).T
        k = KernelDensity(bandwidth=0.5 / (N - 1), kernel="gaussian")
        k.fit(m)
        featmat[i, :] = k.score_samples(grid)
    return featmat
def estimate_distribution(samples, h=0.1, n_points=100):
	kde = KernelDensity(bandwidth=h)
	min_xs = min(samples)
	max_xs = max(samples)
	samples = samples[:, np.newaxis]
	kde.fit(samples)
	xs = np.linspace(min_xs, max_xs, n_points)
	ys = np.exp(kde.score_samples(xs[:, np.newaxis]))
	print(xs.shape, ys.shape, sum(ys))
	return xs, ys
Example #15
def kde_sklearn(x, x_grid, bandwidth=0.2, **kwargs):
    """Kernel Density Estimation with Scikit-learn"""
    kde_skl = KernelDensity(bandwidth=bandwidth, **kwargs)
    kde_skl.fit(x[:, np.newaxis])
    # score_samples() returns the log-likelihood of the samples
    log_pdf = kde_skl.score_samples(x_grid[:, np.newaxis])
    
    N = np.trapz(np.exp(log_pdf), x_grid)

    return np.exp(log_pdf)/N
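
Usage sketch: because the result is renormalized with np.trapz, the returned values integrate to roughly 1 over x_grid:

x = np.random.normal(0, 1, 1000)
x_grid = np.linspace(-4, 4, 200)
pdf = kde_sklearn(x, x_grid, bandwidth=0.3, kernel='gaussian')
print(np.trapz(pdf, x_grid))   # ~1.0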
    def fit(self, X, y):
        a = np.zeros((24, 7))
        hours = np.copy(X[:, 1])
        weekdays = np.copy(X[:, 2])
        hours = 23 * normalize(hours)
        weekdays = 6 * normalize(weekdays)

        if self.strategy == 'mean':
            counts = a.copy()
            for i, row in enumerate(zip(hours, weekdays)):
                hour = int(row[0])
                day = int(row[1])
                counts[hour, day] += 1
                a[hour, day] += y[i]

            counts[counts == 0] = 1
            self._model = a / counts

        elif self.strategy in ('median', 'kernel'):

            # this is a 3d array 
            groups = [[[] for i in range(7)] for j in range(24)]

            for i, row in enumerate(zip(hours, weekdays)):
                hour = int(row[0])
                day = int(row[1])
                groups[hour][day].append(y[i])

            if self.strategy == 'median':
                for i, j in np.ndindex((24, 7)):
                    if groups[i][j]:
                        a[i,j] = np.median(groups[i][j])
                    else:
                        a[i,j] = np.nan
            elif self.strategy == 'kernel':
                # kernel method computes a kernel density for each of the
                # bins and determines the most probable value (a 'mode' of sorts)
                grid = np.linspace(np.min(y), np.max(y), 1000)[:, np.newaxis]
                for i, j in np.ndindex((24, 7)):
                    if groups[i][j]:
                        npgroups = np.array(groups[i][j])[np.newaxis]
                        kernel = KernelDensity(kernel='gaussian', \
                                                bandwidth=0.2).fit(npgroups.T)
                        density = kernel.score_samples(grid)
                        dmax = np.max(density)
                        imax = np.where(density==dmax)
                        a[i,j] = grid[imax, 0]
                    else:
                        a[i,j] = np.nan

            self._model = a

        # smooth the model here if there are nans
        return self
def plot_stan_trc(dftrc):
    """
       Create simple plots of parameter distributions and traces from 
       output of pystan sampling. Emulates pymc traceplots.
    """

    fig, ax2d = plt.subplots(nrows=dftrc.shape[1], ncols=2, figsize=(14, 1.8*dftrc.shape[1]),
                                facecolor='0.99', edgecolor='k')
    fig.suptitle('Distributions and traceplots for {} samples'.format(
                                dftrc.shape[0]),fontsize=14)
    fig.subplots_adjust(wspace=0.2, hspace=0.5)

    k = 0
    
    # create density and traceplot, per parameter coeff
    for i, (ax1d, col) in enumerate(zip(ax2d, dftrc.columns)):

        samples = dftrc[col].values
        scale = (10**np.round(np.log10(samples.max() - samples.min()))) / 20
        kde = KernelDensity(bandwidth=scale).fit(samples.reshape(-1, 1))
        x = np.linspace(samples.min(), samples.max(), 100).reshape(-1, 1)
        y = np.exp(kde.score_samples(x))
        clr = sns.color_palette()[0]

        # density plot
        ax1d[0].plot(x, y, color=clr, linewidth=1.4)
        ax1d[0].vlines(np.percentile(samples, [2.5, 97.5]), ymin=0, ymax=y.max()*1.1,
                       alpha=1, linestyles='dotted', colors=clr, linewidth=1.2)
        mn = np.mean(samples)
        ax1d[0].vlines(mn, ymin=0, ymax=y.max()*1.1,
                       alpha=1, colors='r', linewidth=1.2)
        ax1d[0].annotate('{:.2f}'.format(mn), xy=(mn,0), xycoords='data'
                    ,xytext=(5,10), textcoords='offset points', rotation=90
                    ,va='bottom', fontsize='large', color='#AA0022')    
        ax1d[0].set_title('{}'.format(col), fontdict={'fontsize':10})


        # traceplot
        ax1d[1].plot(np.arange(len(samples)),samples, alpha=0.2, color=clr, linestyle='solid'
                              ,marker=',', markerfacecolor=clr, markersize=10)
        ax1d[1].hlines(np.percentile(samples,[2.5, 97.5]), xmin=0, xmax=len(samples),
                       alpha=1, linestyles='dotted', colors=clr)
        ax1d[1].hlines(np.mean(samples), xmin=0, xmax=len(samples), alpha=1, colors='r')

        k += 1
                
        ax1d[0].set_title('{}'.format(col), fontdict={'fontsize':14})#,'fontweight':'bold'})
        #ax1d[0].legend(loc='best', shadow=True)
        
        _ = [ax1d[j].axes.grid(True, linestyle='-', color='lightgrey') for j in range(2)]
            
    plt.subplots_adjust(top=0.94)
    plt.show()
Example #18
def densityEst(a,x,p,knn=1,Mode='G'):
	""" This is a density estimation currently supporting
	 one-dimensional Data.
	 There are two modes of operation:
	 knn==0 (Default) use fixed bandwidth.
	 knn==1 use k nearest neigbors.
	 Tow types of kernel are supported:
	 Mode=='T' (Default) for triangular.
	 Mode=='G' for Gaussian.
	 a is a vector of samples.
	 p is the parameter of model (bandwidth when knn=0 of number of neighbors
	 otherwise.
	 x is  points of estimation
	"""
	N=len(x)
	x = num.asarray(x).reshape(N, 1)
	l=len(a)
	a = num.asarray(a).reshape(l, 1)
	if knn==0:
		try:
			from sklearn.neighbors import KernelDensity
		except ImportError:
			print('Error: please install the sklearn package...')
			return
		if Mode=='T':
			S='linear'
		elif Mode=='G':
			S='gaussian'
		else:
			print('Currently only G (gaussian) and T (triangular) modes are supported')
			return
		kde = KernelDensity(kernel=S, bandwidth=p).fit(a)
		return (x,num.exp(kde.score_samples(x)))
	elif knn==1:
		try:
			from sklearn.neighbors import NearestNeighbors
		except ImportError:
			print('Error: please install the sklearn package...')
			return
		neigh = NearestNeighbors(n_neighbors=p)
		neigh.fit(a)
		dist,index=neigh.kneighbors(x)
		H=dist[:,-1]
		est=[0.0]*N
		for i,point_v in enumerate(x):
			point=point_v[0]
			h=H[i]
			est[i]=sum(kernel((a-point)/h,Mode))/(l*h)
		return (x,est)
	else:
		print('knn must be 0 or 1')
		return
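
A usage sketch for the fixed-bandwidth mode (knn=0), assuming numpy is imported as num, the alias the snippet uses; the knn=1 branch additionally needs the kernel helper it calls:

import numpy as num
a = num.random.normal(0, 1, 1000)    # samples
x = num.linspace(-3, 3, 200)         # points of estimation
pts, dens = densityEst(a, x, p=0.3, knn=0, Mode='G')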
def train_rlos(data, show_chart=False):
    """Train LOS estimator"""
    """Train patient LOS for triplet (sex, age, sline)"""
    freq = {}
    for row in data:
        sex = int(row["sex"])
        age = fp.split_age(int(row["age"]))
        sline = row["sline"]
        rlos = int(row["rlos"])

        if rlos == 0:
            print "RLOS equals zero for sex %d, age %d, SL %s" % (sex, age, sline)

        tuple = (sex, age, sline)
        freq.setdefault(tuple, [])
        freq[tuple].append(rlos)

    result = {}
    for tuple, train_data in freq.items():
        (sex, age, sline) = tuple
        if len(train_data) < training_threshold:
            print "Too small training set (<%d) for sex %d, age %d, SL %s. Data will be skipped. " % \
                  (training_threshold, sex, age, sline)
            continue

        X = np.array([train_data]).transpose()
        kde = KernelDensity(kernel='tophat', bandwidth=0.5).fit(X)
        kdef = lambda size: [round(l[0]) for l in kde.sample(size).tolist()]
        result[tuple] = kde

        if show_chart:
            # print "Sex=%d, Age=%d, SL=%s" % (sex, age, sline)
            # print_freq(ages)
            samples = kdef(len(train_data)) if len(train_data) < 500 else kdef(500)
            # print_freq(samples)

            # hist for train data
            plt.subplot(211)
            plt.title("RLOS train data for Sex=%d, Age=%d, SL=%s" % (sex, age, sline))
            plt.ylabel('freq')
            plt.xlabel('RLOS')
            plt.hist(train_data)

            # estimated density
            plt.subplot(212)
            plt.title("Estimated density Sex=%d, Age=%d, SL=%s" % (sex, age, sline))
            plt.ylabel('freq')
            plt.xlabel('RLOS')
            plt.hist(samples)

            plt.show()

    return result
Example #20
def pda_single(synth_data, data, bandwidth=.1):
    #synth_data = np.log(np.abs(synth_data))[:, np.newaxis]
    #data_log = np.log(np.abs(data))[:, np.newaxis]
    synth_data = synth_data[:, np.newaxis]
    data = data[:, np.newaxis]
    if bandwidth == 'silverman':
        lower, upper = scoreatpercentile(synth_data, [25, 75])
        iqr = upper - lower
        sd = np.std(synth_data)
        bandwidth = .9 * min(sd, iqr/1.34) * len(data)**(-1./5)

    kde = KernelDensity(kernel='epanechnikov', bandwidth=bandwidth).fit(synth_data)
    return kde.score_samples(data)
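
Usage sketch (assumes numpy and scipy.stats.scoreatpercentile are imported); passing bandwidth='silverman' applies the 0.9 * min(sd, IQR/1.34) * n**(-1/5) rule above:

synth = np.random.normal(0, 1, 5000)   # simulator output
obs = np.random.normal(0, 1, 200)      # observed data
log_lik = pda_single(synth, obs, bandwidth='silverman')
print(log_lik.sum())                   # summed log-likelihood of the observations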
def train_admit_count(data, show_chart=False):
    """Train patient admittance number for triplet (sex, age, sline)"""
    freq = {}
    for row in data:
        sex = int(row["sex"])
        age = fp.split_age(int(row["age"]))
        sline = row["sline"]
        admit = row["admit"]

        tuple = (sex, age, sline)
        freq.setdefault(tuple, {})
        freq[tuple].setdefault(admit, 0)
        freq[tuple][admit] += 1

    result = {}
    for tuple, days in freq.items():
        (sex, age, sline) = tuple
        train_data = list(days.values())
        if len(train_data) < training_threshold:
            print "Too small training set (<%d) for sex %d, age %d, SL %s. Data will be skipped. " % \
                  (training_threshold, sex, age, sline)
            continue

        X = np.array([train_data]).transpose()
        kde = KernelDensity(kernel='tophat', bandwidth=0.5).fit(X)
        kdef = lambda size: [int(round(l[0])) for l in kde.sample(size).tolist()]
        result[tuple] = kde

        if show_chart:
            # print "Sex=%d, Age=%d, SL=%s" % (sex, age, sline)
            # print_freq(ages)
            samples = kdef(len(train_data)) if len(train_data) < 500 else kdef(500)
            # print_freq(samples)

            # hist for train data
            plt.subplot(211)
            plt.title("Admit count train data for Sex=%d, Age=%d, SL=%s" % (sex, age, sline))
            plt.ylabel('freq')
            plt.xlabel('admittance count')
            plt.hist(train_data)

            # estimated density
            plt.subplot(212)
            plt.title("Estimated density Sex=%d, Age=%d, SL=%s" % (sex, age, sline))
            plt.ylabel('freq')
            plt.xlabel('admittance count')
            plt.hist(samples)

            plt.show()

    return result
def find_centroid(data, bandwidth=0.003, iter_num=6, halfwidth=0.02):
	kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(data)
	grid = 10
	position = np.array([0,0])
	#halfwidth = 0.02
	for i in range(iter_num):
		low = position-halfwidth
		high = position+halfwidth
		X, Y = np.mgrid[low[0]:high[0]:20j, low[1]:high[1]:20j]
		positions = np.vstack([X.ravel(), Y.ravel()]).T
		img = kde.score_samples(positions)
		position = positions[np.argmax(img)]
		halfwidth = halfwidth*2./(grid-1.)
	return position
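
A usage sketch: find_centroid repeatedly zooms its 20x20 evaluation grid in on the KDE mode, starting from the origin (data scale chosen to fit the default window):

pts = np.random.normal(0, 0.01, size=(1000, 2))
center = find_centroid(pts, bandwidth=0.003)
print(center)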
Example #23
    def __init__(self, optimizer=None, reward_model=None, mode_classifier=KNeighborsClassifier,
                 mode_args=None):
        if reward_model is None:
            self.reward_model = GPRewardModel()
        else:
            self.reward_model = reward_model
        self.reward_model_fitted = False

        self.mode_classifier = mode_classifier
        if mode_args is None:
            self.mode_args = {'weights': 'distance'}

        self.states = []
        self.actions = []
        self.rewards = []

        self.clusters = None
        self.clusters_init = False
        self.cluster_actions = []
        self.cluster_rewards = []
        self.active_clusters = []
        self.n_modes = 0

        self.sa_kde = KernelDensity()  # TODO

        if optimizer is None:
            self.optimizer = BFGSOptimizer(mode='max', num_restarts=3)
            self.optimizer.lower_bounds = -1
            self.optimizer.upper_bounds = 1  # TODO
        else:
            self.optimizer = optimizer
Example #24
def test_density_plot():
    fig, ax = plt.subplots(2, 2, sharex=True, sharey=True)
    
    N = 20
    X = np.concatenate((np.random.normal(0, 1, int(0.3 * N)),
                        np.random.normal(5, 1, int(0.7 * N))))[:, np.newaxis]
                        
    print(np.shape(X))
    X_plot = np.linspace(-5, 10, 1000)[:, np.newaxis]
    print(np.shape(X_plot))
    kde = KernelDensity(kernel='gaussian', bandwidth=0.75).fit(X)
    log_dens = kde.score_samples(X_plot)
    ax[0,0].fill(X_plot[:, 0], np.exp(log_dens), fc='#AAAAFF')
    ax[0,0].text(-3.5, 0.31, "Gaussian Kernel Density")
    ax[0,0].plot(X[:, 0], np.zeros(X.shape[0]) - 0.01, '+k')
    
    plt.show()
Example #25
def makeKDE(m):

	m = m[(m>-100) & (m<100)]
	m = m[:, np.newaxis] # Training data
	l = len(m)
	sigma = np.std(m)
	kdebw = (1.*4/3*sigma**5/ l)**(1./5.)
	
	try:
		X_plot = np.linspace(rng[0], rng[1], 1000)[:, np.newaxis]
		kde = KernelDensity(kernel='gaussian', bandwidth=kdebw).fit(m)
		log_dens = kde.score_samples(X_plot)
		log_dens_exp = np.exp(log_dens)
		KDE_mag = np.float(X_plot[np.argmax(log_dens_exp)])
	except ValueError:
		log_dens_exp = np.ones(len(X_plot[:,0]))*-99.99
		KDE_mag, sigma = -99.99, -99.99
	return X_plot, log_dens_exp, KDE_mag, sigma
Example #26
class OneClassKDE(BaseClassifier):
    _fit_params = ["bandwidth"]
    def __init__(self, *args, **kwargs):
        self.bandwidth = kwargs["bandwidth"]
    
    def fit(self, data, **kwargs):
        #self.train_data = data
        self.kde = KernelDensity(kernel='gaussian', bandwidth=self.bandwidth)
        self.kde.fit(data)
        self.training_score = self.kde.score_samples(data)
        self.direct_thresh = numpy.percentile(self.training_score, 10)
    
    def predict(self, data):
        score = self.kde.score_samples(data)
        self.score = score
        return (score < self.direct_thresh).astype(numpy.int32)*-2+1
    
    def decision_function(self, data):
        return self.score
Example #27
def plot_2d(i1, i2):
    #px = pca.components_[i1]
    #py = pca.components_[i2]
    #xlabel, ylabel = [], []
    #for i in xrange(len(px)):
    #    xlabel.append('%.2f %s' % (px[i], colnames[i]))
    #    ylabel.append('%.2f %s' % (py[i], colnames[i]))
    #ax = plt.axes()
    #ax.yaxis.set_label_coords(-0.05, 0.2)
    plt.clf()
    xy = np.vstack([output[:, i1], output[:, i2]]).T
    kde = KernelDensity(kernel='tophat', bandwidth=0.01, leaf_size=10).fit(xy)
    z = kde.score_samples(xy)
    # Sort the points by density, so that the densest points are plotted last
    idx = z.argsort()
    x, y, z = output[idx, i1], output[idx, i2], z[idx]
    plt.xlabel('Component %i' % i1)
    plt.ylabel('Component %i' % i2)
    plt.scatter(x, y, c=z, s=10, edgecolor='none')
    plt.savefig('ML_data/%s_pca_%s_%s.png' % (name, i1, i2))
def chart_by_time():
  weekday_amrush = [sum(traj[1:]) for traj in trajs_with_time if traj[0].weekday() not in [5,6] and traj[0].hour in [7,8,9]]
  weekday_pmrush = [sum(traj[1:]) for traj in trajs_with_time if traj[0].weekday() not in [5,6] and traj[0].hour in [17,18,19]]
  weekday_midday = [sum(traj[1:]) for traj in trajs_with_time if traj[0].weekday() not in [5,6] and traj[0].hour in [10,11,12,13,14,15,16]]
  weekday_night = [sum(traj[1:]) for traj in trajs_with_time if traj[0].weekday() not in [5,6] and traj[0].hour in [20,21,22,23,0,1,2,3,4,5,6]]
  weekend = [sum(traj[1:]) for traj in trajs_with_time if traj[0].weekday() in [5,6]]

  weekday_amrush_avg = sum(weekday_amrush) / float(len(weekday_amrush))
  weekday_pmrush_avg = sum(weekday_pmrush) / float(len(weekday_pmrush))
  weekday_midday_avg = sum(weekday_midday) / float(len(weekday_midday))
  weekday_night_avg = sum(weekday_night) / float(len(weekday_night))
  weekend_avg = sum(weekend) / float(len(weekend))


  print("weekday_amrush_avg: ", weekday_amrush_avg,
        "weekday_pmrush_avg: ", weekday_pmrush_avg,
        "weekday_midday_avg: ", weekday_midday_avg,
        "weekday_night_avg: ", weekday_night_avg,
        "weekend_avg: ", weekend_avg)

  x = np.linspace(min(weekday_amrush+weekday_pmrush+weekday_midday+weekday_night+weekend), max(weekday_amrush+weekday_pmrush+weekday_midday+weekday_night+weekend), 100).reshape(-1, 1)
  kde_weekday_amrush = KernelDensity(bandwidth=70).fit(np.array(weekday_amrush).reshape(-1, 1))
  density_weekday_amrush = np.exp(kde_weekday_amrush.score_samples(x))
  kde_weekday_pmrush = KernelDensity(bandwidth=70).fit(np.array(weekday_pmrush).reshape(-1, 1))
  density_weekday_pmrush = np.exp(kde_weekday_pmrush.score_samples(x))
  kde_weekday_midday = KernelDensity(bandwidth=70).fit(np.array(weekday_midday).reshape(-1, 1))
  density_weekday_midday = np.exp(kde_weekday_midday.score_samples(x))
  kde_weekday_night = KernelDensity(bandwidth=70).fit(np.array(weekday_night).reshape(-1, 1))
  density_weekday_night = np.exp(kde_weekday_night.score_samples(x))
  kde_weekend = KernelDensity(bandwidth=70).fit(np.array(weekend).reshape(-1, 1))
  density_weekend = np.exp(kde_weekend.score_samples(x))

  plt.plot(x, density_weekday_amrush, 'r')
  plt.plot(x, density_weekday_pmrush, 'y')
  plt.plot(x, density_weekday_midday, 'g')
  plt.plot(x, density_weekday_night, 'b')
  plt.plot(x, density_weekend, 'm')
  plt.xlabel("Time start to endpoint")
  plt.ylabel("Density")
  plt.show()
Example #29
def simplify_data2(x,y,size):
	avg=[]
	result=[]
	kde = KernelDensity(kernel='tophat', bandwidth=0.5).fit(x)
	s = np.linspace(0,size,len(x))
	e = kde.score_samples(s.reshape(-1,1))
	mi, ma = argrelextrema(e, np.less)[0], argrelextrema(e, np.greater)[0]
	start=0	
	for i in mi:
		val=np.average(x[start:i])
		for j in range(start, i):
			result.append(val)
		start=i	
	val=np.average(x[start:])
	for j in range(start, len(x)):
		result.append(val)
	#plt.plot(s, e*0.01+e[mi[0]])
	print(mi)
	print(ma)
	plt.plot(s,x.reshape(1,-1)[0])
	plt.plot(s,result)
	#print x, len(x)
	plt.show()
def chart_by_day():
  #
  # On average, trips on the weekend take less time than trips on weekdays
  # 1337 sec versus 1446 sec
  # 
  weekend_times = [sum(traj[1:]) for traj in trajs_with_time if traj[0].weekday() in [5,6]]
  weekday_times = [sum(traj[1:]) for traj in trajs_with_time if traj[0].weekday() not in [5,6]]
  weekend = sum(weekend_times) / float(len(weekend_times))
  weekday = sum(weekday_times) / float(len(weekday_times))
  print("weekend: ", weekend, "weekday: ", weekday)
  x = np.linspace(min(weekend_times + weekday_times), max(weekend_times + weekday_times), 100).reshape(-1, 1)

  kde_weekend = KernelDensity(bandwidth=100).fit(np.array(weekend_times).reshape(-1, 1))
  density_weekend = np.exp(kde_weekend.score_samples(x))

  kde_weekday = KernelDensity(bandwidth=100).fit(np.array(weekday_times).reshape(-1, 1))
  density_weekday = np.exp(kde_weekday.score_samples(x))

  plt.plot(x, density_weekend, 'r')
  plt.plot(x, density_weekday, 'b')
  plt.xlabel("Time start to Grand Ave: red: weekend, blue, weekday")
  plt.ylabel("Density")
  plt.show()
Example #31
def density_estimation(data, img, imagepath):
    img = io.imread(imagepath, as_gray=True)
    print(data.shape)
    data = data * 4
    l = data.shape[0] // 2

    x = data[::2, :].ravel()
    y = data[1::2, :].ravel()
    xmin, xmax = np.min(x), np.max(x)
    ymin, ymax = np.min(y), np.max(y)

    X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]

    positions = np.vstack([X.ravel(), Y.ravel()])
    values = np.vstack([x, y])

    kernel = stats.gaussian_kde(values, bw_method=0.2)
    Z = np.reshape(kernel(positions), X.shape)

    fig = plt.figure()
    # fig = plt.figure(figsize=(10, 6))
    ax = fig.add_subplot(111)

    ax.imshow(np.rot90(Z),
              cmap=plt.cm.gist_earth_r,
              extent=[xmin, xmax, ymin, ymax])
    ax.plot(x, y, '+k', markersize=0.5)
    ax.set_xlim([xmin, xmax])
    ax.set_ylim([ymin, ymax])
    plt.gca().invert_yaxis()
    ax.axis('off')
    plt.show()

    fig, ax_ = plt.subplots()
    ax_.imshow(img, cmap=plt.cm.gray)
    # ax_.set_title('KDE')

    # ax_.pcolormesh(X, Y, Z, shading='goudaud', alpha=0.4, cmap=plt.cm.gist_earth_r)
    #ax_.contourf(X, Y, Z, alpha=0.45, cmap=plt.cm.gist_earth_r)

    kde_ = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(data.T)
    sc = kde_.score_samples(data.T)

    max_idx = np.argmax(sc)      # argmax of log-scores; exp is monotonic
    shape = data[:, max_idx]
    shape = np.reshape(shape, (l, 2))
    # shape *= 8
    # show_landmarks_detected(shape)
    a = shape[:, 0]
    b = shape[:, 1]
    # plt.plot(a,b, 'r.')
    # plt.show()
    # plt.imshow(img, cmap=plt.cm.gray)
    print(a[0])
    print(b[0])
    ax_.plot(a, b, 'r.', markersize=8, mec='k', mew=0.3)
    ax_.plot(a[0], b[0], 'b.', markersize=8, mec='k', mew=0.3)
    ax_.axis('off')

    #plt.scatter(x, y, c='k', s=2, edgecolor='white')
    plt.show()
Example #32
    return data, lab


X, y = gen_cb(5000, .25, 3.14159 / 4)
X_test, y_test = gen_cb(5000, .25, 3.14159 / 4)
plt.figure()
plt.title('Initial checker board data plot')
plt.plot(X[np.where(y == 1)[0], 0], X[np.where(y == 1)[0], 1], 'o')
plt.plot(X[np.where(y == 2)[0], 0], X[np.where(y == 2)[0], 1], 's', c='r')
# plt.show()

X1 = X[np.where(y == 1)[0], :]
X2 = X[np.where(y == 2)[0], :]

# Kernel density functions
kdfX1 = KernelDensity(kernel='gaussian', bandwidth=0.1).fit(X1)
kdfX2 = KernelDensity(kernel='gaussian', bandwidth=0.1).fit(X2)

score1 = kdfX1.score_samples(X_test)
score2 = kdfX2.score_samples(X_test)

score1Exp = np.exp(score1)
score2Exp = np.exp(score2)

Y = []
i = 0
for i in range(len(X_test)):
    if score1Exp[i] > score2Exp[i]:
        Y.append(1)
    else:
        Y.append(2)
# part 1: category the locations

# test

f, ax = plt.subplots(2, 2)
plotX = False
if plotX == True:
    x = Cx
    X_plot = np.linspace(-180, 180, len(x))[:, np.newaxis]
else:
    x = Cy
    X_plot = np.linspace(-90, 90, len(x))[:, np.newaxis]

# KDE
kde = KernelDensity(kernel='epanechnikov', bandwidth=0.05).fit(x)  # gaussian
log_dens = kde.score_samples(X_plot)
ax[0, 0].fill(X_plot[:, 0], np.exp(log_dens), fc='#AAAAFF')
# ax[0,0].text(x = 'left',y = 'bottom',s= "epanechnikov Kernel, size =50k, b=0.05")
ax[0, 0].set_title(label="epanechnikov Kernel, size =" + str(size / 1000) +
                   "k, b=0.05",
                   loc="center")
# KDE
kde = KernelDensity(kernel='epanechnikov', bandwidth=0.75).fit(x)  # gaussian
log_dens = kde.score_samples(X_plot)
ax[0, 1].fill(X_plot[:, 0], np.exp(log_dens), fc='#AAAAFF')
ax[0, 1].set_title(label="epanechnikov Kernel, size =" + str(size / 1000) +
                   "k, b=0.75",
                   loc="center")
# KDE
kde = KernelDensity(kernel='epanechnikov', bandwidth=2.25).fit(x)  # gaussian
Example #34
file_name = 'SSB'
file_path = r'../data/' + position + '/' + file_name + '_Tsne.csv'
file_write_path = r'../data/' + position + '/' + file_name + '_id_x_y_kde.json'

print('file_path:   {}'.format(file_path))
print('file_write_path:   {}'.format(file_write_path))

with open(file_path) as f:
    ans_dict = {}
    temp_list = []
    id_list = []
    while True:
        line = f.readline()
        if not line:
            break
        line = line.replace('\n', '').split(',')
        id_list.append(line[0])
        temp_list.append([line[1], line[2]])
        ans_dict[line[0]] = {'id': line[0], 'x': line[1], 'y': line[2]}
    X = np.array(temp_list)
    kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(X)
    kde_list = np.exp(kde.score_samples(X))
    print(id_list)
    for i in range(len(id_list)):
        ans_dict[id_list[i]]['kde'] = kde_list[i]
        print(i)
    print(ans_dict)
    with open(file_write_path, 'w+') as fw:
        fw.write(json.dumps(ans_dict))
Example #35
}

# out.txt as Input  and outf.txt as Output

with open('out.txt') as infile, open('outf.txt', 'w') as outfile:
    for line in infile:
        for src, target in replacements2.items():
            line = line.replace(src, target)
        outfile.write(line)

X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])

val = ast.literal_eval(open(fp).read())
size = val[len(val) - 1]

kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(X)

print(kde.score_samples(X))
plt.title("Kernel Density Estimation")
xs = np.linspace(0, size, size * 2).reshape(-1, 1)
plt.plot(xs, np.exp(kde.score_samples(xs)), 'r')
plt.xlabel("Sequence length")
plt.ylabel("Probability density")

tree = xml.parse('0b8aef4d2de04dea51228e406270662035904866.LOOPER.xml')
root = tree.getroot()
text_file = open("Output.txt", "w")
sys.stdout = text_file

for elem in tree.iter():
    print(elem.tag, file=text_file)
Example #36
class GAE:
    def __init__(self, img_shape=(28, 28), encoded_dim=2):
        self.img_shape = img_shape
        self.encoded_dim = encoded_dim
        self.optimizer = Adam(0.001)
        self.optimizer_discriminator = Adam(0.00001)
        self.discriminator = self.get_discriminator_model(img_shape)
        self.decoder = self.get_decoder_model(encoded_dim, img_shape)
        self.encoder = self.get_encoder_model(img_shape, encoded_dim)
        # Initialize Autoencoder
        img = Input(shape=self.img_shape)
        encoded_repr = self.encoder(img)
        gen_img = self.decoder(encoded_repr)
        self.autoencoder = Model(img, gen_img)
        # Initialize Discriminator
        latent = Input(shape=(encoded_dim,))
        gen_image_from_latent = self.decoder(latent)
        is_real = self.discriminator(gen_image_from_latent)
        self.decoder_discriminator = Model(latent, is_real)
        # Finally compile models
        self.initialize_full_model(encoded_dim)

    def initialize_full_model(self, encoded_dim):
        self.autoencoder.compile(optimizer=self.optimizer, loss='mse')
        self.discriminator.compile(optimizer=self.optimizer,
                                   loss='binary_crossentropy',
                                   metrics=['accuracy'])
        # Default start discriminator is not trainable
        for layer in self.discriminator.layers:
            layer.trainable = False

        self.decoder_discriminator.compile(optimizer=self.optimizer_discriminator,
                                           loss='binary_crossentropy',
                                           metrics=['accuracy'])

    @staticmethod
    def get_encoder_model(img_shape, encoded_dim):
        encoder = Sequential()
        encoder.add(Flatten(input_shape=img_shape))
        encoder.add(Dense(1000, activation='relu'))
        encoder.add(Dense(1000, activation='relu'))
        encoder.add(Dense(encoded_dim))
        encoder.summary()
        return encoder

    @staticmethod
    def get_decoder_model(encoded_dim, img_shape):
        decoder = Sequential()
        decoder.add(Dense(1000, activation='relu', input_dim=encoded_dim))
        decoder.add(Dense(1000, activation='relu'))
        decoder.add(Dense(np.prod(img_shape), activation='sigmoid'))
        decoder.add(Reshape(img_shape))
        decoder.summary()
        return decoder

    @staticmethod
    def get_discriminator_model(img_shape):
        discriminator = Sequential()
        discriminator.add(Flatten(input_shape=img_shape))
        discriminator.add(Dense(1000, activation='relu',
                                kernel_initializer=initializer,
                                bias_initializer=initializer))
        discriminator.add(Dense(1000, activation='relu', kernel_initializer=initializer,
                                bias_initializer=initializer))
        discriminator.add(Dense(1, activation='sigmoid', kernel_initializer=initializer,
                                bias_initializer=initializer))
        discriminator.summary()
        return discriminator

    def imagegrid(self, epochnumber):
        fig = plt.figure(figsize=[20, 20])
        for i in range(-5, 5):
            for j in range(-5, 5):
                topred = np.array((i * 0.5, j * 0.5))
                topred = topred.reshape((1, 2))
                img = self.decoder.predict(topred)
                img = img.reshape(self.img_shape)
                ax = fig.add_subplot(10, 10, (i + 5) * 10 + j + 5 + 1)
                ax.set_axis_off()
                ax.imshow(img)
        fig.savefig(str(epochnumber) + ".png")
        plt.show()
        plt.close(fig)

    def train(self, x_train_input, batch_size=128, epochs=5):
        fileNames = glob.glob('models/weights_mnist_autoencoder.*')
        fileNames.sort()
        if len(fileNames) != 0:
            saved_epoch = int(fileNames[-1].split('.')[1])
            self.autoencoder.load_weights(fileNames[-1])
        else:
            saved_epoch = -1
        if saved_epoch < epochs - 1:
            self.autoencoder.fit(x_train_input, x_train_input, batch_size=batch_size,
                                 epochs=epochs,
                                 callbacks=[
                                     keras.callbacks.ModelCheckpoint('models/weights_autoencoder.{epoch:02d}.hdf5',
                                                                     verbose=0,
                                                                     save_best_only=False,
                                                                     save_weights_only=False,
                                                                     mode='auto',
                                                                     period=1),
                                     keras.callbacks.EarlyStopping(monitor='loss', patience=3, min_delta=1e-4,
                                                                   restore_best_weights=True)])
        print("Training KDE")
        codes = self.encoder.predict(x_train_input)
        self.kde = KernelDensity(kernel='gaussian', bandwidth=3.16).fit(codes)
        print("Initial Training of discriminator")
        fileNames = glob.glob('models/weights_mnist_discriminator.*')
        fileNames.sort()
        if len(fileNames) != 0:
            saved_epoch = int(fileNames[-1].split('.')[1])
            self.discriminator.load_weights(fileNames[-1])
        else:
            saved_epoch = -1

        train_count = len(x_train_input)
        if saved_epoch < epochs - 1:
            # Combine real and fake images for discriminator training
            imgs_fake = self.generate(n=train_count)
            valid = np.ones((train_count, 1))  # result for training images
            fake = np.zeros((train_count, 1))  # result for generated fakes
            labels = np.vstack([valid, fake])  # combine together
            images = np.vstack([x_train_input, imgs_fake])
            # Train the discriminator
            self.discriminator.fit(images, labels, epochs=epochs, batch_size=batch_size, shuffle=True,
                                   callbacks=[
                                       keras.callbacks.ModelCheckpoint(
                                           'models/weights_discriminator.{epoch:02d}.hdf5',
                                           verbose=0,
                                           save_best_only=False,
                                           save_weights_only=False,
                                           mode='auto',
                                           period=1),
                                       keras.callbacks.EarlyStopping(monitor='loss', patience=3, min_delta=1e-4,
                                                                     restore_best_weights=True)])

        print("Training GAN")
        self.generateAndPlot(x_train_input, fileName="before_gan.png")
        self.trainGAN(x_train_input, epochs=int(train_count / batch_size), batch_size=batch_size)
        self.generateAndPlot(x_train_input, fileName="after_gan.png")

    def trainGAN(self, x_train_input, epochs=1000, batch_size=128):
        half_batch = int(batch_size / 2)
        for epoch in range(epochs):
            # ---------------Train Discriminator -------------
            # Select a random half batch of images
            idx = np.random.randint(0, x_train_input.shape[0], half_batch)
            imgs_real = x_train_input[idx]
            # Generate a half batch of new images
            imgs_fake = self.generate(n=half_batch)
            valid = np.ones((half_batch, 1))
            fake = np.zeros((half_batch, 1))
            # Train the discriminator
            d_loss_real = self.discriminator.train_on_batch(imgs_real, valid)
            d_loss_fake = self.discriminator.train_on_batch(imgs_fake, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
            codes = self.kde.sample(batch_size)
            # Generator wants the discriminator to label the generated representations as valid
            valid_y = np.ones((batch_size, 1))
            # Train generator
            g_logg_similarity = self.decoder_discriminator.train_on_batch(codes, valid_y)
            # Plot the progress
            if epoch % 50 == 0:
                print("epoch %d [D accuracy: %.2f] [G accuracy: %.2f]" % (epoch, d_loss[1], g_logg_similarity[1]))

    def generate(self, n=10000):
        codes = self.kde.sample(n)
        images = self.decoder.predict(codes)
        return images

    def generateAndPlot(self, x_train_input, n=10, fileName="generated.png"):
        fig = plt.figure(figsize=[20, 20])
        images = self.generate(n * n)
        index = 1
        for image in images:
            image = image.reshape(self.img_shape)
            ax = fig.add_subplot(n, n + 1, index)
            index = index + 1
            ax.set_axis_off()
            ax.imshow(image)
            if index % (n + 1) == 0:
                nearest = findNearest(x_train_input, image)
                ax = fig.add_subplot(n, n + 1, index)
                index = index + 1
                ax.imshow(nearest)
        fig.savefig(fileName)
        plt.show()

    @staticmethod
    def mean_log_likelihood(x_test_input):
        kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(x_test_input)
        return kde.score(x_test_input) / len(x_test_input)  # score() is the total log-likelihood
Example #37
    return P_norm


def density(reads, K0=3000, K1=100_000, kde_method="linear"):
    """
    Estimating density of distances between peace of reads
    :param reads: reads[i,0] - position first peace of read; reads[i,1] - position second peace of read
    :param K0: end of poly approximation
    :param K1: end of first log approximation
    :param K2: end of second log approximation
    :param kde_method: type of kernel in KernelDensity
    :return:
    """
    distances = np.abs(reads[:, 0] - reads[:, 1])

    kde = KernelDensity(kernel=kde_method, bandwidth=200).fit(distances.reshape(-1, 1))
    f = lambda x: kde.score_samples(x.reshape(-1, 1))

    # proximal
    degree = 30
    x0 = np.logspace(0, np.log10(K0 + 1000), 500)
    param0 = np.polyfit(x0, f(x0), degree)

    x1 = np.logspace(np.log10(K0 - 1000), np.log10(K1), 500)
    p = lambda x, a, b: a + b * np.log(x)
    param1, cov = curve_fit(p, x1, f(x1))

    P = (lambda x: np.where(x < K0, np.poly1d(param0)(x),
                            np.where(x < K1, param1[0] + param1[1] * np.log(x),
                                     param1[0] + param1[1] * np.log(x))))
Example #38
def plotTrajAlignment():
    tau = -10  # transition time, use this to find the best b that gives slope a*b/4

    a = 1
    # b = 16/(a*tau)
    c = 4
    d = 5
    fSig = lambda x: a + d / (1 + np.exp((-4 / tau) * (x - c)))

    xMin = -12
    xMax = 25

    delta = 5
    xs = np.linspace(xMin, xMax, 50)

    fig = pl.figure(2, figsize=(6, 4))
    ax = pl.subplot(111)
    ax.set_frame_on(True)
    fig.subplots_adjust(top=0.75)

    # plot trajectory
    ax.plot(xs, fSig(xs), c=trajCol, linewidth=lw, label='Trajectory')

    np.random.seed(2)

    # plot points
    nrPoints = 19
    xsPoints = np.array([
        -10, -8.3, -7.0, -6.25, -5., -4.2, -2.4, -0.6, 0.4, 1.4, 3.2, 5.05,
        6.0, 6.9, 7.9, 8.75, 9.2, 10.25, 12.5
    ])
    diag = np.array([1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
    unqDiags = np.unique(diag)
    nrDiags = unqDiags.shape[0]
    ysPoints = fSig(xsPoints)
    ysPointsPerturb = [0, 0]
    print('xsPoints', xsPoints, ysPoints)
    diagLabels = ['Controls', 'Patients']

    ax = pl.gca()
    xLim = (xMin, xMax - 9)
    yLim = (-4, 8)
    xOrigin = 0
    from sklearn.neighbors import KernelDensity
    kdes = []
    kdeXs = np.linspace(xLim[0], xLim[1], num=100).reshape(-1, 1)
    kernelWidth = np.std(
        xsPoints) / 5.5  # need to test this parameter by visualisation
    for d in [1, 2]:
        ysPointsPerturb[d - 1] = ysPoints[diag == d] + np.random.normal(
            loc=0, scale=0.5, size=diag[diag == d].shape[0])
        ax.scatter(xsPoints[diag == d],
                   ysPointsPerturb[d - 1],
                   marker='x',
                   s=markerSize,
                   linewidths=lw,
                   c=diagCols[d],
                   label=diagLabels[d - 1])

        kdeCurr = KernelDensity(kernel='gaussian', bandwidth=kernelWidth).fit(
            xsPoints[diag == d].reshape(-1, 1))
        scores = np.exp(kdeCurr.score_samples(kdeXs))
        scaledScores = (scores - np.min(scores)) / (np.max(scores) -
                                                    np.min(scores))
        scaledScores = scaledScores * (yLim[1] - yLim[0]) * 2 + yLim[0]

        pl.fill_between(kdeXs.reshape(-1),
                        yLim[0],
                        scaledScores,
                        facecolor=diagCols[d],
                        alpha=0.4)

        maxScore = np.max(scaledScores) + 0.25
        maxInd = np.argmax(scaledScores)
        pl.text(kdeXs[maxInd] - 1.5,
                maxScore,
                diagLabels[d - 1],
                color=diagCols[d])

    pl.plot([xOrigin, xOrigin], [yLim[0], yLim[1]],
            '--',
            c=(0.5, 0.5, 0.5),
            linewidth=lw,
            label='Disease onset')

    # plt.plot(range(5), range(5), 'ro', markersize = 20, clip_on = False, zorder = 100)

    pl.xlim(xLim)
    pl.ylim(yLim)
    # pl.legend(ncol=2, loc='upper center')
    pl.legend(ncol=2, bbox_to_anchor=(0.05, 1.27, 0.95, .102))
    pl.xlabel('Years since $t_0$')
    pl.ylabel('Biomarker Z-Score')
    ax.set_yticks([-4, -2, 0, 2, 4, 6, 8])
    ax.set_yticklabels(['', '', '-3', '-2', '-1', '0', '1'])
    # ax.set_xticks(ax.get_xticks()[1:-2] + xOrigin)
    # ax.set_xticklabels(['-10', '0', '10'])
    pl.gcf().subplots_adjust(left=0.13, bottom=0.15, top=0.75)
    # ax.yaxis.set_label_coords(-0.1, 0.5)
    # ax.set_xlim((xLim[0] + xLimShift, xLim[1] + xLimShift))

    # boxprops = dict(linestyle = '--', linewidth = 3, color = 'blue')
    # medianprops = dict(linestyle = '-.', linewidth = 0.1, color = 'firebrick')
    # ax2 = pl.axes([0.03, 0, 0.1, 1], facecolor = (1, 1, 1, 0))
    # ax2.set_frame_on(False)
    # ax2.set_xlim((0,1))
    # ax2.set_ylim(yLim)
    # ax2.set_yticks([])
    # boxPos = [np.array([1.25]), np.array([1])]
    # # yDisp = [-0.3, 0.6]
    # yDisp = [-1.75, 0]
    # yScale = [1.2, 1]
    # yDisp = [11.5, 0]
    # yScale = [-1.2, 1]
    # nrDiags = 2
    # for d in range(nrDiags):
    #   print('ys %d ' % d, ysPointsPerturb[d]*yScale[d]+yDisp[d])
    #   bp = ax2.boxplot(ysPointsPerturb[d]*yScale[d]+yDisp[d], notch=0, sym='rs', vert=True, whis=1.75, widths=[0.1],
    #            positions=boxPos[d], showfliers=False, patch_artist=True, showmeans=True, medianprops=medianprops)
    #   pylab.setp(bp['boxes'], color = diagCols[unqDiags[d]])

    # make new axis ax3, with 0 - 1 limits
    # ax3 = pl.axes([0,0,1,1], facecolor=(1,1,1,0))
    # ax3.set_frame_on(False)
    #
    # #x,y = np.array([[0.05, 0.1, 0.9], [0.05, 0.5, 0.9]])
    # #line = lines.Line2D(x, y, lw=5., color='r', alpha=0.4)
    # ax3.set_xlim((0, 1))
    # ax3.set_ylim((0, 1))
    # # ax3.plot([0.1,0.56], [0.36, 0.36], '--', c=(0.5,0.5,0.5), linewidth=lw)
    # ax3.set_yticks([])
    # adjustFig(maxSize = (400, 400))
    fig.show()

    return fig
Example #39
def _bivariate_kdeplot(x, y, xscale=None, yscale=None, shade=False,
                       bw="scott", gridsize=50, cut=3, clip=None, legend=True, 
                       legend_data = None, **kwargs):
    
    ax = plt.gca()
    
    # Determine the clipping
    clip = [(-np.inf, np.inf), (-np.inf, np.inf)]
        
    x = xscale(x)
    y = yscale(y)

    x_nan = np.isnan(x)
    y_nan = np.isnan(y)
    
    x = x[~(x_nan | y_nan)]
    y = y[~(x_nan | y_nan)]
    
    if bw == 'scott':
        bw_x = bw_scott(x)
        bw_y = bw_scott(y)
        bw = (bw_x + bw_y) / 2
    elif bw == 'silverman':
        bw_x = bw_silverman(x)
        bw_y = bw_silverman(y)
        bw = (bw_x + bw_y) / 2
    elif isinstance(bw, float):
        bw_x = bw_y = bw
    else:
        raise util.CytoflowViewError(None,
                                     "Bandwidth must be 'scott', 'silverman' or a float")

    kde = KernelDensity(bandwidth = bw, kernel = 'gaussian').fit(np.column_stack((x, y)))
    
    x_support = _kde_support(x, bw_x, gridsize, cut, clip[0])
    y_support = _kde_support(y, bw_y, gridsize, cut, clip[1])
    
    xx, yy = np.meshgrid(x_support, y_support)
    z = kde.score_samples(np.column_stack((xx.ravel(), yy.ravel())))
    z = z.reshape(xx.shape)
    z = np.exp(z)

    n_levels = kwargs.pop("n_levels", 10)
    color = kwargs.pop("color")
    kwargs['colors'] = (color, )
    
    x_support = xscale.inverse(x_support)
    y_support = yscale.inverse(y_support)
    xx, yy = np.meshgrid(x_support, y_support)    
    
    contour_func = ax.contourf if shade else ax.contour
    try:
        cset = contour_func(xx, yy, z, n_levels, **kwargs)
    except ValueError as e:
        raise util.CytoflowViewError(None,
                                     "Something went wrong in {}, bandwidth = {}.  "
                                     .format(contour_func.__name__, bw)) from e
    num_collections = len(cset.collections)
    
    min_alpha = kwargs.pop("min_alpha", 0.2)
    if shade:
        min_alpha = 0
        
    max_alpha = kwargs.pop("max_alpha", 0.9)
    
    alpha = np.linspace(min_alpha, max_alpha, num = num_collections)
    for el in range(num_collections):
        cset.collections[el].set_alpha(alpha[el])

    # Label the axes
    if hasattr(x, "name") and legend:
        ax.set_xlabel(x.name)
    if hasattr(y, "name") and legend:
        ax.set_ylabel(y.name)
        
    # Add legend data
    if 'label' in kwargs:
        legend_data[kwargs['label']] = plt.Rectangle((0, 0), 1, 1, fc = color)

    return ax        
Example #40
def game(players, *ps, **kwargs):
    ranks = {}

    for p in ps:
        # Record what place they got (0 is first place, since Python indexes from 0)
        r = [x for x in range(len(ps)) if ps[x] == p]
        ranks[p] = r[0]
        if p not in players:
            print('Add "' + p + '" to players object with add_player()')
            return

    # Simulate n games
    n = 100000
    if 'n' in kwargs:
        n = int(kwargs['n'])

    # Define function that represents a single simulation
    def single_simulation(ps):
        lambs = {}
        turns = {}

        # Cycle through each person to get their game performance
        for p in ps:

            # Simulate a true AVERAGE of turns for each person from their prior
            # distribution.
            lambs[p] = abs(players[p][-1].sample(1))

            # From this average generate how many turns it would take them to
            # finish a game from a Poisson distribution. Generating multiple
            # numbers per game to serve as a tie breaker.
            turns[p] = list(np.random.poisson(lambs[p], 10))

        # Record the final position of each player in the simulated game
        result = sorted(turns.keys(), key=lambda x: turns[x])
        s_rank = {}
        for i in range(len(result)):
            s_rank[result[i]] = i

        return ([s_rank, lambs])

    # Do this by repeating ps n times in a list
    ps_list = itertools.repeat(ps, n)
    games = list(map(single_simulation, ps_list))

    # Pull out the lambdas that match the games that occurred
    matching_results = [x[1] for x in games if x[0] == ranks]
    matching_n = len(matching_results)
    print(matching_n, 'matching games, or',
          round(matching_n / n, 3) * 100, 'percent')

    # If fewer than 1000 games matched, run the expected number of additional
    # iterations needed to reach roughly 1500 matching game results.
    if matching_n < 1000:
        addl_games = round(((n / matching_n) * (1500 - matching_n)))

        print('Too few matching results, running', addl_games,
              'more simulations')

        # Run additional simulations
        ps_list = itertools.repeat(ps, addl_games)
        tmp = map(single_simulation, ps_list)
        games += tmp

        matching_results = [x[1] for x in games if x[0] == ranks]
        print('Now', len(matching_results), 'matching games')

    # Trying to build a density off of more than 1000 points is computationally
    # expensive and pretty pointless. Sample out 1000 matching games to use for
    # density approximations.
    matching_results = random.sample(matching_results, 1000)

    # Calculate density approximation for everyone
    # This could be done in parallel to speed things up a little, but with the
    # 1000 game limit in matching_results, this shouldn't be too slow.
    for p in ps:
        # Pull out their matching turns to build matching distribution
        md = np.array([x[p][0][0] for x in matching_results]).reshape(-1, 1)

        # Determine the best bandwidth using the same method as in the
        # beginning of the script.
        upper = 1.06 * md.std()
        lower = 1.06 * md.std() / 20
        rng = np.arange(lower, upper, (upper - lower) / 10)
        bws = {}

        for bw in rng:
            kde = KernelDensity(bandwidth=bw)
            s = cross_val_score(kde, md, cv=5).mean()
            bws[bw] = s

        fbw = max(bws.keys(), key=lambda x: bws[x])
        players[p].append(KernelDensity(bandwidth=fbw).fit(md))
Beispiel #41
0
# keep the formatting consistent, it would be nice to have the type of object
# be the same for the first prior as all the other priors. As a result, we'll
# build a density approximation of a normal distribution and use that.

# Build density approximation using scikit learn:
# http://scikit-learn.org/stable/modules/cross_validation.html

# Use CV to get best bandwidth. Use kde.score() as the evualation metric.
X = np.array(np.random.normal(15, 3, 1000)).reshape(-1, 1)
upper = 1.06 * X.std()
lower = 1.06 * X.std() / 20
rng = np.arange(lower, upper, (upper - lower) / 20)
bws = {}

for bw in rng:
    kde = KernelDensity(bandwidth=bw)
    s = cross_val_score(kde, X, cv=5).mean()
    bws[bw] = s

fbw = max(bws.keys(), key=lambda x: bws[x])
kde = KernelDensity(bandwidth=fbw).fit(X)
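# For comparison -- the manual loop above can be replaced by GridSearchCV,
# whose default scorer is kde.score() (the total log-likelihood of the
# held-out fold). A sketch reusing the grid `rng` and data `X` from above:
from sklearn.model_selection import GridSearchCV

grid = GridSearchCV(KernelDensity(), {'bandwidth': rng}, cv=5)
grid.fit(X)
kde_alt = grid.best_estimator_  # equivalent to the fbw-based fit above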

# Define empty players dictionary
players = {}


# Define function that represents a single simulation
def single_simulation(ps):
    lambs = {}
    turns = {}
Beispiel #42
0
def cross_validate(test_data,bandwidths,n_folds=5):
    params = {'bandwidth': bandwidths}
    kf = KFold(n_splits=n_folds,shuffle=True,random_state=0)
    grid = GridSearchCV(KernelDensity(), params,cv=kf)
    grid.fit(test_data)
    return grid.best_estimator_.bandwidth,grid
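# Hypothetical usage of cross_validate -- note that it expects a 2-D
# (n_samples, n_features) array, e.g. a reshape(-1, 1) column vector:
demo = np.random.normal(size=(500, 1))
demo_bw, demo_grid = cross_validate(demo, bandwidths=np.logspace(-2, 0, 20))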
Beispiel #43
0
def kde_histogram(x,x_range=None,bandwidth=None,fill=False,fill_properties=None,
                  line_properties=None,n_folds=3,printout=False,N_max=1000,zorder=0):
    '''
    --- A 1D method for plotting a kernel density estimate (rather than a histogram,
    for example) ---
    
    Inputs:
    -------
    x: x data
    
    x_range: range of the data. If None, then all of the *finite* data is used.
    
    fill: if True, the histogram will have a fill colour.
    
    fill_properties: _dictionary_ of terms for the histogram fill. Can take the keys
    'color' and 'alpha'. Default is 'k' and 0.5.
    
    line_properties: _dictionary_ of terms for the line properties. Can have the 
    keys 'color', 'alpha', 'linewidth', and 'linestyle' (defaults: 'k', 1, 1, 'solid'). 
    
    bandwidth: bandwidth of the KDE. If None, it is optimised by cross-validation.
    
    n_folds: number of folds for the cross validation if no bandwidth is provided.
    
    printout: if True, then the optimised bandwidth will be printed.
    
    N_max: maximum number of points to do the cross-validation on. If more data points
    are provided, a random selection will be used.
    
    zorder: where to 'overlay' the plot.
    
    Outputs:
    --------
    x_range: range of the data.
    
    bandwidth: bandwidth of the KDE.
    '''
    # set the line + fill properties here:
    ####################################
    fp = {'color':'k',
          'alpha':0.5}  

    lp = {'color':'k',
          'alpha':1,
          'linewidth':1,
          'linestyle':'solid'}
    
    if line_properties is not None:
        for l in line_properties.keys():
            lp[l] = line_properties[l]
    if fill_properties is not None:
        for f in fill_properties.keys():
            fp[f] = fill_properties[f]
    ####################################
    
    np.random.seed(0)
    
    # keep only the finite, 'good' data, or the data that is 
    # within the range of x specified:
    if x_range is None:
        select_x = np.isfinite(x)
        # use only the finite values; a single NaN/inf would poison min/max
        x_range = [np.min(x[select_x]), np.max(x[select_x])]
    else:
        select_x = (x >= x_range[0]) & (x < x_range[1])
    x = x[select_x][:, np.newaxis]
    x_std = np.std(x) # for scaling the cross-validation inputs
    if len(x) > N_max:
        x_test = np.random.choice(x.squeeze(),size=N_max,replace=False)
        x_test = x_test[:,np.newaxis]
    else:
        x_test = x.copy()
        
    if bandwidth is None:
        N_steps = 100
        bandwidths = np.logspace(-2,0,N_steps)*x_std
        bandwidth, grid = cross_validate(x_test,bandwidths,n_folds)
        if printout:
            print('Optimal bandwidth found: {0:.3f}'.format(bandwidth))
    
    kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(x)
    plot_x = np.linspace(x_range[0]-x_std,x_range[1]+x_std,100)[:,np.newaxis]
    plot_y = np.exp(kde.score_samples(plot_x))
    plot_x,plot_y = [plot_x.squeeze(),plot_y.squeeze()]
    
    if fill:
        _ = plt.fill_between(plot_x,0,plot_y,color=fp['color'],alpha=fp['alpha'],zorder=zorder)
    _ = plt.plot(plot_x,plot_y,color=lp['color'],alpha=lp['alpha']
                 ,lw=lp['linewidth'],linestyle=lp['linestyle'],zorder=zorder)
        
    return x_range,bandwidth
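# Hypothetical usage of kde_histogram -- draw a filled KDE curve for a noisy
# 1-D sample (a matplotlib figure is assumed to be active, as in the function):
x_demo = np.random.normal(0, 1, 2000)
demo_range, demo_bw = kde_histogram(x_demo, fill=True,
                                    fill_properties={'color': 'C0', 'alpha': 0.3},
                                    printout=True)
plt.show()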
Beispiel #44
0
class DensityEstimator:
    def __init__(self,
                 training_set,
                 method_name,
                 n_components=None,
                 log_dir=None,
                 second_stage_beta=None):
        self.log_dir = log_dir
        self.training_set = training_set
        self.fitting_done = False
        self.method_name = method_name
        self.second_density_mdl = None
        self.skip_fitting_and_sampling = False
        if method_name == "GMM_Dirichlet":
            self.model = mixture.BayesianGaussianMixture(
                n_components=n_components,
                covariance_type='full',
                weight_concentration_prior=1.0 / n_components)
        elif method_name == "GMM":
            self.model = mixture.GaussianMixture(n_components=n_components,
                                                 covariance_type='full',
                                                 max_iter=2000,
                                                 verbose=2,
                                                 tol=1e-3)
        elif method_name == "GMM_1":
            self.model = mixture.GaussianMixture(n_components=1,
                                                 covariance_type='full',
                                                 max_iter=2000,
                                                 verbose=2,
                                                 tol=1e-3)
        elif method_name == "GMM_10":
            self.model = mixture.GaussianMixture(n_components=10,
                                                 covariance_type='full',
                                                 max_iter=2000,
                                                 verbose=2,
                                                 tol=1e-3)
        elif method_name == "GMM_20":
            self.model = mixture.GaussianMixture(n_components=20,
                                                 covariance_type='full',
                                                 max_iter=2000,
                                                 verbose=2,
                                                 tol=1e-3)
        elif method_name == "GMM_100":
            self.model = mixture.GaussianMixture(n_components=100,
                                                 covariance_type='full',
                                                 max_iter=2000,
                                                 verbose=2,
                                                 tol=1e-3)
        elif method_name == "GMM_200":
            self.model = mixture.GaussianMixture(n_components=200,
                                                 covariance_type='full',
                                                 max_iter=2000,
                                                 verbose=2,
                                                 tol=1e-3)

        elif method_name.find("aux_vae") >= 0:
            have_2nd_density_est = False
            if method_name[8:] != "":
                self.second_density_mdl = method_name[8:]
                have_2nd_density_est = True
            self.model = VaeModelWrapper(
                input_shape=(training_set.shape[-1], ),
                latent_space_dim=training_set.shape[-1],
                have_2nd_density_est=have_2nd_density_est,
                log_dir=self.log_dir,
                sec_stg_beta=second_stage_beta)

        elif method_name == "given_zs":
            files = os.listdir(log_dir)
            for z_smpls in files:
                if z_smpls.endswith('.npy'):
                    break
            self.z_smps = np.load(os.path.join(log_dir, z_smpls))
            self.skip_fitting_and_sampling = True

        elif method_name.upper() == "KDE":
            self.model = KernelDensity(kernel='gaussian', bandwidth=0.425)
            # self.model = KernelDensity(kernel='tophat', bandwidth=15)
        else:
            raise NotImplementedError("Method specified : " +
                                      str(method_name) +
                                      " doesn't have an implementation yet.")

    def fitorload(self, file_name=None):
        if not self.skip_fitting_and_sampling:
            if file_name is None:
                self.model.fit(self.training_set, self.second_density_mdl)
            else:
                self.model.load(file_name)

        self.fitting_done = True

    def score(self, X, y=None):
        if self.method_name.upper().find(
                "AUX_VAE") >= 0 or self.skip_fitting_and_sampling:
            raise NotImplementedError(
                "Log-likelihood evaluation is not implemented for VAE models, "
                "or fitting was skipped")
        else:
            return self.model.score(X, y)

    def save(self, file_name):
        if not self.skip_fitting_and_sampling:
            if self.method_name.find('vae') >= 0:
                self.model.save(file_name)
            else:
                with open(file_name, 'wb') as f:
                    pickle.dump(self.model, f)

    def reconstruct(self, input_batch):
        if self.method_name.upper().find("AUX_VAE") < 0:
            raise ValueError("Non autoencoder style density estimator: " +
                             self.method_name)
        return self.model.reconstruct(input_batch)

    def get_samples(self, n_samples):
        if not self.skip_fitting_and_sampling:
            if not self.fitting_done:
                self.fitorload()
            scrmb_idx = np.array(range(n_samples))
            np.random.shuffle(scrmb_idx)
            if self.log_dir is not None:
                pickle_path = os.path.join(self.log_dir,
                                           self.method_name + '_mdl.pkl')
                with open(pickle_path, 'wb') as f:
                    pickle.dump(self.model, f)
            if self.method_name.upper() == "GMM_DIRICHLET" or self.method_name.upper() == "AUX_VAE" \
                    or self.method_name.upper() == "GMM" or self.method_name.upper() == "GMM_1" \
                    or self.method_name.upper() == "GMM_10" or self.method_name.upper() == "GMM_20" \
                    or self.method_name.upper() == "GMM_100" or self.method_name.upper() == "GMM_200"\
                    or self.method_name.upper().find("AUX_VAE") >= 0:
                return self.model.sample(n_samples)[0][scrmb_idx, :]
            else:
                return np.random.shuffle(
                    self.model.sample(n_samples))[scrmb_idx, :]
        else:
            return self.z_smps
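# Hypothetical usage of DensityEstimator -- fit the KDE backend on a toy
# 2-D training set and draw fresh samples from it:
demo_train = np.random.randn(1000, 2)
demo_est = DensityEstimator(demo_train, method_name="KDE")
demo_est.fitorload()
demo_samples = demo_est.get_samples(100)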
Beispiel #45
0
def mean_log_likelihood(x_test_input):
    # NOTE: the original snippet fit a KDE here but returned nothing; returning
    # the mean log-density of the inputs is the most plausible intent.
    kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(x_test_input)
    return np.mean(kde.score_samples(x_test_input))
Beispiel #47
0
def variable_score(variable, parents, data):
    score = 0
    if len(parents) == 0:
        #print(data)
        column = data[variable]
        #print(column)
        #kernel = kde.gaussian_kde(column.values)
        #
        #x = np.linspace(min(column.values), max(column.values), 1000)
        #print(kernel.covariance_factor())
        #plt.plot(x, np.log(kernel(x)))
        #plt.show()
        #sample = kernel.resample(5000)
        #kernel = kde.gaussian_kde(sample)
        #plt.plot(x, kernel(x))
        #plt.show()
        #start = time.time()
        #print(kernel.logpdf(column.values).sum())
        #print("scipy: ", time.time() - start)

        #grid = GridSearchCV(KernelDensity(), {'bandwidth': np.linspace(0.1,1.0,10)}, cv=10)
        #grid.fit(column.values[:, None])
        #print(grid.best_params_)

        vals = column.values[:, np.newaxis]

        #x = np.linspace(min(column.values), max(column.values), 1000)
        #kdens = KernelDensity(kernel='gaussian', bandwidth=1, rtol=0).fit(vals)
        #plt.plot(x, kdens.score_samples(x[:, np.newaxis]))
        #plt.show()

        start = time.time()
        kdens = KernelDensity(kernel='gaussian', bandwidth=0.2,
                              rtol=1E-2).fit(vals)
        vals_desc = np.sort(vals, axis=0)[::-1]
        plt.plot(vals_desc, kdens.score_samples(vals_desc))
        plt.show()
        print(kdens.score(vals))
        print("sklearn: ", time.time() - start)

        #array = np.unique(data[variable].values)
        #plt.scatter(array, [0] * len(array))
        #plt.plot(np.linspace(min(array), max(array), 1000), kernel(np.linspace(min(array), max(array), 1000)) )
        #plt.show()

        #start = time.time()
        #print(column.apply(event_score, args=(kernel,)).sum())
        #print("apply: ", time.time() - start)

        #start = time.time()
        #density = sm.nonparametric.KDEMultivariate(data=[column], var_type='c')
        #print(len(column.values), len(np.unique(column.values)))
        #print(np.log(density.pdf(column.values)).sum())
        #print("statsmodels: ", time.time() -  start)
    else:
        cols = parents + [variable]
        d = data[cols]
        #print(d)
        #print(d.values)
        samp = KernelDensity(kernel='gaussian', bandwidth=0.2,
                             rtol=1E-8).fit(d.values).sample(5000)
        score1 = KernelDensity(kernel='gaussian', bandwidth=0.2,
                               rtol=1E-8).fit(samp).score(d.values)
        samp = KernelDensity(kernel='gaussian', bandwidth=0.2,
                             rtol=1E-8).fit(data[parents].values).sample(5000)
        score2 = KernelDensity(kernel='gaussian', bandwidth=0.2,
                               rtol=1E-8).fit(samp).score(data[parents].values)
        print(variable, parents, score1, score2, score1 - score2)
        return score1 - score2
        #print(KernelDensity(bandwidth=0.2).fit([np.linspace(-5,5, 100)]).score_samples([np.linspace(-5,5, 100)]))
        #plt.plot(np.linspace(-5, 5, 100), KernelDensity(bandwidth=0.2).fit([np.linspace(-5,5, 100)]).score_samples([np.linspace(-5,5, 100)]))
        #plt.show()
    return score
Beispiel #48
0
Xd1 = np.array(X_std)
pcd = pca_data1.fit(Xd1).transform(X_std1)
print(pcd.shape)

# In[31]:

tuned_parameters = [{
    'kernel': ['rbf'],
    'gamma': [1e-3, 1e-4],
    'C': [1, 10, 100, 500, 1000]
}, {
    'kernel': ['linear'],
    'C': [1, 10, 100, 500, 1000]
}]
# log-spaced bandwidth grid in [0.1, 10]; the original '10 *' grid contained
# negative bandwidths, which KernelDensity rejects
bds = 10 ** np.linspace(-1, 1, 20)
clf = GridSearchCV(KernelDensity(kernel='gaussian'), {'bandwidth': bds}, cv=5)
clf.fit(pcd, y1)
bdwidth = clf.best_params_['bandwidth']

print("Bandwidth : ", bdwidth)
kde = KernelDensity(kernel='gaussian', bandwidth=bdwidth)
kde.fit(pcd)
print(kde)

# ### GMM

# In[33]:

n_comps = np.arange(1, 21)
clf_gauss_models = [
    GaussianMixture(n_components=n, covariance_type='full').fit(pcd)
Beispiel #49
0
    def get_flashes(self, prob_thresh=0.2):
        TGF_df = self.df[self.df['cluster'] == self.TGF_cluster[0]]
        TGF_df = TGF_df[TGF_df['prob'] > prob_thresh]
        #print(TGF_df)
        if (len(TGF_df) > 1 and self.pre_flash is not None
                and self.post_flash is not None):
            # I want to group things by times
            from sklearn.neighbors.kde import KernelDensity
            kde = KernelDensity(kernel='gaussian', bandwidth=0.25).fit(
                np.asarray(TGF_df['time_sep']).reshape(-1, 1))
            s = np.linspace(-1000, 1000, 5000)
            e = kde.score_samples(s.reshape(-1, 1))
            # plt.plot(s, np.exp(e))
            # plt.xlim(-600,600)
            # plt.ylim(0,0.05)
            #
            # plt.plot(TGF_df['time_sep'],np.zeros(len(TGF_df['time_sep'])),'b*')
            #

            from scipy.signal import argrelextrema
            mi, ma = argrelextrema(e,
                                   np.less)[0], argrelextrema(e, np.greater)[0]
            cuts = s[mi]

            cuts = np.insert(cuts, 0, -1000.0)
            cuts = np.append(cuts, 1000.0)

            flash_list = []
            #print(self.TGF_ID)
            for indx, cut in enumerate(cuts):
                if indx == len(cuts) - 1:
                    break
                group = TGF_df[TGF_df['time_sep'].between(cut, cuts[indx + 1])]
                for _ in group['time_sep']:
                    flash_list.append(indx)

            TGF_df['flash'] = flash_list
            location = TGF_df.loc[
                (round(TGF_df['time_sep'], 6) == round(self.TGF_time[0], 6))
                & round(TGF_df['lat'], 6).isin(self.TGF_lat)].index.values

            self.TGF_flash = TGF_df.loc[location]['flash'].values[0]  # .ix was removed from pandas
            self.TGF_flash_full = TGF_df[TGF_df['flash'] == self.TGF_flash]
            self.TGF_df = TGF_df
            self.pre_flash = self.TGF_df[self.TGF_df['flash'] ==
                                         self.TGF_flash - 1]
            self.post_flash = self.TGF_df[self.TGF_df['flash'] ==
                                          self.TGF_flash + 1]
            if len(self.post_flash['flash']) == 0:
                self.pre_flash, self.post_flash = (None, None)
                self.TGF_flash = 0
                self.dts = None
                self.dt_pre = None
                self.dt_post = None
                self.TGF_flash_full = None
            #plt.plot(cuts,np.zeros(len(cuts)), 'r.')
            #plt.show()
            else:
                dts = []
                for flash in set(TGF_df['flash']):
                    if flash > 0:
                        this_flash_chunk = TGF_df[TGF_df['flash'] == flash]
                        this_flash_start = this_flash_chunk.iloc[0]['time_sep']

                        prev_flash_chunk = TGF_df[TGF_df['flash'] == flash - 1]
                        prev_flash_end = prev_flash_chunk.iloc[-1]['time_sep']

                        dt = this_flash_start - prev_flash_end
                        dts.append(dt)
                self.dts = np.asarray(dts)
                self.dt_pre = self.TGF_flash_full['time_sep'].values[
                    0] - self.pre_flash['time_sep'].values[-1]
                self.dt_post = self.post_flash['time_sep'].values[
                    0] - self.TGF_flash_full['time_sep'].values[-1]
        else:
            TGF_df['flash'] = 0
            self.TGF_df = TGF_df
Beispiel #50
0
def Kernel_density_estimate(data, var_name1, var_name2, time, z):
    ''' Kernel Density Estimation:
    from sklearn.neighbors import KernelDensity

    Parameters:
    - bandwidth: The bandwidth here acts as a smoothing parameter, controlling the tradeoff between bias and variance
    in the result. A large bandwidth leads to a very smooth (i.e. high-bias) density distribution.
    A small bandwidth leads to an unsmooth (i.e. high-variance) density distribution.
    'metric': 'euclidean' (distance metric to use. Note that not all metrics are valid with all algorithms.)
    'atol': 0 (The desired absolute tolerance of the result.)
    'leaf_size': 40
    'kernel': 'gaussian'
    'rtol': 0 (The desired relative tolerance of the result.)
    'breadth_first': True
    'metric_params': None
    'algorithm': 'auto'
    '''
    from sklearn.neighbors.kde import KernelDensity
    amp = 100
    data_aux = np.ndarray(shape=((nx * ny), nvar))
    data_aux[:, 0] = data[:, 0]
    data_aux[:, 1] = data[:, 1] * amp

    # construct a kernel density estimate of the distribution
    print(" - computing KDE in spherical coordinates")
    # kde = KernelDensity(bandwidth=0.04, metric='haversine',
    #                     kernel='gaussian', algorithm='ball_tree')
    # kde.fit(Xtrain[ytrain == i])

    # Plotting
    n_sample = 100
    x_ = np.linspace(np.amin(data[:, 0]), np.amax(data[:, 0]), n_sample)
    y_ = np.linspace(np.amin(data[:, 1]), np.amax(data[:, 1]), n_sample)
    X, Y = np.meshgrid(x_, y_)
    XX = np.array([X.ravel(), Y.ravel()]).T

    x_aux = np.linspace(np.amin(data_aux[:, 0]), np.amax(data_aux[:, 0]),
                        n_sample)
    y_aux = np.linspace(np.amin(data_aux[:, 1]), np.amax(data_aux[:, 1]),
                        n_sample)
    X_aux, Y_aux = np.meshgrid(x_aux, y_aux)
    XX_aux = np.array([X_aux.ravel(), Y_aux.ravel()]).T

    fig = plt.figure(figsize=(12, 16))
    # the six panels below differed only in bandwidth, so draw them in a loop
    for i, bw in enumerate([5e-2, 1e-2, 8e-3, 5e-3, 2e-3, 1e-3]):
        plt.subplot(3, 2, i + 1)
        kde = KernelDensity(kernel='gaussian', bandwidth=bw).fit(data_aux)
        # Z = np.exp(kde.score_samples(XX_aux)).reshape(X.shape)
        Z_log = kde.score_samples(XX_aux).reshape(X.shape)
        plt.scatter(data_aux[:, 0], data_aux[:, 1], s=5, alpha=0.2)
        ax1 = plt.contour(X_aux, Y_aux, Z_log)
        plt.colorbar(ax1, shrink=0.8)
        labeling(var_name1, var_name2, amp)
        plt.title('bw = ' + str(bw))

    fig.suptitle('Cloud Closure: Kernel Density Estimate (gaussian)',
                 fontsize=20)
    plt.savefig(
        os.path.join(
            fullpath_out, 'CloudClosure_figures', 'CC_' + var_name1 + '_' +
            var_name2 + '_' + str(time) + '_z' + str(int(z)) + 'm_KDE.png'))
    plt.close()

    print('KDE shapes: ', kde.score_samples(XX).shape, X.shape)
    print(kde.get_params())

    return kde, kde
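# Aside -- the `amp` rescaling above works around KernelDensity using a single
# bandwidth for every dimension; columns on very different scales should be
# standardised first. An illustrative alternative:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KernelDensity

demo_xy = np.random.randn(500, 2) * [1.0, 0.01]        # badly scaled columns
demo_scaled = StandardScaler().fit_transform(demo_xy)  # unit variance each
demo_kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(demo_scaled)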
Beispiel #51
0
compilers = [
    "Intel_data.txt", "GCC_data.txt", "Clang_data.txt", "Intel_avg_data.txt",
    "GCC_avg_data.txt", "Clang_avg_data.txt", "MKL_data.txt"
]
for currentCompiler in compilers:
    data = np.genfromtxt(currentCompiler, skip_header=0)

    fig, ax = plt.subplots()
    ax.set_yscale('log')
    ax.hist(data, 30, density=True, facecolor='green', alpha=0.75)
    #ax.axis([0, 25000, 0, 0.00015])

    from sklearn.neighbors.kde import KernelDensity
    import numpy as np
    m1 = np.min(data)
    m2 = np.max(data)
    dm = m2 - m1
    kde0 = KernelDensity(kernel='gaussian',
                         bandwidth=dm / 30).fit(data.reshape(-1, 1))
    X_plot = np.linspace(m1 - 0.2 * dm, m2 + 0.2 * dm, 1000).reshape(-1, 1)
    Dens0 = np.exp(kde0.score_samples(
        X_plot))  # score_samples returns the log of the density
    fig, ax = plt.subplots()
    ax.plot(X_plot, Dens0, color='blue')
    ax.set_yscale('log')
    ax.set_ylim(0.01, np.max(Dens0) * 1.1)
    #plt.show()

    save(currentCompiler, fmt='pdf')
    save(currentCompiler, fmt='png')
Beispiel #52
0
from IPython.display import Image
from sklearn.neighbors.kde import KernelDensity

f = open("crater_tuto")
#For python 3
#crater = pickle.load(f,encoding='latin1')
#For python 2
crater = pickle.load(f)
f.close()

plt.scatter(crater[:, 0], crater[:, 1], s=0.1)
plt.show()

#create 10 by 10 cubical complex:
xval = np.arange(0, 10, 0.05)
yval = np.arange(0, 10, 0.05)
nx = len(xval)
ny = len(yval)

#Now we compute the values of the kernel density estimator on the center of each point of our grid.
#The values will be stored in the array scores.
kde = KernelDensity(kernel='gaussian', bandwidth=0.3).fit(crater)
positions = np.array([[u, v] for u in xval for v in yval])
scores = -np.exp(kde.score_samples(X=positions))

#And subsequently construct a cubical complex based on the scores.
cc_density_crater = gd.CubicalComplex(dimensions=[nx, ny],
                                      top_dimensional_cells=scores)
# OPTIONAL
pers_density_crater = cc_density_crater.persistence()
gd.plot_persistence_diagram(pers_density_crater)  # don't shadow pyplot's `plt`
plt.show()
Beispiel #53
0
	msa_vectors.append(np.ndarray.flatten(tools.convert_samp_to_one_hot(msa[samp], n_aa)))
msa_vectors = np.array(msa_vectors)
print(msa_vectors.shape)

#PCA
pca = PCA(n_components=20)
pca.fit(msa_vectors[1000:])
a_samps_pca = pca.transform(msa_vectors[1000:])
b_samps_pca = pca.transform(msa_vectors[:1000])
print(a_samps_pca.shape)

#KDE
# for bw in [.01, .1, 1., 10.]:
for bw in [ 1.]:

	kde = KernelDensity(kernel='gaussian', bandwidth=bw).fit(a_samps_pca)
	# density_train = kde.score_samples(msa_vectors)
	print(bw, kde.score(b_samps_pca))

densities = kde.score_samples(b_samps_pca)
# densities = np.ones(1000)

#Scale densities to betw 0 and 1
min_density = np.min(densities)
densities = densities - min_density + 1.

weights = np.reciprocal(densities)

max_weights = np.max(weights)
weights = weights / max_weights
Beispiel #54
0
def get_groups(data, plot=True):
    # Silverman's rule-of-thumb bandwidth: 1.06 * sigma * n^(-1/5)
    bandwidth = 1.06 * np.std(data) * len(data) ** (-1 / 5)
    s = np.linspace(0, np.max(data), 100)
    kde = KernelDensity(kernel='gaussian', bandwidth=max(bandwidth,
                                                         1e-10)).fit(data)
    e = kde.score_samples(s.reshape(-1, 1))
    e = np.exp(e)
    mins, maxs = argrelextrema(e, np.less)[0], argrelextrema(e, np.greater)[0]
    groups = []
    groups_idxs = []
    groups_maxs = []  # maximum likelihood point
    if len(mins) == 1:
        groups.append(data[data < s[mins[0]]])
        groups_idxs.append(np.where(data < s[mins[0]])[0])
        groups_maxs.append(
            get_most_prob_value(s[:mins[0] + 1], e[:mins[0] + 1]))

        groups.append(data[data >= s[mins[0]]])
        groups_idxs.append(np.where(data >= s[mins[0]])[0])
        groups_maxs.append(get_most_prob_value(s[mins[0]:], e[mins[0]:]))
    elif len(mins) == 0:
        groups = [data]
        groups_idxs.append(np.arange(0, len(data), 1))
        groups_maxs.append(get_most_prob_value(s, e))
    else:
        for i in range(len(mins)):
            min_lp = s[mins[i]]
            if i == 0:  # first one
                groups.append(data[data < min_lp])
                groups_idxs.append(np.where(data < min_lp)[0])
                groups_maxs.append(
                    get_most_prob_value(s[:mins[0] + 1], e[:mins[0] + 1]))

                next_mi = s[mins[i + 1]]
                groups.append(data[(data >= min_lp) * (data < next_mi)])
                groups_idxs.append(
                    np.where((data >= min_lp) * (data < next_mi))[0])
                groups_maxs.append(
                    get_most_prob_value(s[mins[i]:mins[i + 1] + 1],
                                        e[mins[i]:mins[i + 1] + 1]))

            elif i == len(mins) - 1:  # last one
                groups.append(data[data >= min_lp])
                groups_idxs.append(np.where(data >= min_lp)[0])
                groups_maxs.append(
                    get_most_prob_value(s[mins[i]:], e[mins[i]:]))
            else:
                next_mi = s[mins[i + 1]]
                groups.append(data[(data >= min_lp) * (data < next_mi)])
                groups_idxs.append(
                    np.where((data >= min_lp) * (data < next_mi))[0])
                groups_maxs.append(
                    get_most_prob_value(s[mins[i]:mins[i + 1] + 1],
                                        e[mins[i]:mins[i + 1] + 1]))

    if plot:
        plt.plot(s, e)
        print(groups_maxs)
        print([len(g) for g in groups])
        print([g[-5:] for g in groups])
        plt.plot(s[maxs], e[maxs], 'go', s[mins], e[mins], 'ro')
        for i in range(len(mins)):
            if i == 0:  # first one
                plt.plot(s[:mins[i] + 1], e[:mins[i] + 1])
            elif i == len(mins) - 1:  # last one
                plt.plot(s[mins[i]:], e[mins[i]:])
            else:
                plt.plot(s[mins[i]:mins[i + 1] + 1],
                         e[mins[i]:mins[i + 1] + 1])
        for i, d in enumerate(data):
            plt.plot(d, 0.01, 'bo', markersize=10)
        plt.show(block=False)
        plt.pause(0.5)
        plt.close()

    for k, (idx_group, group) in enumerate(zip(groups_idxs, groups)):
        for idx, d in zip(idx_group, group):
            if data[idx] != d:
                print(data[idx])
                print(d)
                print('sorcery')
            assert data[idx] == d
    print('ALP space : bins in [0 , {}], bandwidth={}, clusters={}'.format(
        np.max(data), bandwidth, [len(c) for c in groups]))
    return groups_idxs, groups, groups_maxs
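# Hypothetical usage of get_groups -- split a clearly bimodal 1-D sample at
# the KDE valley (assumes the helper get_most_prob_value from this script):
demo_pts = np.concatenate([np.random.normal(2.0, 0.3, 50),
                           np.random.normal(6.0, 0.5, 50)]).reshape(-1, 1)
demo_idxs, demo_groups, demo_maxs = get_groups(demo_pts, plot=False)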
Beispiel #55
0
def run(imagepath):
    t0 = time()

    image = io.imread(imagepath, as_gray=True)
    pyramid = pyramid_gaussian.get_pyramid(image)

    cfg.num_of_patches = cfg.num_test_patches  # changing the number of patches

    for img, sc_name in zip(pyramid, cfg.scale_names):
        if sc_name == '0_12':
            init_flag = True
            ss = []
            ns = 0

            patches, centres = sample_patches.create_patches_randomly(
                img, subshape=ss, initialization=init_flag)
            f = extract_features.extractFeaturesForPatches(patches)

            # 0: femur
            # 1: hip (cadera)
            # 2: superior
            # 3: inferior
            d_tilde, f_tilde, c_tilde = build_matrices(cfg.bone_structures[3],
                                                       sc_name,
                                                       n_subs=ns)

            l = d_tilde.shape[0] // 2  # number of landmarks

            # Get the points
            f_hat = np.concatenate((f_tilde, f), axis=1)
            c_bar = compute_C_matrix(centres, l)
            c = np.tile(centres, (l, 1))
            d = compute_D_matrix(f_hat, d_tilde, c_bar, l)
            data = d + c
            kde_ = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(data.T)
            sc = kde_.score_samples(data.T)
            max_idx = np.argmax(sc)  # exp() is monotonic, and `max` shadows the builtin
            shape = data[:, max_idx]
            shape = np.reshape(shape, (l, 2))
            a = shape[:, 0]
            b = shape[:, 1]
            a *= 8
            b *= 8
            # fig, ax_ = plt.subplots()
            # ax_.imshow(image, cmap=plt.cm.gray)
            # ax_.plot(a, b, 'r.', markersize=8, mec='k', mew=0.3)
            # ax_.plot(a[5], b[5], 'b.', markersize=8, mec='k', mew=0.3)
            # ax_.plot(a[12], b[12], 'b.', markersize=8, mec='k', mew=0.3)
            # ax_.axis('off')
            # plt.show()
            a /= 4
            b /= 4

        else:
            for count in range(5):
                init_flag = False
                if count == 4:
                    ss = shape[(4 * count):(4 * count + 5), :]
                else:
                    ss = shape[(4 * count):(4 * count + 4), :]
                ns = count
                # if sc_name != '0_25':
                #     ss = shape[0:4,:]

                patches, centres = sample_patches.create_patches_randomly(
                    img, subshape=ss, initialization=init_flag)
                f = extract_features.extractFeaturesForPatches(patches)

                # 0: femur
                # 1: hip (cadera)
                # 2: superior
                # 3: inferior
                d_tilde, f_tilde, c_tilde = build_matrices(
                    cfg.bone_structures[3], sc_name, n_subs=ns)

                l = d_tilde.shape[0] // 2  # number of landmarks

                # Get the points
                f_hat = np.concatenate((f_tilde, f), axis=1)
                c_bar = compute_C_matrix(centres, l)
                c = np.tile(centres, (l, 1))
                d = compute_D_matrix(f_hat, d_tilde, c_bar, l)
                data = d + c
                kde_ = KernelDensity(kernel='gaussian',
                                     bandwidth=0.2).fit(data.T)
                sc = kde_.score_samples(data.T)
                max_idx = np.argmax(sc)
                shape1 = data[:, max_idx]
                shape1 = np.reshape(shape1, (l, 2))
                # a = shape1[:, 0]
                # b = shape1[:, 1]
                # if sc_name == '0_25':
                #     a *= 4
                #     b *= 4
                # elif sc_name == '0_5':
                #     a *= 2
                #     b *= 2
                # fig, ax_ = plt.subplots()
                # ax_.imshow(image, cmap=plt.cm.gray)
                # ax_.plot(a, b, 'r.', markersize=8, mec='k', mew=0.3)
                # ax_.axis('off')
                # plt.show()
                # if sc_name == '0_25':
                #     a /= 2
                #     b /= 2
                if count == 4:
                    shape[(4 * count):(4 * count + 5), :] = shape1[0:5, :] * 2
                else:
                    shape[(4 * count):(4 * count + 4), :] = shape1[0:4, :] * 2

            a = shape[:, 0]
            b = shape[:, 1]
            if sc_name == '0_25':
                a = a * 2
                b = b * 2
            if sc_name == '1':
                a = a / 2
                b = b / 2
            # fig, ax_ = plt.subplots()
            # ax_.imshow(image, cmap=plt.cm.gray)
            # ax_.plot(a, b, 'r.', markersize=8, mec='k', mew=0.3)
            # ax_.plot(a[5], b[5], 'b.', markersize=8, mec='k', mew=0.3)
            # ax_.plot(a[12], b[12], 'b.', markersize=8, mec='k', mew=0.3)
            # ax_.axis('off')
            # plt.show()

    izquierdaX = np.copy(a)
    izquierdaY = np.copy(b)

    for img, sc_name in zip(pyramid, cfg.scale_names):
        if sc_name == '0_12':
            init_flag = True
            ss = []
            ns = 0

            patches, centres = sample_patches.create_patches_randomly(
                img, subshape=ss, initialization=init_flag)
            f = extract_features.extractFeaturesForPatches(patches)

            # 0: femur
            # 1: hip (cadera)
            # 2: superior
            # 3: inferior
            d_tilde, f_tilde, c_tilde = build_matrices(cfg.bone_structures[1],
                                                       sc_name,
                                                       n_subs=ns)

            l = d_tilde.shape[0] // 2  # number of landmarks

            # Get the points
            f_hat = np.concatenate((f_tilde, f), axis=1)
            c_bar = compute_C_matrix(centres, l)
            c = np.tile(centres, (l, 1))
            d = compute_D_matrix(f_hat, d_tilde, c_bar, l)
            data = d + c
            kde_ = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(data.T)
            sc = kde_.score_samples(data.T)
            max_idx = np.argmax(sc)
            shape = data[:, max_idx]
            shape = np.reshape(shape, (l, 2))
            a = shape[:, 0]
            b = shape[:, 1]
            a *= 8
            b *= 8
            # fig, ax_ = plt.subplots()
            # ax_.imshow(image, cmap=plt.cm.gray)
            # ax_.plot(a, b, 'r.', markersize=8, mec='k', mew=0.3)
            # ax_.plot(a[5], b[5], 'b.', markersize=8, mec='k', mew=0.3)
            # ax_.axis('off')
            # plt.show()
            a /= 4
            b /= 4

        else:
            for count in range(5):
                init_flag = False
                if count == 4:
                    ss = shape[(4 * count):(4 * count + 5), :]
                else:
                    ss = shape[(4 * count):(4 * count + 4), :]
                ns = count
                # if sc_name != '0_25':
                #     ss = shape[0:4,:]

                patches, centres = sample_patches.create_patches_randomly(
                    img, subshape=ss, initialization=init_flag)
                f = extract_features.extractFeaturesForPatches(patches)

                # 0: femur
                # 1: hip (cadera)
                # 2: superior
                # 3: inferior
                d_tilde, f_tilde, c_tilde = build_matrices(
                    cfg.bone_structures[1], sc_name, n_subs=ns)

                l = d_tilde.shape[0] // 2  # number of landmarks

                # Get the points
                f_hat = np.concatenate((f_tilde, f), axis=1)
                c_bar = compute_C_matrix(centres, l)
                c = np.tile(centres, (l, 1))
                d = compute_D_matrix(f_hat, d_tilde, c_bar, l)
                data = d + c
                kde_ = KernelDensity(kernel='gaussian',
                                     bandwidth=0.2).fit(data.T)
                sc = kde_.score_samples(data.T)
                max_idx = np.argmax(sc)
                shape1 = data[:, max_idx]
                shape1 = np.reshape(shape1, (l, 2))
                # a = shape1[:, 0]
                # b = shape1[:, 1]
                # if sc_name == '0_25':
                #     a *= 4
                #     b *= 4
                # elif sc_name == '0_5':
                #     a *= 2
                #     b *= 2
                # fig, ax_ = plt.subplots()
                # ax_.imshow(image, cmap=plt.cm.gray)
                # ax_.plot(a, b, 'r.', markersize=8, mec='k', mew=0.3)
                # ax_.axis('off')
                # plt.show()
                # if sc_name == '0_25':
                #     a /= 2
                #     b /= 2
                if count == 4:
                    shape[(4 * count):(4 * count + 5), :] = shape1[0:5, :] * 2
                else:
                    shape[(4 * count):(4 * count + 4), :] = shape1[0:4, :] * 2

            a = shape[:, 0]
            b = shape[:, 1]
            if sc_name == '0_25':
                a = a * 2
                b = b * 2
            if sc_name == '1':
                a = a / 2
                b = b / 2
            # fig, ax_ = plt.subplots()
            # ax_.imshow(image, cmap=plt.cm.gray)
            # ax_.plot(a, b, 'r.', markersize=8, mec='k', mew=0.3)
            # ax_.plot(a[5], b[5], 'b.', markersize=8, mec='k', mew=0.3)
            # ax_.plot(a[12], b[12], 'b.', markersize=8, mec='k', mew=0.3)
            # ax_.axis('off')
            # plt.show()

    derechaX = np.copy(a)
    derechaY = np.copy(b)

    fig, ax_ = plt.subplots()
    ax_.imshow(image, cmap=plt.cm.gray)
    ax_.plot(a, b, 'r.', markersize=5, mec='k', mew=0.3)
    ax_.plot(izquierdaX, izquierdaY, 'r.', markersize=8, mec='k', mew=0.3)
    ax_.plot(derechaX, derechaY, 'r.', markersize=8, mec='k', mew=0.3)

    IT = (izquierdaY[12] - izquierdaY[5]) / (izquierdaX[12] - izquierdaX[5])
    DT = (derechaY[12] - derechaY[5]) / (derechaX[12] - derechaX[5])

    a1 = 2 * izquierdaX[5] - izquierdaX[12]
    a2 = 2 * izquierdaX[12] - izquierdaX[5]
    b1 = IT * (a1 - izquierdaX[5]) + izquierdaY[5]
    b2 = IT * (a2 - izquierdaX[5]) + izquierdaY[5]

    c1 = 2 * derechaX[5] - derechaX[12]
    c2 = 2 * derechaX[12] - derechaX[5]
    d1 = DT * (c1 - derechaX[5]) + derechaY[5]
    d2 = DT * (c2 - derechaX[5]) + derechaY[5]

    HT = (derechaY[12] - izquierdaY[12]) / (derechaX[12] - izquierdaX[12])

    e1 = a1
    e2 = c1
    f1 = HT * (e1 - izquierdaX[12]) + izquierdaY[12]
    f2 = HT * (e2 - izquierdaX[12]) + izquierdaY[12]

    g1 = izquierdaX[5]
    g2 = HT * (g1 - izquierdaX[12]) + izquierdaY[12]
    h1 = derechaX[5]
    h2 = HT * (h1 - izquierdaX[12]) + izquierdaY[12]

    ax_.plot([e1, e2], [f1, f2], 'g', markersize=8, mec='k', mew=0.3)
    ax_.plot([c1, c2], [d1, d2], 'g', markersize=8, mec='k', mew=0.3)
    ax_.plot([a1, a2], [b1, b2], 'g', markersize=8, mec='k', mew=0.3)
    ax_.plot(izquierdaX[5],
             izquierdaY[5],
             'b.',
             markersize=10,
             mec='k',
             mew=0.3)
    ax_.plot(izquierdaX[12],
             izquierdaY[12],
             'b.',
             markersize=10,
             mec='k',
             mew=0.3)
    ax_.plot(derechaX[5], derechaY[5], 'b.', markersize=10, mec='k', mew=0.3)
    ax_.plot(derechaX[12], derechaY[12], 'b.', markersize=10, mec='k', mew=0.3)
    ax_.plot([g1, h1], [g2, h2], 'b.', markersize=10, mec='k', mew=0.3)

    nume1 = izquierdaY[5] * (izquierdaX[12] - g1) + izquierdaY[12] * (
        g1 - izquierdaX[5]) + g2 * (izquierdaX[5] - izquierdaX[12])
    deno1 = (izquierdaX[5] - izquierdaX[12]) * (izquierdaX[12] - g1) + (
        izquierdaY[5] - izquierdaY[12]) * (izquierdaY[12] - g2)
    rati1 = nume1 / deno1
    angl1 = math.atan(rati1)
    deg1 = (angl1 * 180) / math.pi
    if deg1 < 0:
        deg1 = deg1 + 180
    print(deg1)

    nume2 = derechaY[5] * (derechaX[12] - h1) + derechaY[12] * (
        h1 - derechaX[5]) + h2 * (derechaX[5] - derechaX[12])
    deno2 = (derechaX[5] - derechaX[12]) * (derechaX[12] - h1) + (
        derechaY[5] - derechaY[12]) * (derechaY[12] - h2)
    rati2 = nume2 / deno2
    angl2 = math.atan(rati2)
    deg2 = (angl2 * 180) / math.pi
    if deg2 < 0:
        deg2 = deg2 + 180
        deg2 = 180 - deg2
    print(deg2)

    ax_.text(izquierdaX[12] - 20,
             izquierdaY[12] + 20,
             round(deg1, 2),
             color='yellow')
    ax_.text(derechaX[12] + 20,
             derechaY[12] + 20,
             round(deg2, 2),
             color='yellow')

    print('####\tGirl\tBoy')

    na = 'N'
    no = 'N'

    #1-2
    if deg1 > 36 or deg2 > 36:
        na = 'L'
    if deg1 > 41.5 or deg2 > 41.5:
        na = 'G'
    if deg1 > 29 or deg2 > 31:
        no = 'L'
    if deg1 > 33 or deg2 > 35:
        no = 'G'
    print('1-2\t' + na + '\t' + no)

    #3-4
    if deg1 > 31.5 or deg2 > 33:
        na = 'L'
    if deg1 > 36.5 or deg2 > 38.5:
        na = 'G'
    if deg1 > 28 or deg2 > 29:
        no = 'L'
    if deg1 > 32.5 or deg2 > 33.5:
        no = 'G'
    print('3-4\t' + na + '\t' + no)

    #5-6
    if deg1 > 27.5 or deg2 > 29.5:
        na = 'L'
    if deg1 > 32 or deg2 > 34:
        na = 'G'
    if deg1 > 24.5 or deg2 > 27:
        no = 'L'
    if deg1 > 29 or deg2 > 31.5:
        no = 'G'
    print('5-6\t' + na + '\t' + no)

    #7-9
    if deg1 > 25.5 or deg2 > 27:
        na = 'L'
    if deg1 > 29.5 or deg2 > 31.5:
        na = 'G'
    if deg1 > 24.5 or deg2 > 25.5:
        no = 'L'
    if deg1 > 29 or deg2 > 29.5:
        no = 'G'
    print('7-9\t' + na + '\t' + no)

    #2a-3a
    if deg1 > 22 or deg2 > 23.5:
        na = 'L'
    if deg1 > 25.5 or deg2 > 27:
        na = 'G'
    if deg1 > 21 or deg2 > 22.5:
        no = 'L'
    if deg1 > 25 or deg2 > 27:
        no = 'G'
    print('2a-3a\t' + na + '\t' + no)

    #3a-5a
    if deg1 > 18 or deg2 > 21:
        na = 'L'
    if deg1 > 25.5 or deg2 > 25.5:
        na = 'G'
    if deg1 > 19 or deg2 > 20:
        no = 'L'
    if deg1 > 23.5 or deg2 > 24:
        no = 'G'
    print('3a-5a\t' + na + '\t' + no)

    ax_.axis('off')
    plt.show()
    '''
    l = d_tilde.shape[0] // 2  # number of landmarks

    # Composed matrix
    f_hat = np.concatenate((f_tilde, f), axis=1)

    c_bar = compute_C_matrix(centres, l)
    c = np.tile(centres, (l, 1))

    d = compute_D_matrix(f_hat, d_tilde, c_bar, l)

    positions_ = d + c

    density_estimation(positions_, img,imagepath)

    '''
Beispiel #56
0
    def _evaluate_vec(self, opts, step, real_points,
                      fake_points, validation_fake_points, prefix=''):
        """Compute the average log-likelihood and the Coverage metric.
        Coverage metric is defined in arXiv paper. It counts a mass of true
        data covered by the 95% quantile of the model density.
        """

        # Estimating density with KDE
        dist = fake_points[:-1] - fake_points[1:]
        dist = dist * dist
        dist = np.sqrt(np.sum(dist, axis=(1, 2, 3)))
        bandwidth = np.median(dist)
        num_real = len(real_points)
        num_fake = len(fake_points)
        if validation_fake_points is not None:
            max_score = -1000000.
            num_val = len(validation_fake_points)
            b_grid = bandwidth * (2. ** (np.arange(14) - 7.))
            for _bandwidth in b_grid:
                kde = KernelDensity(kernel='gaussian', bandwidth=_bandwidth)
                kde.fit(np.reshape(fake_points, [num_fake, -1]))
                score = np.mean(kde.score_samples(
                    np.reshape(validation_fake_points, [num_val, -1])))
                if score > max_score:
                    # logging.debug("Updating bandwidth to %.4f"
                    #             " with likelyhood %.2f" % (_bandwidth, score))
                    bandwidth = _bandwidth
                    max_score = score
        kde = KernelDensity(kernel='gaussian',
                            bandwidth=bandwidth)
        kde.fit(np.reshape(fake_points, [num_fake, -1]))

        # Computing Coverage, refer to Section 4.3 of arxiv paper
        model_log_density = kde.score_samples(
            np.reshape(fake_points, [num_fake, -1]))
        # np.percentile(a, 10) returns t s.t. np.mean( a <= t ) = 0.1
        threshold = np.percentile(model_log_density, 5)
        real_points_log_density = kde.score_samples(
            np.reshape(real_points, [num_real, -1]))
        ratio_not_covered = np.mean(real_points_log_density <= threshold)

        log_p = np.mean(real_points_log_density)
        C = 1. - ratio_not_covered

        logging.info('Evaluating: log_p=%.3f, C=%.3f' % (log_p, C))
        return log_p, C
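# A standalone sketch of the Coverage metric described above, with stand-in
# arrays (`fake` for generated samples, `real` for true data):
import numpy as np
from sklearn.neighbors import KernelDensity

fake = np.random.randn(500, 2)
real = np.random.randn(500, 2)
kde = KernelDensity(kernel='gaussian', bandwidth=1.0).fit(fake)
threshold = np.percentile(kde.score_samples(fake), 5)  # 95% quantile region
C = np.mean(kde.score_samples(real) > threshold)       # covered mass of real data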
Beispiel #57
0
points_fg = np.array([img_input[x, y, :] for (x, y) in xy_fg])
points_bg = np.array([img_input[x, y, :] for (x, y) in xy_bg])

fig, axes = plt.subplots(nrows=2, ncols=1)
sns.distplot(points_fg[:, 0], ax=axes[0], color='r')
sns.distplot(points_fg[:, 1], ax=axes[0], color='g')
sns.distplot(points_fg[:, 2], ax=axes[0], color='b')
sns.distplot(points_bg[:, 0], ax=axes[1], color='r')
sns.distplot(points_bg[:, 1], ax=axes[1], color='g')
sns.distplot(points_bg[:, 2], ax=axes[1], color='b')


# computing the masks - the slowest operation

kde_fg = KernelDensity(kernel='gaussian', 
                       bandwidth=1, 
                       algorithm='kd_tree', 
                       leaf_size=100).fit(points_fg)
kde_bg = KernelDensity(kernel='gaussian', 
                       bandwidth=1, 
                       algorithm='kd_tree', 
                       leaf_size=100).fit(points_bg)


score_kde_fg = np.zeros(img_input.shape[:2])
score_kde_bg = np.zeros(img_input.shape[:2])
likelihood_fg = np.zeros(img_input.shape[:2])
coordinates = it.product(range(score_kde_fg.shape[0]),
                         range(score_kde_fg.shape[1]))
for x, y in tqdm_notebook(coordinates,
                          total=np.prod(score_kde_fg.shape)):
    score_kde_fg[x, y] = np.exp(kde_fg.score(img_input[x, y, :].reshape(1, -1)))
Beispiel #58
0
import numpy
import pandas
from sklearn.neighbors.kde import KernelDensity
import matplotlib.pyplot as plt
from scipy.stats import norm

df = pandas.read_csv('C:/Udemy/SKLEARN-Python/004_visits_per_day.csv',
                     index_col=False,
                     header=0)
X_plot = numpy.linspace(-2, 2, 1000)[:, numpy.newaxis]
kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(df.values)
log_dens = kde.score_samples(X_plot)

#plt.hist(df.values, bins=numpy.linspace(0, 1, 20), fc='#AAAAFF', normed=True)

plt.hist(df.values, bins=numpy.linspace(0, 1, 10), fc='#AAAAFF', density=True)

plt.plot(X_plot, numpy.exp(log_dens))
plt.show()

#########################################################################################

df = pandas.read_csv('C:/Udemy/SKLEARN-Python/004_visits_per_day.csv',
                     index_col=False,
                     header=0)
X_plot = numpy.linspace(-20, 1, 1000)[:, numpy.newaxis]
kde = KernelDensity(kernel='gaussian', bandwidth=1.4).fit(numpy.log(df.values))
# here I needed to change the bandwidth value to 1.4
log_dens = kde.score_samples(X_plot)

plt.hist(numpy.log(df.values),
         bins=numpy.linspace(-20, 1, 10), fc='#AAAAFF', density=True)
# NOTE: the call above was truncated in the source; the bins/fc arguments are
# assumed by analogy with the first histogram.
Beispiel #59
0
                for chunk_id in bar(range(0, chunk_nu)):

                    col = chunks.get_chunk()[col_name]
                    ys = responses[chunk_id *
                                   max_chunk_size:chunk_id * max_chunk_size +
                                   col.shape[0]]
                    for i in range(0, col.shape[0], 1):
                        value = col.iloc[i]
                        y = ys[i]
                        if value != value:  # NaN is the only value not equal to itself
                            cnts[y]['nan'] += 1
                        else:
                            cnts[y]['nu'].append(value)

                cnts[0]['nu'] = np.asarray(cnts[0]['nu']).reshape(-1, 1)
                cnts[1]['nu'] = np.asarray(cnts[1]['nu']).reshape(-1, 1)
                print('cal kde for 0...')
                if cnts[0]['nu'].size > 0:
                    cnts[0]['kde'] = KernelDensity(kernel='gaussian').fit(
                        cnts[0]['nu'])
                print('cal kde for 1...')
                if cnts[1]['nu'].size > 0:
                    cnts[1]['kde'] = KernelDensity(kernel='gaussian').fit(
                        cnts[1]['nu'])
                utils.save_variable(cnts, file_path)
        break
    except ValueError:
        print('get ValueError. Restart again.')

#%%
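# Aside -- the fragment above fits one KDE per class label, i.e. a
# naive-Bayes-style likelihood model. A compact sketch of the same idea:
import numpy as np
from sklearn.neighbors import KernelDensity

X0 = np.random.randn(300, 1)         # feature values seen with label 0
X1 = np.random.randn(300, 1) + 2.0   # feature values seen with label 1
kde0 = KernelDensity(kernel='gaussian').fit(X0)
kde1 = KernelDensity(kernel='gaussian').fit(X1)
x_new = np.array([[1.0]])
log_ratio = kde1.score_samples(x_new) - kde0.score_samples(x_new)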
Beispiel #60
0
def plot_solid_liquid_ratio(temperature_next,
                            strain_lst,
                            nve_run_time_steps,
                            project_parameter,
                            debug_plot=True):
    cna_str = project_parameter['crystalstructure'].upper()
    ratio_lst = []
    for strain in strain_lst:
        job_name = get_nve_job_name(
            temperature_next=temperature_next,
            strain=strain,
            steps_lst=project_parameter['nve_run_time_steps_lst'],
            nve_run_time_steps=nve_run_time_steps)
        ham_nve = project_parameter['project'].load(job_name)
        struct = ham_nve.get_structure().center_coordinates_in_unit_cell()
        cna = struct.analyse_ovito_cna_adaptive(mode='str')
        bcc_count = sum(cna == 'BCC')
        fcc_count = sum(cna == 'FCC')
        hcp_count = sum(cna == 'HCP')
        if (cna_str == 'BCC' and bcc_count > fcc_count and bcc_count > hcp_count) or \
                (cna_str == 'FCC' and fcc_count > bcc_count and fcc_count > hcp_count) or \
                (cna_str == 'HCP' and hcp_count > bcc_count and hcp_count > fcc_count):
            # plt.figure(figsize=(16,12))
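            # KDE bandwidth ~ mean interatomic spacing, (volume per atom)**(1/3)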
            bandwidth = (struct.get_volume() / len(struct))**(1.0 / 3.0)
            kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(
                struct.positions[:, 2][cna == cna_str].reshape(-1, 1))
            z_range = np.linspace(struct.positions[:, 2].min(),
                                  struct.positions[:, 2].max(), 1000)
            sample = kde.score_samples(z_range.reshape(-1, 1))
            gaussian_funct = np.exp(sample) / np.exp(sample).max()
            z_range_above_limit = z_range[np.where(gaussian_funct > 0.1)]
            z_range_below_limit = z_range[np.where(gaussian_funct < 0.1)]
            if len(z_range_above_limit) != 0:
                ratio_above = (np.max(z_range_above_limit)-np.min(z_range_above_limit)) / \
                              (np.max(z_range)-np.min(z_range))
            else:
                ratio_above = 1.0
            if len(z_range_below_limit) != 0:
                ratio_below = 1 - (np.max(z_range_below_limit)-np.min(z_range_below_limit)) / \
                              (np.max(z_range)-np.min(z_range))
            else:
                ratio_below = 0.0
            if ratio_below == 0.0:
                ratio = ratio_above
            elif ratio_above == 1.0:
                ratio = ratio_below
            else:
                ratio = np.min([ratio_below, ratio_above])
            ratio_lst.append(ratio)
        else:
            z_range = None
            gaussian_funct = None
            z_range_above_limit = None
            ratio = None
            ratio_lst.append(0.0)
        if debug_plot:
            plt.title('strain: ' + str(strain))
            plt.xlabel('position z')
            plt.ylabel('position x')
            plt.plot(struct.positions[:, 2],
                     struct.positions[:, 0],
                     'o',
                     label='all')
            plt.plot(struct.positions[:, 2][cna == 'BCC'],
                     struct.positions[:, 0][cna == 'BCC'],
                     'x',
                     label='BCC')
            plt.plot(struct.positions[:, 2][cna == 'FCC'],
                     struct.positions[:, 0][cna == 'FCC'],
                     'x',
                     label='FCC')
            plt.plot(struct.positions[:, 2][cna == 'HCP'],
                     struct.positions[:, 0][cna == 'HCP'],
                     'x',
                     label='HCP')
            cna_str_lst = struct.positions[:, 2][cna == cna_str]
            if len(cna_str_lst) != 0:
                plt.axvline(cna_str_lst.max(), color='red')
                plt.axvline(cna_str_lst.min(), color='red')
            plt.legend()
            plt.show()
            plt.xlabel('Position in z')
            plt.ylabel('kernel density score')
            plt.title('strain: ' + str(strain))
            if z_range is not None:
                plt.plot(z_range, gaussian_funct, label=cna_str)
                if len(z_range_above_limit) != 0:  # guard: empty when the density never exceeds 0.1
                    plt.axvline(np.min(z_range_above_limit),
                                color='black',
                                linestyle='--',
                                label='ratio: ' + str(ratio))
                    plt.axvline(np.max(z_range_above_limit),
                                color='black',
                                linestyle='--')
            plt.axhline(0.1, color='red')
            plt.legend()
            plt.show()
    return ratio_lst
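
# Standalone sketch (synthetic data, hypothetical names) of the core idea in
# plot_solid_liquid_ratio: KDE-smooth the z positions of solid-like atoms,
# normalise the density to its maximum, and take the z extent where it exceeds
# 0.1 as the solid fraction of the box.
import numpy as np
from sklearn.neighbors import KernelDensity

z_solid = np.random.uniform(0.0, 5.0, 500)  # solid slab occupies z in [0, 5]
z_box = (0.0, 10.0)                         # full box extends to z = 10
kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(z_solid.reshape(-1, 1))
z_grid = np.linspace(*z_box, 1000)
dens = np.exp(kde.score_samples(z_grid.reshape(-1, 1)))
dens /= dens.max()
above = z_grid[dens > 0.1]
ratio = (above.max() - above.min()) / (z_box[1] - z_box[0])  # ~0.5 here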