def plot_PDF_2VM(locs1, kaps1, wvm1, locs2, kaps2, wvm2, wu, Position=None):
    """Plot the PDF of each "two von Mises + uniform" mixture on one axis.

    The first seven arguments are iterated in lockstep with ``zip``. Each
    tuple ``(mu1, kap1, pvm1, mu2, kap2, pvm2, pu)`` produces one gray curve

        pvm1 * VM(kap1, mu1) + pvm2 * VM(kap2, mu2) + pu * U

    evaluated on 100 points of [-pi/2, pi/2] and drawn against degrees.

    Parameters
    ----------
    locs1, locs2 : iterable
        Locations of the first / second von Mises member, passed as ``loc``
        to ``stats.vonmises`` (the evaluation grid is in radians).
    kaps1, kaps2 : iterable
        Concentrations of the first / second von Mises member.
    wvm1, wvm2 : iterable
        Mixture weights of the first / second von Mises member.
    wu : iterable
        Mixture weights of the uniform member.
    Position : optional
        Label appended to the figure title.  When omitted (``None``) the
        title is left without a suffix; previously the default raised a
        ``TypeError`` because ``None`` was concatenated to a string.

    Returns
    -------
    fig, ax
        The Matplotlib figure and axes that were created.
    """
    # scale=0.5 makes the frozen von Mises span pi instead of 2*pi, which
    # matches the [-pi/2, pi/2] plotting interval.
    scal_ = 0.5
    fig, ax = plt.subplots(1, 1, figsize=(9, 4))

    titl = 'PDF of Mixture Model 2VMU'
    if Position is not None:
        titl += ' - ' + str(Position)
    ax.set_title(titl)

    lim_l = -np.pi / 2.
    lim_u = np.pi / 2.
    x_ = np.linspace(lim_l, lim_u, 100)
    x_ax = np.degrees(x_)

    # The uniform member does not depend on the loop variables, so its PDF
    # is evaluated once.
    uniform_pdf = stats.uniform(loc=lim_l, scale=lim_u - lim_l).pdf(x_)

    for mu1, kap1, pvm1, mu2, kap2, pvm2, pu in \
            zip(locs1, kaps1, wvm1, locs2, kaps2, wvm2, wu):
        f_VM1 = stats.vonmises(kap1, mu1, scal_)
        f_VM2 = stats.vonmises(kap2, mu2, scal_)
        X_temp = pvm1 * f_VM1.pdf(x_) + pvm2 * f_VM2.pdf(x_) + pu * uniform_pdf
        # Curves are intentionally unlabeled; no legend is drawn.
        ax.plot(x_ax, X_temp, color='gray', linewidth=2, linestyle='-')

    ax.set_xlabel(r'$\theta$ (degrees)', fontsize=12)
    ax.set_ylabel(r'$f(\theta)$', fontsize=12)
    ax.grid(color='gray', alpha=0.3, linestyle=':', linewidth=1)

    return fig, ax
def __init__(self, nSpuriousDims=0):
    """Build the two component distributions of the target.

    With ``nSpuriousDims > 0`` both components are frozen normals over
    ``nSpuriousDims + 1`` columns: every mean is zero except column 0, which
    is ``mu`` for the first component and ``mu + pi`` for the second; every
    standard deviation is ``sd``.  Otherwise the components are von Mises
    distributions with concentration 10 located at 0 and pi.

    NOTE(review): ``mu`` and ``sd`` are not defined in this method; they are
    presumably module-level names - confirm.
    """
    self.nSpuriousDims = nSpuriousDims

    if nSpuriousDims > 0:
        row_shape = (1, nSpuriousDims + 1)

        # Only the first column carries the angular mean; the spurious
        # columns stay centred on zero.
        mus1 = np.zeros(row_shape)
        mus1[0, 0] = mu
        mus2 = np.zeros(row_shape)
        mus2[0, 0] = mu + np.pi

        sds = sd * np.ones(row_shape)
        self.dist1 = stats.norm(loc=mus1, scale=sds)
        self.dist2 = stats.norm(loc=mus2, scale=sds)
    else:
        self.dist1 = stats.vonmises(kappa=10, loc=0)
        self.dist2 = stats.vonmises(kappa=10, loc=np.pi)

    self.last = 2
def testVonMisesSampleKsTest(self):
    """Kolmogorov-Smirnov check of VonMises samples against scipy's CDF.

    Draws 10000 samples for each of 50 log-spaced concentrations and fails
    if 10% or more of the per-concentration KS tests have p <= 0.05.
    """
    concentrations_v = np.logspace(-3, 3, 50)
    num_concentrations = 50
    n = 10000

    # We are fixing the location to zero. The reason is that for loc != 0,
    # scipy's von Mises distribution CDF becomes shifted, so it's no longer
    # in [0, 1], but is in something like [-0.3, 0.7]. This breaks kstest.
    loc_t = self.make_tensor(0.0)
    concentration_t = self.make_tensor(concentrations_v)
    von_mises = tfd.VonMises(loc_t, concentration_t)

    samples_t = von_mises.sample(n, seed=tfp_test_util.test_seed())
    sample_values = self.evaluate(samples_t)
    self.assertEqual(sample_values.shape, (n, num_concentrations))

    # scipy is optional: the shape check above still runs without it.
    try:
        from scipy import stats  # pylint:disable=g-import-not-at-top
    except ImportError:
        tf.compat.v1.logging.warn("Skipping scipy-dependent tests")
        return

    p_values = [
        stats.kstest(sample_values[:, column], stats.vonmises(kappa).cdf)[1]
        for column, kappa in enumerate(concentrations_v)
    ]
    trials = len(p_values)
    fails = sum(1 for p in p_values if p <= 0.05)
    self.assertLess(fails, trials * 0.1)
def plot_mixs_vonMises_General(mixX):
    """Plot every member of a fitted von Mises mixture.

    For each cluster ``i`` a frozen ``stats.vonmises`` is built from
    ``mixX.concentrations_[i]`` and the angle of
    ``mixX.cluster_centers_[i, :2]`` (via ``atan2(y, x)``), then handed to
    ``plot_rv_distribution``.  With more than one cluster a grid of
    ``n_clusters x 3`` axes is created and row ``i`` is passed along; with a
    single cluster ``axes=None`` is passed instead.
    """
    n_clus = mixX.n_clusters
    shares_grid = n_clus != 1
    if shares_grid:
        fig, axes = plt.subplots(n_clus, 3, figsize=(12, 9))

    for member in range(n_clus):
        # Location in radians, recovered from the centre's (x, y) pair.
        y_part = mixX.cluster_centers_[member, 1]
        x_part = mixX.cluster_centers_[member, 0]
        loc = math.atan2(y_part, x_part)

        kappa = mixX.concentrations_[member]
        X = stats.vonmises(kappa, loc)

        target_axes = axes[member, :] if shares_grid else None
        plot_rv_distribution(X, axes=target_axes)
def __init__(
    self,
    num=4,
    pos_range=(-np.pi, np.pi),
    vel_range=(-8.0, 8.0),
    act_range=(-2.0, 2.0),
    kappa=10,
    vel_std=1.0,
    act_std=0.5,
):
    """Create ``num`` basis distributions each for position, velocity and action.

    Position bases are von Mises distributions (concentration ``kappa``)
    whose means are spread from 0 up to one step short of the position range
    width.  Velocity and action bases are normals (``vel_std`` / ``act_std``)
    whose means are centred inside ``num + 1`` equal slices of the
    respective range.
    """
    super().__init__(self.Type.OBSERVATION_ACTION)
    self.dim = num**2

    def _inner_means(bounds):
        # num means, half a step in from each end of the range.
        low, high = bounds[0], bounds[1]
        width = high - low
        step = width / (num + 1)
        return np.linspace(step * 0.5, width - step * 0.5, num) + low

    # Angular means start at 0 and are not shifted by pos_range[0].
    pos_width = pos_range[1] - pos_range[0]
    pos_step = pos_width / (num)
    pos_basis_means = np.linspace(0, pos_width - pos_step, num)
    self.pos_rvs = [vonmises(kappa=kappa, loc=m) for m in pos_basis_means]

    self.vel_rvs = [
        norm(loc=m, scale=vel_std) for m in _inner_means(vel_range)
    ]
    self.act_rvs = [
        norm(loc=m, scale=act_std) for m in _inner_means(act_range)
    ]
def testVonMisesSampleKsTest(self):
    """Compare VonMises samples with scipy's von Mises CDF via KS tests.

    One KS test is run per concentration (50 log-spaced values, 10000
    samples each, fixed seed 137); the test passes when fewer than 10% of
    them have p <= 0.05.
    """
    concentrations_v = np.logspace(-3, 3, 50)
    # We are fixing the location to zero. The reason is that for loc != 0,
    # scipy's von Mises distribution CDF becomes shifted, so it's no longer
    # in [0, 1], but is in something like [-0.3, 0.7]. This breaks kstest.
    von_mises = tfd.VonMises(
        self.make_tensor(0.0), self.make_tensor(concentrations_v))

    n = 10000
    drawn = von_mises.sample(n, seed=137)
    sample_values = self.evaluate(drawn)
    self.assertEqual(sample_values.shape, (n, 50))

    # Without scipy only the shape assertion above is exercised.
    try:
        from scipy import stats  # pylint:disable=g-import-not-at-top
    except ImportError:
        tf.logging.warn("Skipping scipy-dependent tests")
        return

    trials = 0
    fails = 0
    for index, concentration in enumerate(concentrations_v):
        reference_cdf = stats.vonmises(concentration).cdf
        p_value = stats.kstest(sample_values[:, index], reference_cdf)[1]
        trials += 1
        fails += 1 if p_value <= 0.05 else 0

    self.assertLess(fails, trials * 0.1)
def plot_PDF_1VM_b(FibersT):
    """Plot every "one von Mises + uniform" mixture stored in ``FibersT``.

    One gray PDF curve is drawn per row (location ``VM1_Ang`` in degrees,
    concentration ``VM1_Conc``, weights ``VM1_Weig`` / ``VM1_Weigu``) on
    [-90, 90] degrees.  Marker rows for ``Angle_KMax``, ``Angle_KMin``,
    ``Angle_KMinMag1`` and ``Angle_KMinMag2`` are overlaid at heights 1.0,
    0.9, 0.8 and 0.7, and the fitted locations are plotted against their
    weights.

    Returns
    -------
    fig, ax
        The Matplotlib figure and axes that were created.
    """
    Position = FibersT.Position[0]
    locs = np.radians(FibersT.VM1_Ang)
    kaps = FibersT.VM1_Conc
    wvm = FibersT.VM1_Weig
    wu = FibersT.VM1_Weigu
    Angle_KMax = FibersT.Angle_KMax
    Angle_KMin = FibersT.Angle_KMin
    Angle_KMinMag1 = FibersT.Angle_KMinMag1
    Angle_KMinMag2 = FibersT.Angle_KMinMag2
    marker_height = np.ones(len(Angle_KMax))

    # scale=0.5 halves the period of the von Mises to pi.
    scal_ = 0.5
    lim_l = -np.pi / 2.
    lim_u = np.pi / 2.
    x_ = np.linspace(lim_l, lim_u, 100)
    x_ax = np.degrees(x_)

    fig, ax = plt.subplots(1, 1, figsize=(9, 4))
    ax.set_title('PDF of Mixture Model 1VMU - ' + Position)

    for mu, kap, pvm, pu in zip(locs, kaps, wvm, wu):
        von_mises_part = pvm * stats.vonmises(kap, mu, scal_).pdf(x_)
        uniform_part = pu * stats.uniform(
            loc=lim_l, scale=lim_u - lim_l).pdf(x_)
        ax.plot(x_ax, von_mises_part + uniform_part,
                color='gray', linewidth=2, linestyle='-')

    ax.set_xlabel(r'$\theta$ (degrees)', fontsize=12)
    ax.set_ylabel(r'$f(\theta)$', fontsize=12)
    ax.grid(color='gray', alpha=0.3, linestyle=':', linewidth=1)

    # Reference angles, stacked at decreasing heights so they do not overlap.
    ax.plot(Angle_KMax, marker_height, 'x', color='black', label='KMax')
    ax.plot(Angle_KMin, 0.9 * marker_height, '+', color='gray', label='KMin')
    ax.plot(Angle_KMinMag1, 0.8 * marker_height, '^',
            markerfacecolor='white', markeredgecolor='b', label='MM1')
    ax.plot(Angle_KMinMag2, 0.7 * marker_height, 'v',
            markerfacecolor='white', markeredgecolor='g', label='MM2')
    ax.plot(FibersT.VM1_Ang, wvm, 'o', color='red', label='1VM')

    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.2),
              fancybox=True, shadow=True, ncol=6)
    return fig, ax
def nLL(THETA, dth,dcp,dc):
    """Negative log-likelihood of heading changes under a turn/run mixture.

    ``THETA`` unpacks to ``(a_, k_, A_, B_)``.  The likelihood of each
    observation is ``(1 - P) * VM + P / (2*pi)`` where ``P`` is
    ``sigmoid2(A_, B_, dc)`` and ``VM`` is the zero-located von Mises PDF
    with concentration ``k_`` evaluated at ``(dth - a_*dcp) * d2r``.
    """
    a_, k_, A_, B_ = THETA

    turn_prob = sigmoid2(A_, B_, dc)
    residual = (dth - a_ * dcp) * d2r
    run_density = vonmises(k_).pdf(residual)

    uniform_density = 1 / (2 * np.pi)
    likelihood = np.multiply((1 - turn_prob), run_density) \
        + uniform_density * turn_prob

    # 1e-7 keeps log() finite when the likelihood underflows to zero.
    total = -np.sum(np.log(likelihood + 1e-7))
    return np.sum(total)
def myCDFvonMises( angles, values, res_vonMF ):
    """Plot the mixture PDF/CDF against the cumulated normalised data.

    res_vonMF : array-like, shape=(n_clust,4)
        col-1 : direction in rads
        col-2 : direction in degrees
        col-3 : concentration (dispersion)
        col-4 : weight of member von Mises distribution

    Returns the tuple ``(fX_tot, cfX_tot, cfX_exp)``: the weighted sum of
    member PDFs, the weighted sum of member CDFs, and the cumulative sum of
    the second column of ``normalizeIntensity(angles, values)`` times the
    grid step.
    """
    x = np.radians(angles)
    n_points = len(x)
    # Grid step in radians, taken from the first two angles.
    dx = np.radians(abs(angles[0] - angles[1]))

    normalised = normalizeIntensity( angles, values )
    cfX_exp = np.cumsum(normalised[:,1]*dx)

    fX_tot = np.zeros(n_points)
    cfX_tot = np.zeros(n_points)
    for k in range(len(res_vonMF)):
        location = res_vonMF[k,0]
        concentration = res_vonMF[k,2]
        weight = res_vonMF[k,3]
        member = stats.vonmises( concentration, location )
        # accumulate the weighted members into the mixture:
        fX_tot += weight*member.pdf(x)
        cfX_tot += weight*member.cdf(x)

    fig, ax = plt.subplots(1,1,figsize=(10,5))
    curves = (
        (np.cumsum(fX_tot*dx), 'r', 2, 'total cdf'),
        (cfX_tot, 'b--', 2, 'vonmises cdf'),
        (cfX_exp, 'k', 3, 'real cdf'),
        (fX_tot, 'm-', 2, 'vonmises pdf'),
    )
    for y_values, fmt, width, label in curves:
        ax.plot(x, y_values, fmt, lw=width, alpha=0.6, label=label)
    ax.legend()

    return fX_tot, cfX_tot, cfX_exp
def _make_tuples(self, key):
    """Estimate test power for 2..14 trials and insert one row per setting.

    ``repeats``, ``poisson_rate``, ``alpha`` and ``kappa`` are fetched from
    ``PowerParameters`` restricted by ``key``.  For each trial count, every
    repeat draws Poisson spike counts, samples that many von Mises angles
    per trial, and averages the resultant vector lengths; ``beta`` records
    how often that average stays below the cut-off returned by
    ``self.compute_cutoff``.  The inserted ``power`` is ``1 - mean(beta)``.
    """
    # fetch1 is invoked as a method, matching the other copy of this
    # routine; the subscript form ``fetch1[...]`` is not a call.
    repeats, poisson_rate, alpha, kappa = (PowerParameters() & key).fetch1(
        'repeats', 'poisson_rate', 'alpha', 'kappa')

    p = stats.poisson(poisson_rate)
    v = stats.vonmises(kappa)

    for trials in range(2, 15):
        print('Trials', trials)
        cut_off = self.compute_cutoff(poisson_rate, alpha, trials)
        beta = []
        for _ in range(repeats):
            n = p.rvs(trials)
            # Angles are wrapped into [0, 2*pi) before measuring the
            # resultant vector length.
            vs = np.mean([
                circ.resultant_vector_length(v.rvs(m) % (2 * np.pi))
                for m in n
            ])
            beta.append(vs < cut_off)
        self.insert1(dict(key, n=trials, power=1 - np.mean(beta)))
def nLL(THETA, dth, dcp, dc):
    """
    Negative log-likelihood objective used for fitting.

    THETA holds the parameters to infer: THETA[0] is the von Mises
    concentration, THETA[1] the sigmoid parameter passed first to
    ``sigmoid2``, THETA[2:7] the weights projected onto
    ``RaisedCosine_basis(len(K_win), 5)``, THETA[7] an amplitude and
    THETA[8] a time constant.  dth, dcp and dc are the recorded data.
    """
    kappa = THETA[0]
    sigmoid_scale = THETA[1]
    basis_weights = THETA[2:7]
    amplitude = THETA[7]
    time_constant = THETA[8]

    # turning kernel (Kdc), expressed in the raised-cosine basis
    turning_kernel = np.dot(
        basis_weights, RaisedCosine_basis(len(K_win), 5))
    turn_prob = sigmoid2(sigmoid_scale, turning_kernel, dc)

    heading_dist = vonmises(kappa)
    # W-V kernel (Kdcp): decaying exponential with negative amplitude
    weathervane_kernel = -amplitude * np.exp(-K_win / time_constant)
    run_density = heading_dist.pdf(
        (dth - np.dot(dcp, weathervane_kernel)) * d2r)

    likelihood = np.multiply((1 - turn_prob), run_density) \
        + (1 / (2 * np.pi)) * turn_prob
    # 1e-9 guards log() against zero likelihood.
    total = -np.sum(np.log(likelihood + 1e-9))
    return np.sum(total)
def vonmises_KDE(data, kappa, length, plot=None):
    """
    Create a kernel density estimate of circular data using the von Mises
    distribution as the basis function.

    Parameters
    ----------
    data : iterable of float
        Observations in degrees; converted to radians internally.
    kappa : float
        Concentration shared by every kernel.
    length : int
        Number of grid points on [-2*pi, 2*pi) (end point excluded).
    plot : optional
        When truthy, each kernel (rescaled to a peak of 0.2) and the final
        estimate are drawn with ``plt.plot``.

    Returns
    -------
    x_data : ndarray
        The evaluation grid in radians.
    vonmises_kde : ndarray
        The summed kernels, normalised so the trapezoidal integral over
        ``x_data`` is 1.
    f : scipy.interpolate.interp1d
        Linear interpolator of ``vonmises_kde`` over ``x_data``.
    """
    # imports
    from scipy.stats import vonmises_line
    from scipy.interpolate import interp1d

    # convert to radians
    data = np.radians(data)

    x_data = np.linspace(-2*np.pi, 2*np.pi, length, endpoint=False)

    kernels = []
    for d in data:
        # vonmises_line is the von Mises restricted to one period
        # ([-pi, pi] around loc).  Using it gives the same kernels as the
        # former approach of overwriting ``vonmises.a`` / ``vonmises.b``,
        # without changing scipy's shared distribution object for every
        # other user in the process.
        kernel = vonmises_line(kappa, loc=d).pdf(x_data)
        kernels.append(kernel)
        if plot:
            # Rescale a copy for display only; the stored kernel must stay
            # untouched so the estimate does not depend on ``plot``.
            plt.plot(x_data, kernel / kernel.max() * .2, "grey", alpha=.5)

    vonmises_kde = np.sum(kernels, axis=0)
    # np.trapezoid is the newer name of np.trapz; use whichever exists.
    integrate = getattr(np, "trapezoid", None) or np.trapz
    vonmises_kde = vonmises_kde / integrate(vonmises_kde, x=x_data)
    f = interp1d(x_data, vonmises_kde)

    if plot:
        plt.plot(x_data, vonmises_kde, c='red')

    return x_data, vonmises_kde, f
def _make_tuples(self, key):
    """Simulate test power for 2..14 trials and insert one row per count.

    Parameters come from ``PowerParameters`` restricted by ``key``.  Each
    repeat draws Poisson counts for the trials, samples that many von Mises
    angles per trial, and compares the mean resultant vector length with the
    cut-off from ``self.compute_cutoff``; ``power`` is one minus the
    fraction of repeats that fall below the cut-off.
    """
    params = PowerParameters() & key
    repeats, poisson_rate, alpha, kappa = params.fetch1(
        'repeats', 'poisson_rate', 'alpha', 'kappa')

    count_dist = stats.poisson(poisson_rate)
    angle_dist = stats.vonmises(kappa)
    full_circle = 2 * np.pi

    def _mean_vector_length(trials):
        # One simulated experiment: counts first, then angles per trial.
        counts = count_dist.rvs(trials)
        lengths = []
        for m in counts:
            wrapped = angle_dist.rvs(m) % full_circle
            lengths.append(circ.resultant_vector_length(wrapped))
        return np.mean(lengths)

    for trials in range(2, 15):
        print('Trials', trials)
        cut_off = self.compute_cutoff(poisson_rate, alpha, trials)
        misses = [
            _mean_vector_length(trials) < cut_off for _ in range(repeats)
        ]
        row = dict(key, n=trials, power=1 - np.mean(misses))
        self.insert1(row)
def testVonMisesSampleKsTest(self):
    """KS-test VonMises samples against scipy's von Mises CDF.

    Runs one test per concentration (50 log-spaced values, 10000 samples
    each) and requires that fewer than 10% of them have p <= 0.05.
    """
    n = 10000
    concentrations_v = np.logspace(-3, 3, 50)

    # We are fixing the location to zero. The reason is that for loc != 0,
    # scipy's von Mises distribution CDF becomes shifted, so it's no longer
    # in [0, 1], but is in something like [-0.3, 0.7]. This breaks kstest.
    zero_loc = self.make_tensor(0.0)
    von_mises = tfd.VonMises(zero_loc, self.make_tensor(concentrations_v))

    sample_values = self.evaluate(
        von_mises.sample(n, seed=tfp_test_util.test_seed()))
    self.assertEqual(sample_values.shape, (n, 50))

    def _p_value(column, concentration):
        # Second element of the kstest result is the p-value.
        reference = sp_stats.vonmises(concentration)
        return sp_stats.kstest(sample_values[:, column], reference.cdf)[1]

    outcomes = [
        _p_value(column, concentration) <= 0.05
        for column, concentration in enumerate(concentrations_v)
    ]
    trials = len(outcomes)
    fails = sum(1 for rejected in outcomes if rejected)
    self.assertLess(fails, trials * 0.1)
def apc_models(shape_dict_list=None, model_params_dict=None):
    """Compute normalised model responses to a list of shapes.

    Each model is the product of a von Mises tuning curve over
    ``'orientation'`` (``loc=or_mean``, ``kappa=1/or_sd``) and a normal
    tuning curve over ``'curvature'`` (``loc=cur_mean``, ``scale=cur_sd``).
    The response to a shape is the maximum of that product over the shape's
    points (axis 0); responses are then mean-subtracted and scaled to unit
    norm along axis 0.

    Parameters
    ----------
    shape_dict_list : list of dict, optional
        One dict per shape with keys ``'curvature'`` and ``'orientation'``.
        Defaults to ``[{'curvature': None, 'orientation': None}]``.
    model_params_dict : dict, optional
        Keys ``'or_sd'``, ``'or_mean'``, ``'cur_mean'``, ``'cur_sd'``.
        Defaults to ``{'or_sd': [3.14], 'or_mean': [3.14], 'cur_mean': [1],
        'cur_sd': [0.1]}``.

    Notes
    -----
    The defaults are rebuilt on every call.  They used to be mutable default
    arguments that this function normalised in place, so a second call saw
    the already-reshaped arrays of the first.  Containers supplied by the
    caller are still normalised in place, as before.
    """
    if shape_dict_list is None:
        shape_dict_list = [{'curvature': None, 'orientation': None}]
    if model_params_dict is None:
        model_params_dict = {'or_sd': [3.14], 'or_mean': [3.14],
                             'cur_mean': [1], 'cur_sd': [0.1]}

    # Make sure everything has the right dimensionality for broadcasting:
    # parameters with dims_order (1, 0), shape points with (0, 1).
    for key in model_params_dict:
        vec = np.array(model_params_dict[key])
        model_params_dict[key] = get_2d_dims_right(vec, dims_order=(1, 0))

    for a_shape in shape_dict_list:
        for key in a_shape:
            vec = np.array(a_shape[key])
            a_shape[key] = get_2d_dims_right(vec, dims_order=(0, 1))

    # initialize our distributions
    von_rv = st.vonmises(kappa=model_params_dict['or_sd']**-1,
                         loc=model_params_dict['or_mean'])
    norm_rv = st.norm(scale=model_params_dict['cur_sd'],
                      loc=model_params_dict['cur_mean'])

    # Shapes are processed one at a time (the original notes memory issues
    # with doing them all at once).
    model_resp = []
    for apc_points in shape_dict_list:
        resp_all_points = (von_rv.pdf(apc_points['orientation'])
                           * norm_rv.pdf(apc_points['curvature']))
        model_resp.append(np.array([np.max(resp_all_points, axis=0)]))

    # mean subtract
    model_resp = np.squeeze(np.array(model_resp))
    model_resp = model_resp - np.mean(model_resp, axis=0)

    # scale
    magnitude = np.linalg.norm(model_resp, axis=0)
    model_resp = model_resp / magnitude
    return model_resp
def nLL(THETA, dth, dcp, dc):
    """
    Negative log-likelihood objective used for fitting.

    THETA holds the parameters to infer: THETA[0] is the von Mises
    concentration, THETA[1] and THETA[2:8] are the two parameters handed to
    ``sigmoid2``, THETA[8] is an amplitude and THETA[9] a time constant.
    dth, dcp and dc are the recorded data.
    """
    kappa, sigmoid_scale = THETA[0], THETA[1]
    turning_kernel = THETA[2:8]
    amplitude, time_constant = THETA[8], THETA[9]

    turn_prob = sigmoid2(sigmoid_scale, turning_kernel, dc)

    heading_dist = vonmises(kappa)
    # sign change due to the way the data were simulated
    weathervane_kernel = -amplitude * np.exp(-K_win / time_constant)
    predicted_drift = np.dot(dcp, weathervane_kernel)
    run_density = heading_dist.pdf((dth - predicted_drift) * d2r)

    uniform_density = 1 / (2 * np.pi)
    likelihood = np.multiply((1 - turn_prob), run_density) \
        + uniform_density * turn_prob
    # 1e-9 guards log() against zero likelihood.
    total = -np.sum(np.log(likelihood + 1e-9))
    return np.sum(total)
#plt.plot(30*(temporal_kernel(theta_fit[2], K_win)),label='K_c_fit',linewidth=3) #plt.hold(True) #plt.plot(K_dc,'b--',label='K_c',linewidth=3) #plt.plot(30*np.exp(-K_win/theta_fit[3]),'r',label='K_cp_fit',linewidth=3) ##plt.hold(True) #plt.plot(K_dcp,'r--',label='K_cp',linewidth=3) #plt.legend() # %% ############################### ### check on von Mises density #plt.hist((data_th-alpha*data_dcp)*d2r,bins=100,normed=True,color='r'); aa, bb = np.histogram((data_th - np.dot(data_dcp, recKdcp)) * d2r, bins=200) plt.bar(bb[:-1], aa / len(data_th), align='edge', width=0.03, label='true') rv = vonmises(theta_fit[0]) #plt.scatter((data_th-alpha*data_dcp)*d2r,rv.pdf((data_th-alpha*data_dcp)*d2r),s=1,marker='.') plt.bar(bb[:-1], rv.pdf(bb[:-1]) * np.mean(np.diff(bb)), alpha=0.5, align='center', width=0.03, color='r', label='inferred') plt.axis([-.5, .5, 0, 0.5]) plt.legend() plt.xlabel('heading') plt.ylabel('pdf') #normalization by bin size??? #checking pdf density print('sum of histogram:', np.sum(aa / len(data_th)))
sigmoid2(5 * 0.023, K_dc, dc_n), 'o', color='grey', linewidth=5, label='true', alpha=0.1) plt.xlabel('W*dC') plt.ylabel('P(turn)') #,rotation='horizontal') #plt.grid(True) plt.legend() ### check on von Mises density plt.figure() aa, bb = np.histogram((data_th - np.dot(data_dcp, recKdcp)) * d2r, bins=200) plt.bar(bb[:-1], aa / len(data_th), align='edge', width=0.03, label='true') rv = vonmises(res.x[0]) #plt.scatter((data_th-alpha*data_dcp)*d2r,rv.pdf((data_th-alpha*data_dcp)*d2r),s=1,marker='.') plt.bar(bb[:-1], rv.pdf(bb[:-1]) * np.mean(np.diff(bb)), alpha=0.5, align='center', width=0.03, color='r', label='inferred') plt.axis([-.5, .5, 0, 0.5]) plt.legend() plt.xlabel('heading') plt.ylabel('pdf') #normalization by bin size??? #checking pdf density print('sum of histogram:', np.sum(aa / len(data_th)))
def myCDFvonMises(angles, values, res_vonMF):
    """
    Plot PDF, CDF and quantile curves of a fitted von Mises mixture.

    res_vonMF : array-like, shape=(n_clust,4)
        col-1 : direction in rads
        col-2 : direction in degrees
        col-3 : concentration (dispersion)
        col-4 : weight of member von Mises distribution

    Two figures are produced: one with the model and data CDFs plus the
    model PDF, and one with three panels (CDF vs. angle, angle vs. CDF, and
    the weighted sum of the members' ppf values).

    Returns the tuple ``(fX_tot, cfX_tot, cfX_exp)``: weighted sum of member
    PDFs, weighted sum of member CDFs, and the cumulative sum of the second
    column of ``normalizeIntensity(angles, values)`` times the grid step.
    """
    x = np.radians(angles)
    n_points = len(x)
    # probabilities at which the member ppf functions are evaluated
    p = np.linspace(0.0, 1.0, num=angles.size)
    # grid step in radians, taken from the first two angles
    dx = np.radians(abs(angles[0] - angles[1]))

    normalised = normalizeIntensity(angles, values)
    cfX_exp = np.cumsum(normalised[:, 1] * dx)

    fX_tot = np.zeros(n_points)
    cfX_tot = np.zeros(n_points)
    pfX_tot = np.zeros(n_points)
    for k in range(len(res_vonMF)):
        location = res_vonMF[k, 0]
        concentration = res_vonMF[k, 2]
        weight = res_vonMF[k, 3]
        member = stats.vonmises(concentration, location)
        # accumulate the weighted pdf, cdf and ppf of this member:
        fX_tot += weight * member.pdf(x)
        cfX_tot += weight * member.cdf(x)
        pfX_tot += weight * member.ppf(p)

    # Figure 1: all curves on a single axis, against radians.
    fig, ax = plt.subplots(1, 1, figsize=(5, 5))
    ax.set_title('cdf and pdf')
    overview = (
        (np.cumsum(fX_tot * dx), 'r', 2, 'total cdf'),
        (cfX_tot, 'b--', 2, 'vonmises cdf'),
        (cfX_exp, 'k', 3, 'real cdf'),
        (fX_tot, 'm-', 2, 'vonmises pdf'),
    )
    for y_values, fmt, width, label in overview:
        ax.plot(x, y_values, fmt, lw=width, alpha=0.6, label=label)
    ax.legend()

    # Figure 2: CDF, its inverse read off the same data, and the ppf sum.
    fig, ax = plt.subplots(1, 3, figsize=(15, 5))
    cdf_panel, inverse_panel, ppf_panel = ax[0], ax[1], ax[2]

    cdf_panel.set_title('cdf of von Mises')
    cdf_panel.set_ylabel('Cumulative distribution function')
    cdf_panel.set_xlabel(r'$\theta$ (degrees)')
    cdf_panel.plot(angles, cfX_tot, 'k', lw=3, alpha=0.6, label='total cdf')

    inverse_panel.set_title(
        'Inverse cdf (ppf--percent point function) of von Mises')
    inverse_panel.set_ylabel('Quantile (degrees)')
    inverse_panel.set_xlabel('Cumulative probability')
    inverse_panel.plot(cfX_tot, angles, 'k', lw=3, alpha=0.6,
                       label='inverse cdf')

    ppf_panel.plot(p, pfX_tot, 'k', lw=3, alpha=0.6, label='total ppf')

    for panel in ax:
        panel.legend()

    return fX_tot, cfX_tot, cfX_exp
def all_dists():
    """Return a dict of frozen ``scipy.stats`` continuous distributions.

    Keys are distribution names and values are frozen distributions with
    ``loc=0.0`` and ``scale=1.0``; shape parameters were taken from the
    examples in the official scipy.stats documentation.

    87 entries are active (89 counting the two disabled ``frechet_*``
    entries).  The ``"gilbrat"`` key is kept for callers, but it is backed
    by ``stats.gibrat`` when available, because SciPy renamed the
    misspelled ``gilbrat`` and later removed it.
    """
    # Prefer the corrected name; fall back to the old one on older SciPy.
    gibrat = getattr(stats, "gibrat", None) or stats.gilbrat

    return {
        "alpha": stats.alpha(a=3.57, loc=0.0, scale=1.0),
        "anglit": stats.anglit(loc=0.0, scale=1.0),
        "arcsine": stats.arcsine(loc=0.0, scale=1.0),
        "beta": stats.beta(a=2.31, b=0.627, loc=0.0, scale=1.0),
        "betaprime": stats.betaprime(a=5, b=6, loc=0.0, scale=1.0),
        "bradford": stats.bradford(c=0.299, loc=0.0, scale=1.0),
        "burr": stats.burr(c=10.5, d=4.3, loc=0.0, scale=1.0),
        "cauchy": stats.cauchy(loc=0.0, scale=1.0),
        "chi": stats.chi(df=78, loc=0.0, scale=1.0),
        "chi2": stats.chi2(df=55, loc=0.0, scale=1.0),
        "cosine": stats.cosine(loc=0.0, scale=1.0),
        "dgamma": stats.dgamma(a=1.1, loc=0.0, scale=1.0),
        "dweibull": stats.dweibull(c=2.07, loc=0.0, scale=1.0),
        "erlang": stats.erlang(a=2, loc=0.0, scale=1.0),
        "expon": stats.expon(loc=0.0, scale=1.0),
        "exponnorm": stats.exponnorm(K=1.5, loc=0.0, scale=1.0),
        "exponweib": stats.exponweib(a=2.89, c=1.95, loc=0.0, scale=1.0),
        "exponpow": stats.exponpow(b=2.7, loc=0.0, scale=1.0),
        "f": stats.f(dfn=29, dfd=18, loc=0.0, scale=1.0),
        "fatiguelife": stats.fatiguelife(c=29, loc=0.0, scale=1.0),
        "fisk": stats.fisk(c=3.09, loc=0.0, scale=1.0),
        "foldcauchy": stats.foldcauchy(c=4.72, loc=0.0, scale=1.0),
        "foldnorm": stats.foldnorm(c=1.95, loc=0.0, scale=1.0),
        # "frechet_r": stats.frechet_r(c=1.89, loc=0.0, scale=1.0),
        # "frechet_l": stats.frechet_l(c=3.63, loc=0.0, scale=1.0),
        "genlogistic": stats.genlogistic(c=0.412, loc=0.0, scale=1.0),
        "genpareto": stats.genpareto(c=0.1, loc=0.0, scale=1.0),
        "gennorm": stats.gennorm(beta=1.3, loc=0.0, scale=1.0),
        "genexpon": stats.genexpon(a=9.13, b=16.2, c=3.28,
                                   loc=0.0, scale=1.0),
        "genextreme": stats.genextreme(c=-0.1, loc=0.0, scale=1.0),
        "gausshyper": stats.gausshyper(a=13.8, b=3.12, c=2.51, z=5.18,
                                       loc=0.0, scale=1.0),
        "gamma": stats.gamma(a=1.99, loc=0.0, scale=1.0),
        "gengamma": stats.gengamma(a=4.42, c=-3.12, loc=0.0, scale=1.0),
        "genhalflogistic": stats.genhalflogistic(c=0.773, loc=0.0,
                                                 scale=1.0),
        "gilbrat": gibrat(loc=0.0, scale=1.0),
        "gompertz": stats.gompertz(c=0.947, loc=0.0, scale=1.0),
        "gumbel_r": stats.gumbel_r(loc=0.0, scale=1.0),
        "gumbel_l": stats.gumbel_l(loc=0.0, scale=1.0),
        "halfcauchy": stats.halfcauchy(loc=0.0, scale=1.0),
        "halflogistic": stats.halflogistic(loc=0.0, scale=1.0),
        "halfnorm": stats.halfnorm(loc=0.0, scale=1.0),
        "halfgennorm": stats.halfgennorm(beta=0.675, loc=0.0, scale=1.0),
        "hypsecant": stats.hypsecant(loc=0.0, scale=1.0),
        "invgamma": stats.invgamma(a=4.07, loc=0.0, scale=1.0),
        "invgauss": stats.invgauss(mu=0.145, loc=0.0, scale=1.0),
        "invweibull": stats.invweibull(c=10.6, loc=0.0, scale=1.0),
        "johnsonsb": stats.johnsonsb(a=4.32, b=3.18, loc=0.0, scale=1.0),
        "johnsonsu": stats.johnsonsu(a=2.55, b=2.25, loc=0.0, scale=1.0),
        "ksone": stats.ksone(n=1e03, loc=0.0, scale=1.0),
        "kstwobign": stats.kstwobign(loc=0.0, scale=1.0),
        "laplace": stats.laplace(loc=0.0, scale=1.0),
        "levy": stats.levy(loc=0.0, scale=1.0),
        "levy_l": stats.levy_l(loc=0.0, scale=1.0),
        "levy_stable": stats.levy_stable(alpha=0.357, beta=-0.675,
                                         loc=0.0, scale=1.0),
        "logistic": stats.logistic(loc=0.0, scale=1.0),
        "loggamma": stats.loggamma(c=0.414, loc=0.0, scale=1.0),
        "loglaplace": stats.loglaplace(c=3.25, loc=0.0, scale=1.0),
        "lognorm": stats.lognorm(s=0.954, loc=0.0, scale=1.0),
        "lomax": stats.lomax(c=1.88, loc=0.0, scale=1.0),
        "maxwell": stats.maxwell(loc=0.0, scale=1.0),
        "mielke": stats.mielke(k=10.4, s=3.6, loc=0.0, scale=1.0),
        "nakagami": stats.nakagami(nu=4.97, loc=0.0, scale=1.0),
        "ncx2": stats.ncx2(df=21, nc=1.06, loc=0.0, scale=1.0),
        "ncf": stats.ncf(dfn=27, dfd=27, nc=0.416, loc=0.0, scale=1.0),
        "nct": stats.nct(df=14, nc=0.24, loc=0.0, scale=1.0),
        "norm": stats.norm(loc=0.0, scale=1.0),
        "pareto": stats.pareto(b=2.62, loc=0.0, scale=1.0),
        "pearson3": stats.pearson3(skew=0.1, loc=0.0, scale=1.0),
        "powerlaw": stats.powerlaw(a=1.66, loc=0.0, scale=1.0),
        "powerlognorm": stats.powerlognorm(c=2.14, s=0.446,
                                           loc=0.0, scale=1.0),
        "powernorm": stats.powernorm(c=4.45, loc=0.0, scale=1.0),
        "rdist": stats.rdist(c=0.9, loc=0.0, scale=1.0),
        "reciprocal": stats.reciprocal(a=0.00623, b=1.01,
                                       loc=0.0, scale=1.0),
        "rayleigh": stats.rayleigh(loc=0.0, scale=1.0),
        "rice": stats.rice(b=0.775, loc=0.0, scale=1.0),
        "recipinvgauss": stats.recipinvgauss(mu=0.63, loc=0.0, scale=1.0),
        "semicircular": stats.semicircular(loc=0.0, scale=1.0),
        "t": stats.t(df=2.74, loc=0.0, scale=1.0),
        "triang": stats.triang(c=0.158, loc=0.0, scale=1.0),
        "truncexpon": stats.truncexpon(b=4.69, loc=0.0, scale=1.0),
        "truncnorm": stats.truncnorm(a=0.1, b=2, loc=0.0, scale=1.0),
        "tukeylambda": stats.tukeylambda(lam=3.13, loc=0.0, scale=1.0),
        "uniform": stats.uniform(loc=0.0, scale=1.0),
        "vonmises": stats.vonmises(kappa=3.99, loc=0.0, scale=1.0),
        "vonmises_line": stats.vonmises_line(kappa=3.99,
                                             loc=0.0, scale=1.0),
        "wald": stats.wald(loc=0.0, scale=1.0),
        "weibull_min": stats.weibull_min(c=1.79, loc=0.0, scale=1.0),
        "weibull_max": stats.weibull_max(c=2.87, loc=0.0, scale=1.0),
        "wrapcauchy": stats.wrapcauchy(c=0.0311, loc=0.0, scale=1.0),
    }
def plotMixvMises1X2(res_vonMF, im_name, n_X):
    """
    Plot a fitted mixture of von Mises distributions next to its image.

    Parameters:
    -----------
    res_vonMF : array-like, shape=(n_clust,4), the stored fit results
                (columns read here: 0 location in rads, 1 location in
                degrees, 2 concentration, 3 weight)
    im_name   : string, the name of the image file
    n_X       : the normalized intensity (column 0 angles, column 1 values)

    Output:
    -----------
    Saves the figure twice, as ``Res_<image>_<n>vM_fit`` (default format)
    and with an ``.eps`` suffix.  Nothing is returned.
    """
    from PIL import Image, ImageDraw

    picture = Image.open(im_name)
    pen = ImageDraw.Draw(picture)
    pen.text((-1, 0), text=im_name, fill=100)

    width_px, height_px = picture.size[0], picture.size[1]
    pYmax = height_px
    cen_x = int(width_px / 2)
    cen_y = int(height_px / 2)

    angles = n_X[:, 0]
    nvalls = n_X[:, 1]
    n_clus = len(res_vonMF)

    # evaluation grid for the distributions:
    x = np.linspace(-np.pi / 2, np.pi / 2, num=100)
    xdegs = np.degrees(x)
    fX_tot = np.zeros(len(x))

    fig, axes = plt.subplots(1, 3, figsize=(12, 6))
    members_ax, mixture_ax, image_ax = axes[0], axes[1], axes[2]

    for i in range(n_clus):
        loc_rad = res_vonMF[i, 0]
        loc_deg = np.round(res_vonMF[i, 1], decimals=2)
        concentration = res_vonMF[i, 2]
        weight = res_vonMF[i, 3]
        member_no = str(i + 1)

        member = stats.vonmises(concentration, loc_rad)
        member_pdf = member.pdf(x)
        members_ax.plot(xdegs, member_pdf,
                        label='von Mises for X_' + member_no)
        mixture_ax.plot(xdegs, weight * member_pdf, '--',
                        label='von Mises X_' + member_no)
        # accumulate the weighted members into the mixture:
        fX_tot += weight * member_pdf

        # mark the location of this member at its weighted peak:
        peak = (loc_deg, weight * member.pdf(loc_rad))
        mixture_ax.annotate(loc_deg, xy=peak, xytext=peak,
                            arrowprops=dict(facecolor='black', shrink=0.5))

        # draw the member's direction through the image centre; the y
        # coordinates are flipped because image rows grow downwards.
        cos_t, sin_t = np.cos(loc_rad), np.sin(loc_rad)
        start_x = cen_x * (1 - cos_t)
        start_y = pYmax - cen_y * (1 - sin_t)
        end_x = cen_x * (1 + cos_t)
        end_y = pYmax - cen_y * (1 + sin_t)
        pen.line([(start_x, start_y), (end_x, end_y)],
                 fill=i + 1 + i * 50, width=2)
        image_ax.annotate(loc_deg, xy=(end_x, end_y),
                          xytext=(end_x, end_y), color='g')

    mixture_ax.plot(xdegs, fX_tot, 'r', label="combined von Mises")
    mixture_ax.plot(angles, nvalls, 'k--', label="normalized data")
    mixture_ax.set_title(im_name)
    mixture_ax.set_xlabel('degrees')
    mixture_ax.set_ylabel('pdf')
    mixture_ax.legend(bbox_to_anchor=(0.75, 0.99), loc=2,
                      borderaxespad=0., ncol=2)
    members_ax.legend()
    image_ax.imshow(np.asarray(picture), cmap='gray')

    # build the output file name from the image name and cluster count:
    stem = im_name.replace(".png", "")
    fig_name = 'Res_' + stem + '_' + str(n_clus) + 'vM' + '_fit'
    fig.savefig(fig_name)
    fig.savefig(fig_name + '.eps')

    return
takes the np.log() @author: df """ import numpy as np from scipy import stats from statsmodels.base.model import GenericLikelihoodModel N = 1000 kappa_ = np.array((12.0)) loc_ = np.pi / 4 loc_cs = np.array((np.cos(loc_), np.sin(loc_))) print(loc_cs) # X = stats.vonmises.rvs(kappa, loc, size=N) X = stats.vonmises(kappa_, loc_) X_samples = X.rvs(N) def vonMis_pdf(x, kappa=kappa_, loc=loc_): return stats.vonmises.pdf(x, kappa, loc) class SinglevonMises(GenericLikelihoodModel): def __init__(self, endog, exog=None, **kwds): if exog is None: exog = np.zeros_like(endog) super(SinglevonMises, self).__init__(endog, exog, **kwds) def nloglikeobs(self, params):
def my_chisquare_GOF(X, Y_data, alpha=0.05):
    """
    Chi-square goodness-of-fit test of a sample against a fitted mixture.

    Parameters
    ----------
    X : the observed random sample (r.s.)
    Y_data : a data frame containing the estimated parameters of the model;
        the columns read here are Distribution, Concentration, Location and
        Weight.  The model could be a mixture of von Mises distributions
        only or a mixture of von Mises and Uniform distributions.
        By convention the Uniform distribution, if any, is stored at the
        last line of the dataFrame 'Y_data'.
    alpha : the level of significance; default is 0.05

    Returns
    -------
    chi2_results : pandas.DataFrame with one row for the hand-computed
        statistic and one for ``scipy.stats.chisquare``; columns are
        Statistic, chi^2, critical value, p-value, alpha and decision.

    Side effects: creates two Matplotlib figures and prints intermediate
    values and both result tables.  A second test based on PDF values is
    computed and printed but is marked "NOT CORRECT" and is not returned.
    """
    # the size of the data:
    N = len(X)
    # determine the frequency bins:
    # nbin bins feed the chi-square test, nbb bins feed the density plot.
    nbin = 32
    nbb = 100
    # determine how many model parameters have been estimated by the data:
    # 3 per row, except that a Uniform row counts as a single parameter.
    if any(Y_data.Distribution == 'Uniform'):
        c = 3 * (len(Y_data) - 1) + 1
    else:
        c = 3 * len(Y_data)
    # the degrees of freedom for the chi-squared test:
    ddf = nbin - 1 - c
    # plot the histogram of the data:
    fig, ax = plt.subplots(1, 1, figsize=(4, 3))
    ax.set_title('From my_chisquare_GOF')
    # get the frequencies on every bin, and the bins:
    (nX1, bX1, pX1) = ax.hist(X, bins=nbin, density=False, \
                              label='r.s.', color = 'skyblue' )
    print('# of frequencies per bin:', len(nX1))
    print('# of bins:', len(bX1))
    print('frequencies:', nX1)
    print('bins:', bX1)
    print('sum(nX1):', sum(nX1))
    # get the observed frequencies and compute the CDF for the observed data:
    E1 = (np.cumsum(nX1)) / sum(nX1)
    # second figure: density histogram plus empirical and fitted CDFs
    fig, ax = plt.subplots(1, 1, figsize=(4, 3))
    ax.set_title('From my_chisquare_GOF')
    (nXb, bXb, pXb) = ax.hist(X,
                              bins=nbb,
                              density=True,
                              label='r.s.',
                              color='skyblue')
    ax.plot(bX1[0:-1], E1, 'b', label='CDF r.s.')
    # the observed frequencies:
    O1 = nX1
    # the expected frequencies, after fitting the r.s. to a model:
    scal_ = 0.5
    kap_mle = np.array(Y_data.Concentration)
    loc_mle = np.array(Y_data.Location)
    w_mle = np.array(Y_data.Weight)
    # accumulators evaluated at the bin edges (t: nbin edges, b: nbb edges)
    fY_t = np.zeros(len(bX1), )
    fY_b = np.zeros(len(bXb), )
    cY_t = np.zeros(len(bX1), )
    # NOTE(review): every row, including a Uniform one, is evaluated as a
    # von Mises here - confirm the Uniform row's parameters make that valid.
    for mu, kap, wi in zip(loc_mle, kap_mle, w_mle):
        # a class member of a von Mises:
        fY_i = stats.vonmises(kap, mu, scal_)
        # the pdf of the member:
        fY_t += wi * fY_i.pdf(bX1)
        # the cdf of the member:
        # this may be wrong for the total CDF!!!: for some cases
        cY_i = wi * stats.vonmises.cdf(bX1, kap, mu, scal_)
        cY_t += cY_i
        # expected count per bin = N * CDF increment over the bin; it is
        # overwritten every iteration, so only the value computed from the
        # complete mixture CDF is used below.
        exp1 = N * np.diff(cY_t)
        fY_b += wi * fY_i.pdf(bXb)
    ax.plot(bX1, cY_t, 'r', label='CDF fit (+=)')
    # alternative CDF obtained by integrating the mixture pdf (plot only):
    dx = np.diff(bX1)
    cY_ = np.zeros(len(bX1), )
    cY_[0] = 0.0
    cY_[1::] = np.cumsum(fY_t[0:-1] * dx)
    # exp1 = N*np.diff(cY_)
    # print('len(exp1):',len(exp1))
    ax.plot(bX1, cY_, 'g', label='CDF fit (cumsum)')
    ax.legend()
    # get the chi-squared statistic based on the formula:
    chi2 = np.sum(((O1 - exp1)**2) / exp1)
    #chi2 = np.sum(((abs(O1 - exp1) - 0.5)**2)/exp1)
    # Find the p-value:
    # the effective dof: k - 1 - ddof
    p_value = 1 - stats.chi2.cdf(x=chi2, df=ddf)
    # get the critical value use k -1 - ddof
    crit_val = stats.chi2.ppf(q=1 - alpha, df=ddf)
    # get the chi-square statistic from scipy.stats function:
    # use as ddof the model parameters that are estimated from the sample:
    stats_chi2 = stats.chisquare(f_obs=O1, f_exp=exp1, ddof=c)
    # decisions: reject H0 when the statistic exceeds the critical value
    if chi2 > crit_val:
        H0 = 'reject'
    else:
        H0 = 'do not reject'
    if stats_chi2[0] > crit_val:
        H0t = 'reject'
    else:
        H0t = 'do not reject'
    # assemble the result table (one row per way of computing the test):
    statistic_ = ["My chi-square", "scipy.chisquare"]
    chi2_ = np.array([chi2, stats_chi2[0]])
    pval_ = np.array([p_value, stats_chi2[1]])
    cval_ = np.array([crit_val, crit_val])
    H0_ = (H0, H0t)
    chi2_results = pd.DataFrame({'Statistic': statistic_, \
                                 'chi^2': chi2_.ravel(), \
                                 'p-value': pval_.ravel(), \
                                 'critical value': cval_.ravel(), \
                                 'decision': H0_, \
                                 'alpha': alpha })
    # fix the column order for display:
    chi2_results = chi2_results[['Statistic', 'chi^2', 'critical value', \
                                 'p-value', 'alpha', 'decision']]
    print(chi2_results)
    # ---------------------------------------------------------------------- #
    # NOT CORRECT !!!
    # another test: use the pdfs in the formula:
    # (compares density-histogram heights with pdf values at the left bin
    # edges; printed for inspection only and not returned)
    chi2b = np.sum(((nXb - fY_b[0:-1])**2) / fY_b[0:-1])
    p_valueb = 1 - stats.chi2.cdf(x=chi2b, df=nbb - 1 - c)
    crit_valb = stats.chi2.ppf(q=1 - alpha, df=nbb - 1 - c)
    stats_chi2b = stats.chisquare(f_obs=nXb, f_exp=fY_b[0:-1], ddof=c)
    if chi2b > crit_valb:
        H0_b = 'reject'
    else:
        H0_b = 'do not reject'
    if stats_chi2b[0] > crit_valb:
        H0t_b = 'reject'
    else:
        H0t_b = 'do not reject'
    print('H0_b:', H0_b)
    print('H0t_b:', H0t_b)
    print('p_valueb:', p_valueb)
    print('chi2b:', chi2b)
    print('crit_valb:', crit_valb)
    print('stats_chi2b:', stats_chi2b)
    statistic_b = ["My chi-square", "scipy.chisquare"]
    chi2_b = np.array([chi2b, stats_chi2b[0]])
    pval_b = np.array([p_valueb, stats_chi2b[1]])
    cval_b = np.array([crit_valb, crit_valb])
    H0b_ = (H0_b, H0t_b)
    chi2_resultsb = pd.DataFrame({'Statistic': statistic_b, \
                                  'chi^2': chi2_b.ravel(), \
                                  'p-value': pval_b.ravel(), \
                                  'critical value': cval_b.ravel(), \
                                  'decision': H0b_, \
                                  'alpha': alpha })
    chi2_resultsb = chi2_resultsb[['Statistic', 'chi^2', 'critical value', \
                                   'p-value', 'alpha', 'decision']]
    print(chi2_resultsb)
    # ---------------------------------------------------------------------- #
    return chi2_results
# a. ECDF from the histogram: cumulative sum of bin value times bin width.
# NOTE(review): `n` and `bins` look like the outputs of an earlier hist call
# outside this view - confirm.
dx = np.diff(bins)
cdfX_samp = np.cumsum(n*dx)
#fig, ax = plt.subplots(1, 1, figsize=(9,3))
ax[2].plot(bins[0:-1], cdfX_samp, 'b-.', label='ECDF (cumsum-1)')
# b. get ECDF using the cumsum command (2nd way):
# normalising by the total makes the last value exactly 1.
EEE = (np.cumsum(n))/sum(n)
ax[2].plot(bins[0:-1], EEE, 'g:', label='ECDF, (cumsum-2)')
# c. get ECDF using ECDF() function:
xx2, ee2 = DFst.ECDF( X_samples )
ax[2].plot(xx2, ee2, 'c:', lw=2, label='ECDF (ECDF.py)')
# -------------------------------------------------------------------- #
# get an instance of the von Mises model:
fX_i = stats.vonmises( kappas, locs, scal_ )
# get the CDF of the postulated distribution:
# using the ordered random sample:
cfX_tot = fX_i.cdf(aa)
ax[2].plot( aa, cfX_tot, 'r', label='CDF model' )
# plot the postulated pdf in the same graph as the histogram:
ax[0].plot( aa, fX_i.pdf(aa), label='PDF model' )
ax[0].legend()
ax[1].legend()
ax[2].legend()
# -------------------------------------------------------------------- #
# create a separate plot for ECDF and CDF:
# (from here on `fig` and `ax` refer to the new single-axes figure)
fig, ax = plt.subplots(1, 1, figsize=(6,3))
ax.set_title('Fig 2. Random sample from von Mises distributions')
def check_vonmises_cdf_periodic(k, l, s, x):
    """Assert that the von Mises CDF is periodic modulo 1.

    A frozen ``stats.vonmises(k, loc=l, scale=s)`` is evaluated at ``x`` and
    at ``x`` reduced modulo ``2 * pi * s``; the two CDF values, each taken
    modulo 1, must agree under ``assert_almost_equal``.
    """
    frozen = stats.vonmises(k, loc=l, scale=s)
    period = 2 * numpy.pi * s
    cdf_at_x = frozen.cdf(x) % 1
    cdf_at_reduced_x = frozen.cdf(x % period) % 1
    assert_almost_equal(cdf_at_x, cdf_at_reduced_x)
# Example usage of scipy's von Mises distribution with concentration `kappa`.
# NOTE(review): `ax` is not created in this excerpt; it must already exist.
kappa = 3.99
# First four moments: mean, variance, skewness, kurtosis.
mean, var, skew, kurt = vonmises.stats(kappa, moments='mvsk')

# Display the probability density function (``pdf``) between the 1% and
# 99% quantiles:
x = np.linspace(vonmises.ppf(0.01, kappa),
                vonmises.ppf(0.99, kappa), 100)
ax.plot(x, vonmises.pdf(x, kappa),
        'r-', lw=5, alpha=0.6, label='vonmises pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:
rv = vonmises(kappa)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf`` (round trip through both):
vals = vonmises.ppf([0.001, 0.5, 0.999], kappa)
np.allclose([0.001, 0.5, 0.999], vonmises.cdf(vals, kappa))  # True

# Generate random numbers:
r = vonmises.rvs(kappa, size=1000)

# And compare the histogram. `density=True` normalises the histogram to unit
# area; it replaces the `normed` keyword, which current Matplotlib releases
# no longer accept.
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
# Disabled multivariate-normal example, kept for reference:
# ax = fig1.add_subplot(111)
# ax.plot(x, y)
# plt.show()
# x = np.linspace(0, 5, 10, endpoint=False)
# y = multivariate_normal.pdf(x, mean=2.5, cov=0.5); y
# array([ 0.00108914, 0.01033349, 0.05946514, 0.20755375, 0.43939129,
#         0.56418958, 0.43939129, 0.20755375, 0.05946514, 0.01033349])
# fig1 = plt.figure()
# ax = fig1.add_subplot(111)
# ax.plot(x, y)

# Freeze a von Mises distribution with array-valued parameters and print its
# pdf at [0, 0].
# NOTE(review): scipy broadcasts the shape and `loc` arrays element-wise, so
# this is presumably two independent 1-D densities rather than one 2-D
# density -- confirm that is what was intended.
kappa = 3.99390425811  # not used by the statements visible in this excerpt
rv = vonmises([1, 1], loc=[0, 0])
# print() call form: valid on Python 3 and, for a single argument, prints the
# same text on Python 2 (the former `print rv.pdf(...)` statement is a
# SyntaxError on Python 3).
print(rv.pdf([0, 0]))

# Disabled 2-D contour example, kept for reference:
# x, y = np.mgrid[-1:1:.01, -1:1:.01]
# pos = np.empty(x.shape + (2,))
# pos[:, :, 0] = x; pos[:, :, 1] = y
#
# rv = multivariate_normal([0.5, 0.5], [1.0, 1.0], allow_singular=True)
# fig2 = plt.figure()
# ax2 = fig2.add_subplot(111)
# ax2.contourf(x, y, rv.pdf(pos))
plt.show()
def nat_to_scipy_distribution(self, q: VonMisesFisherNP) -> Any:
    """Build a frozen ``ss.vonmises`` from ``q``.

    The values returned by ``q.to_kappa_angle()`` are forwarded positionally,
    in the order returned, to ``ss.vonmises``.
    """
    vonmises_args = q.to_kappa_angle()
    return ss.vonmises(*vonmises_args)
def my_KS_GOF_mvM(X, Y_data, alpha=0.05):
    """Kolmogorov-Smirnov goodness-of-fit test for a von Mises mixture.

    KEEP this code!

    CAUTION 1: this function is for mixtures of von Mises distributions.
    CAUTION 2: X should be a random sample that can be related to a
    continuous distribution. It is NOT suitable for data like the INTENSITY
    of light obtained from the FFT of an image (needs modification), and it
    must never be fed an equally-spaced sample.

    Parameters
    ----------
    X : sequence of float
        The observed random sample (it is sorted internally).
    Y_data : object with ``Concentration``, ``Location`` and ``Weight``
        Estimated mixture parameters, one entry per component (a data
        frame in the original usage). Every entry is evaluated as a
        von Mises component with scale 0.5; no entry is treated as a
        Uniform component by this function.
    alpha : float, optional
        Significance level. Must be exactly one of 0.01, 0.05 or 0.10;
        any other value makes the critical-value lookup raise IndexError.

    Returns
    -------
    pandas.DataFrame
        One row with columns 'Statistic', 'D_N', 'critical value',
        'p-value', 'alpha' and 'decision'.

    Side effects: prints intermediate results and draws a diagnostic
    matplotlib figure (PDF fit, both CDF variants and the ECDF).
    """
    N = len(X)
    xx = np.sort(X)
    # CAUTION: NEVER USE EQUALLY-SPACED r.s. WITH K-S STATISTIC!!!

    # Model parameters.
    scal_ = 0.5  # scales the distribution onto the semi-circle
    kap_mle = np.array(Y_data.Concentration)
    loc_mle = np.array(Y_data.Location)
    w_mle = np.array(Y_data.Weight)

    # Accumulate the weighted PDF and CDF of every component on the sorted
    # sample.
    cY_t = np.zeros(N)
    fY_t = np.zeros(N)
    for mu, kap, wi in zip(loc_mle, kap_mle, w_mle):
        fY_i = stats.vonmises(kap, mu, scal_)
        fY_t += wi * fY_i.pdf(xx)
        cY_t += wi * fY_i.cdf(xx)

    # Shift the summed CDF so that it does not leave [0, 1] at the violated
    # end (a constant offset, not a renormalisation).
    if max(cY_t) > 1:
        cY_c = cY_t - (max(cY_t) - 1.)
    elif min(cY_t) < 0:
        cY_c = cY_t + abs(min(cY_t))
    else:
        cY_c = cY_t

    # ECDF of the sample (ECDF is a helper defined elsewhere); used only for
    # the diagnostic plot below.
    x_values, y_values = ECDF(X)

    # a. K-S distances from the summed CDF. The ECDF step levels are
    # 0, 1/N, ..., 1. They are built from an integer range so that exactly
    # N + 1 levels exist; np.arange with a float step can return one element
    # too many, which breaks the subtraction against the length-N CDF.
    steps = np.arange(N + 1) / float(N)
    vec1 = cY_c - steps[:-1]   # F(x_i) - (i - 1)/N
    vec2 = steps[1:] - cY_c    # i/N - F(x_i)

    # b. Alternative model CDF from a cumulative sum of the PDF; plotted for
    # comparison only, it does not enter the statistic.
    dx = np.diff(xx)
    cY_b = np.ones(len(xx))
    cY_b[0:-1] = np.cumsum(fY_t[0:-1] * dx)

    Dm = np.array([vec1, vec2])
    D = max(np.max(Dm, axis=0))
    sqnD = (np.sqrt(N)) * D
    print('sqrt(N))*D =', sqnD)
    # KS_CDF is a helper defined elsewhere.
    pval = 1 - KS_CDF(sqnD)
    print('p-value:', pval)
    pval_mod = 1 - KS_CDF(sqnD + 1. / (6. * np.sqrt(N)))
    print('p-value-mod:', pval_mod)

    fig, ax = plt.subplots(1, 1, figsize=(4, 3))
    ax.set_title('From my_KS_GOF')
    ax.plot(xx, fY_t, 'b', lw=2, alpha=0.6, label='PDF fit')
    ax.plot(xx, cY_b, 'r', lw=2, alpha=0.6, label='CDF fit (cumsum)')
    ax.plot(xx, cY_c, 'g:', lw=2, alpha=0.6, label='CDF fit (+=)')
    ax.plot(x_values, y_values, 'c-.', lw=2, alpha=0.6, label='ECDF')
    ax.legend()

    # Critical points c / sqrt(N) for alpha = 0.01, 0.05, 0.10. Per the
    # original notes: 1 - KS_CDF(1.63) = 0.01, 1 - KS_CDF(1.36) = 0.05,
    # 1 - KS_CDF(1.22) = 0.10.
    d001 = 1.63 / np.sqrt(N)
    d005 = 1.36 / np.sqrt(N)
    d010 = 1.22 / np.sqrt(N)
    crit_points = np.array([d001, d005, d010])
    alpha_ = np.array([0.01, 0.05, 0.10])
    sig = pd.DataFrame({'alpha': alpha_,
                        'crit_points': crit_points})
    print(sig)
    # Exact-match lookup: an alpha outside the table raises IndexError here.
    ind = sig.index[sig['alpha'] == alpha].tolist()[0]
    if D > sig.crit_points[ind]:
        H0 = 'reject'
    else:
        H0 = 'do not reject'

    KS_res = pd.DataFrame({'Statistic': 'Kolmogorov-Smirnov', 'D_N': [D],
                           'p-value': pval,
                           'critical value': sig.crit_points[ind],
                           'alpha': [alpha], 'decision': H0})
    KS_res = KS_res[['Statistic', 'D_N', 'critical value', 'p-value',
                     'alpha', 'decision']]
    print(KS_res)

    return KS_res
def my_KS_GOF_mvM_I(X, Y_data, alpha=0.05):
    """Kolmogorov-Smirnov goodness-of-fit check for (angle, value) data.

    CAUTION 1: this function is for mixtures of von Mises distributions.
    CAUTION 2: the K-S test assumes a random sample from a continuous
    distribution; the original notes warn that intensity data (e.g. from
    the FFT of an image) still NEEDS modification here.

    Parameters
    ----------
    X : 2-D array
        Column 0 holds the angles (converted to radians with np.radians),
        column 1 the values handed to ECDF_Intensity.
    Y_data : object with ``Concentration``, ``Location`` and ``Weight``
        Estimated mixture parameters, one entry per component; every
        entry is evaluated as a von Mises component with scale 0.5.
    alpha : float, optional
        Significance level; must be exactly one of 0.01, 0.05 or 0.10.

    Returns
    -------
    pandas.DataFrame
        One row with columns 'Statistic', 'D_N', 'critical value',
        'p-value', 'alpha' and 'decision'.

    Side effects: prints intermediate results and draws a diagnostic figure.
    """
    angles = X[:, 0]
    values = X[:, 1]
    theta = np.radians(angles)
    N = len(theta)
    theta_sorted = np.sort(theta)
    root_n = np.sqrt(N)

    # Model parameters; 0.5 scales the distribution onto the semi-circle.
    scal_ = 0.5
    kappas = np.array(Y_data.Concentration)
    centres = np.array(Y_data.Location)
    weights = np.array(Y_data.Weight)

    # Weighted sums of the component PDFs and CDFs on the sorted angles.
    # The summed CDF is only plotted (the original author doubted it).
    cdf_summed = np.zeros(N)
    pdf_mix = np.zeros(N)
    for centre, kappa, weight in zip(centres, kappas, weights):
        component = stats.vonmises(kappa, centre, scal_)
        pdf_mix += weight * component.pdf(theta_sorted)
        cdf_summed += weight * component.cdf(theta_sorted)

    # ECDF of the intensity data (helper defined elsewhere).
    x_values, y_values = ECDF_Intensity(angles, values)

    # Model CDF as the cumulative sum of pdf * spacing, one element shorter
    # than the sample; the statistic is its largest absolute gap to the ECDF.
    spacing = np.diff(theta_sorted)
    cdf_cumsum = np.cumsum(pdf_mix[0:-1] * spacing)
    gaps = abs(cdf_cumsum - y_values[0:-1])
    D = max(gaps)

    scaled_D = root_n * D
    print('sqrt(N))*D =', scaled_D)
    pval = 1 - KS_CDF(scaled_D)
    print('p-value:', pval)
    pval_mod = 1 - KS_CDF(scaled_D + 1. / (6. * root_n))
    print('p-value-mod:', pval_mod)

    # Diagnostic figure.
    fig, ax = plt.subplots(1, 1, figsize=(4, 3))
    ax.plot(theta_sorted, pdf_mix, 'b', lw=2, alpha=0.6, label='PDF fit')
    ax.plot(theta_sorted[0:-1], cdf_cumsum, 'r', lw=2, alpha=0.6,
            label='CDF fit (cumsum)')
    ax.plot(theta_sorted, cdf_summed, 'g:', lw=2, alpha=0.6,
            label='CDF fit (+=)')
    ax.plot(x_values, y_values, 'c-.', lw=2, alpha=0.6, label='ECDF')
    ax.set_title('From my_KS_GOF')
    ax.legend()

    # Table of critical points c / sqrt(N) for alpha = 0.01, 0.05, 0.10.
    sig = pd.DataFrame({
        'alpha': np.array([0.01, 0.05, 0.10]),
        'crit_points': np.array([1.63 / root_n,
                                 1.36 / root_n,
                                 1.22 / root_n]),
    })
    print(sig)

    # Exact-match lookup of the requested significance level.
    ind = sig.index[sig['alpha'] == alpha].tolist()[0]
    critical = sig.crit_points[ind]
    H0 = 'reject' if D > critical else 'do not reject'

    columns = ['Statistic', 'D_N', 'critical value', 'p-value',
               'alpha', 'decision']
    KS_res = pd.DataFrame({'Statistic': 'Kolmogorov-Smirnov', 'D_N': [D],
                           'p-value': pval,
                           'critical value': critical,
                           'alpha': [alpha], 'decision': H0})
    KS_res = KS_res[columns]
    print(KS_res)
    return KS_res
# Fit a model to each sample, evaluate the model CDF on that sample's bin
# grid (bY*), and turn CDF differences into expected counts N * diff(CDF).
# NOTE(review): bY*, O*, exp1, N, ax and scal_ come from code above this
# excerpt.

# Student t: only the first fitted value is passed to the frozen
# distribution.
# NOTE(review): the remaining fitted values (presumably loc and scale) are
# not used -- confirm this is intended.
df3 = stats.t.fit(Y3)
fY3 = stats.t(df3[0])
cY3 = fY3.cdf(bY3)
exp3 = N * np.diff(cY3)
#exp3 = N*fY3.pdf(bY3)
ax[1, 1].plot(bY3, cY3, 'g--', label='CDF-fY3')

# Log-normal: same pattern, only the first fitted value is used.
s4 = stats.lognorm.fit(Y4)
fY4 = stats.lognorm(s4[0])
cY4 = fY4.cdf(bY4)
exp4 = N * np.diff(cY4)
#exp4 = N*fY4.pdf(bY4)
ax[1, 2].plot(bY4, cY4, 'g--', label='CDF-fY4')

# von Mises: the first two fitted values are used and the scale is set to
# scal_ explicitly.
# NOTE(review): in scipy's `fit`, a plain `scale=` keyword is presumably a
# starting guess and `fscale=` is what holds the scale fixed -- confirm
# which was intended.
s5 = stats.vonmises.fit(Y5, scale=scal_)
fY5 = stats.vonmises(s5[0], s5[1], scale=scal_)
cY5 = fY5.cdf(bY5)
exp5 = N * np.diff(cY5)
ax[1, 3].plot(bY5, cY5, 'g--', label='CDF-fY5')

# Show the legend on every panel of the 2 x 4 grid.
for i in range(0, 2):
    for j in range(0, 4):
        ax[i, j].legend()

# Chi-squared statistic from the formula sum((O - E)**2 / E):
chiSquared1 = np.sum(((O1 - exp1)**2) / exp1)
print('mychi2 test Y1: ', chiSquared1)
# Same statistic restricted to bins 2 .. -5; computed but not printed here.
chiSquared1b = np.sum(((O1[2:-5] - exp1[2:-5])**2) / exp1[2:-5])
chiSquared3 = np.sum(((O3 - exp3)**2) / exp3)
print('mychi2 test Y3: ', chiSquared3)
# PLOT in the same figure the original histogram and the model PDF: scal_ = 0.5 fig, ax = plt.subplots(1, 1, figsize=(9,4)) ax.set_title('Probability Density Function of Mixture Model (von Mises + Uniform)') ax.plot(angles, Int, 'b-', label='Original data') # prepare the PDFs: x_ = np.linspace( min(X_samples), max(X_samples), len(r_X) ) r_ = np.degrees(x_) x_ax = r_ X_tot = np.zeros(len(x_),) cXtot = np.zeros(len(x_),) jj = 0 # plot in the same histogram the approximations: for mu, kap, pii in zip(loc_mle, kap_mle, p_mle): jj += 1 fX_temp = stats.vonmises( kap, mu, scal_ ) # X_temp = pii*stats.vonmises.pdf( x_, kap, mu, scal_ ) X_temp = pii*fX_temp.pdf ( x_ ) X_tot += X_temp # ax.plot(x_ax, X_temp, linewidth=2, linestyle='--', \ # label='von Mises member {} '.format(jj)) #label=r'$\mu$ = {}, $\kappa$= {}, p= {} '.format(round(mu,3), round(kap,3), round(pii,3))) # this is wrong!!!: cXtot += pii*fX_temp.cdf( x_ ) # cXtot += pii*stats.vonmises.cdf( x_, kap, mu, scal_ ) ax.plot(x_ax, X_tot, color='red', linewidth=2, linestyle='-', label='Mixture fit') ax.set_xlabel(r'$\theta$ (degrees)', fontsize=12) ax.set_ylabel(r'$f(\theta)$', fontsize=12) ax.grid(color='gray', alpha=0.3, linestyle=':', linewidth=1) ax.legend(loc=1)
# Remove the tick marks from both axes of the current plot.
ax.set_xticks([])
ax.set_yticks([]);

# Compared to other KDEs in the Python ecosystem, the KDE implemented in
# ArviZ takes care of the boundaries of a distribution: ArviZ assigns a
# density of zero to any point outside the range of the data. Another nice
# feature of ArviZ's KDE is the method used to estimate the bandwidth, which
# works well for a wide range of distributions, including multimodal ones.
#
# The following plot compares the KDEs for ArviZ (on the left) and SciPy (on
# the right). The blue line is the theoretical distribution and the orange
# ones are the kernel density estimates.

def scipykdeplot(data, ax, **kwargs):
    """Plot SciPy's Gaussian KDE of ``data`` on ``ax``.

    The density is evaluated on ``len(data)`` evenly spaced points between
    ``data.min()`` and ``data.max()``; ``kwargs`` are forwarded to
    ``ax.plot``.
    """
    x = np.linspace(data.min(), data.max(), len(data))
    kde = stats.gaussian_kde(data)
    density = kde.evaluate(x)
    ax.plot(x, density, **kwargs)

size = 1000
bw = 4.5 # ArviZ's default value
_, ax = plt.subplots(5, 2, figsize=(15, 10), constrained_layout=True)

# Theoretical distributions, one per row of the 5 x 2 grid.
a_dist = stats.vonmises(loc=np.pi, kappa=20)
b_dist = stats.beta(a=2, b=5)
c_dist = [stats.norm(-8, 0.75), stats.norm(8, 1)]
d_dist = stats.norm(0, 1)
e_dist = stats.uniform(-1, 1)

# Random samples. The von Mises draws are wrapped through arctan2(sin, cos).
a = a_dist.rvs(size)
a = np.arctan2(np.sin(a), np.cos(a))
b = b_dist.rvs(size)
# Two-component normal sample: 7000 draws from the first component and 3000
# from the second (these counts do not use `size`).
c = np.concatenate((c_dist[0].rvs(7000), c_dist[1].rvs(3000)))
d = d_dist.rvs(size)
e = e_dist.rvs(size)

ax[0, 0].set_title('ArviZ')
ax[0, 1].set_title('Scipy')
# Flatten the collected per-track quantities into 1-D arrays.
data_th = np.array(all_th).reshape(-1)
data_dcp = np.array(all_dc_p).reshape(-1)
data_dc = np.array(all_dc).reshape(-1)

# %%
#####
# Inference for chemotactic strategy
#####
# von Mises distribution test
d2r = np.pi/180  # degrees -> radians
# NOTE(review): `alpha` comes from code above this excerpt.
vm_par = vonmises.fit((data_th-alpha*data_dcp)*d2r, scale=1)
# `density=True` normalises the histogram to unit area; it replaces the
# `normed` keyword, which current Matplotlib releases no longer accept.
plt.hist(data_th*d2r, bins=100, density=True);
#plt.hold(True)
xx = np.linspace(np.min(data_th*d2r), np.max(data_th*d2r), 100)
# Only the first fitted value is used for the frozen distribution.
rv = vonmises(vm_par[0])
plt.plot(xx, rv.pdf(xx), linewidth=3)


# negative log-likelihood
def nLL(THETA, dth, dcp, dc):
    # Unpacks THETA into (a_, k_, A_, B_) and computes the negative sum of
    # log(marginalP + 1e-7), where marginalP mixes a von Mises density with
    # a constant 1/(2*pi) term using the weight P = sigmoid2(A_, B_, dc).
    # NOTE(review): the part of this function visible here ends without
    # returning `nll`; check the statements that follow in the full file.
    #a_, k_, A_, B_, C_, D_ = THETA  #inferred parameter
    a_, k_, A_, B_ = THETA
    #P = sigmoid(A_, B_, C_, D_, dcp)
    P = sigmoid2(A_, B_, dc)
    #VM = np.exp(k_*np.cos((dth-a_*dcp)*d2r)) / (2*np.pi*iv(0,k_))#von Mises distribution
    #vm_par = vonmises.fit((dth-a_*dcp)*d2r, scale=1)
    rv = vonmises(k_)  #(vm_par[0])
    VM = rv.pdf((dth-a_*dcp)*d2r)
    marginalP = np.multiply((1-P), VM) + (1/(2*np.pi))*P
    # 1e-7 keeps the logarithm finite when marginalP is zero.
    nll = -np.sum(np.log(marginalP+1e-7))  #, axis=1)
    #fst = np.einsum('ij,ij->i', 1-P, VM)
def check_vonmises_cdf_periodic(k, l, s, x):
    """Check that ``cdf(x) % 1`` is unchanged when ``x`` is wrapped.

    Freezes ``stats.vonmises(k, loc=l, scale=s)`` and compares, with
    ``assert_almost_equal``, the CDF modulo 1 at ``x`` against the CDF
    modulo 1 at ``x % (2 * pi * s)``.
    """
    cdf = stats.vonmises(k, loc=l, scale=s).cdf
    direct = cdf(x) % 1
    wrapped = cdf(x % (2 * numpy.pi * s)) % 1
    assert_almost_equal(direct, wrapped)
def _mixture_ppf(members, weights, centers, probs, n_grid=4000):
    """Return mixture quantiles at ``probs`` by inverting the mixture CDF.

    The weighted sum of the member CDFs is tabulated on a grid running from
    ``min(centers) - pi`` to ``max(centers) + pi`` and inverted by linear
    interpolation. Probabilities outside the tabulated CDF range are
    clamped to the grid ends by ``np.interp``.
    """
    # NOTE(review): assumes non-negative weights, so that the tabulated CDF
    # is non-decreasing as np.interp requires.
    grid = np.linspace(min(centers) - np.pi, max(centers) + np.pi, n_grid)
    mix_cdf = np.zeros(n_grid)
    for member, weight in zip(members, weights):
        mix_cdf += weight * member.cdf(grid)
    return np.interp(probs, mix_cdf, grid)


def plot_mixs_vonMises_Specific(mixX, angles):
    """Plot PDF, CDF, SF, PPF and a probability plot for a von Mises mixture.

    One row of panels is drawn per mixture component and a final row for
    the mixture itself.

    Parameters
    ----------
    mixX : fitted mixture object
        Must provide ``n_clusters``, ``cluster_centers_`` (indexed as
        ``[i, 0]`` and ``[i, 1]``), ``concentrations_`` and ``weights_``.
    angles : sequence of float
        Angles in degrees; their min and max (converted to radians) set the
        x-range of the PDF, CDF and SF panels.

    Returns
    -------
    None. The figure is created through ``plt.subplots``; the location and
    the 90% interval bounds of every component are printed.
    """
    n_clus = mixX.n_clusters
    x_min_, x_max_ = np.radians((min(angles), max(angles)))
    x999 = np.linspace(x_min_, x_max_, 1000)
    # The PPF takes probabilities, not angles, so it gets its own grid.
    # The end points 0 and 1 are left out to keep the quantiles finite.
    q_grid = np.linspace(0.001, 0.999, 200)

    tXpdf = np.zeros(len(x999))
    tXcdf = np.zeros(len(x999))
    tXsf = np.zeros(len(x999))
    members, weights, centers = [], [], []

    fig, axes = plt.subplots(n_clus + 1, 4, figsize=(15, 3 * (n_clus + 1)))
    for i in range(n_clus):
        str_1 = 'von Mises for X_' + str(i + 1)
        # location in rads, from the two coordinates of the cluster centre:
        loc = math.atan2(mixX.cluster_centers_[i, 1],
                         mixX.cluster_centers_[i, 0])
        print('loc=', loc)
        # concentration:
        kappa = mixX.concentrations_[i]
        # weight:
        weight = mixX.weights_[i]
        # construct the member von Mises:
        X = stats.vonmises(kappa, loc)
        members.append(X)
        weights.append(weight)
        centers.append(loc)

        # PDF, CDF and SF of a mixture are weighted sums of the member
        # functions; the PPF is not, so it is handled after the loop.
        tXpdf += weight * X.pdf(x999)
        tXcdf += weight * X.cdf(x999)
        tXsf += weight * X.sf(x999)

        # central 90% interval of the member (shaded under its PDF):
        x_lo_90, x_hi_90 = X.interval(0.90)
        print('a=', x_lo_90)
        print('b=', x_hi_90)
        x90 = np.linspace(x_lo_90, x_hi_90, 1000)

        axes[i, 0].plot(x999, X.pdf(x999), label='PDF')
        axes[i, 0].set_ylabel(str_1)
        axes[i, 0].fill_between(x90, X.pdf(x90), alpha=0.25)
        axes[i, 1].plot(x999, X.cdf(x999), label='CDF')
        axes[i, 1].plot(x999, X.sf(x999), label='SF')
        axes[i, 2].plot(q_grid, X.ppf(q_grid), label='PPF')
        # probability plot of a fresh random sample against the member:
        stats.probplot(stats.vonmises.rvs(kappa, loc, size=len(x999)),
                       dist=stats.vonmises,
                       sparams=(kappa, loc), plot=axes[i, 3])
        axes[i, 0].legend()
        axes[i, 1].legend()
        axes[i, 2].legend()

    # last row: the mixture itself
    axes[n_clus, 0].plot(x999, tXpdf, label='PDF')
    axes[n_clus, 1].plot(x999, tXcdf, label='CDF')
    axes[n_clus, 1].plot(x999, tXsf, label='SF')
    axes[n_clus, 2].plot(q_grid,
                         _mixture_ppf(members, weights, centers, q_grid),
                         label='PPF')
    axes[n_clus, 0].set_ylabel('Mixture von Mises')
    axes[n_clus, 0].legend()
    axes[n_clus, 1].legend()
    axes[n_clus, 2].legend()