def truncinvgaussprior_pdf(data, mu, sigma):
    # Inverse Gaussian density truncated to (0, 1): renormalise by the probability
    # mass on [0, 1] and zero out entries with data >= 1; epsilon keeps the result
    # strictly positive so downstream logs do not blow up.
    epsilon = 1e-200
    term2 = (invgauss.pdf(data, sigma, scale=mu, loc=0.0)
             / (invgauss.cdf(1.0, sigma, scale=mu, loc=0.0)
                - invgauss.cdf(0.0, sigma, scale=mu, loc=0.0))) * (data < 1.0)
    return term2 + epsilon
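A minimal usage sketch of the truncated prior density above; the data array and the mu/sigma values are illustrative, and the numpy/scipy imports are assumptions about the surrounding module:

import numpy as np
from scipy.stats import invgauss

data = np.array([0.05, 0.2, 0.5, 0.9, 1.0])   # hypothetical values in (0, 1], with a point mass at 1.0
dens = truncinvgaussprior_pdf(data, mu=0.3, sigma=0.5)
# Entries with data >= 1.0 fall back to the epsilon floor; the rest are the
# inverse Gaussian density renormalised by its mass on (0, 1).
print(dens)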
def compute_LogLikelihood(data, fisk_params, ig_params, lognorm_params, gamma_params):
    n_fisk_params = 2
    n_ig_params = 3
    n_lognorm_params = 3
    n_gamma_params = 3

    trunc_data = data[data != 1.0]
    prob = np.sum(data == 1.0) / float(data.shape[0])

    ll_fisk_trunc = np.sum(-np.log(fisk.pdf(trunc_data, *fisk_params)))
    bic = np.log(trunc_data.shape[0]) * n_fisk_params - 2 * ll_fisk_trunc
    #ll_fisk_nontrunc = np.sum(-np.log(prob * np.ones()))

    ll_ig_trunc = np.sum(-np.log(invgauss.pdf(trunc_data, *ig_params)))
    #ll_ig_nontrunc = np.sum(np.log(prob))

    ll_ln_trunc = np.sum(-np.log(lognorm.pdf(trunc_data, *lognorm_params)))
    #ll_ln_nontrunc = np.sum(np.log(prob))

    ll_gamma_trunc = np.sum(-np.log(gamma.pdf(trunc_data, *gamma_params)))

    #ll_exp_trunc = np.sum(-np.log(expon.pdf(trunc_data, *exp_params)))
    #ll_exp_nontrunc = np.sum(np.log(prob))

    return ll_fisk_trunc, ll_ig_trunc, ll_ln_trunc, ll_gamma_trunc
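A hedged usage sketch for compute_LogLikelihood, assuming each parameter tuple comes straight from the corresponding scipy .fit() on the truncated data; the synthetic data below is illustrative only:

import numpy as np
from scipy.stats import fisk, invgauss, lognorm, gamma

rng = np.random.default_rng(0)
data = rng.beta(2.0, 5.0, size=500)   # synthetic values in (0, 1)
data[:25] = 1.0                       # add a point mass at exactly 1.0

trunc = data[data != 1.0]
fisk_params = fisk.fit(trunc, floc=0)        # (c, loc, scale)
ig_params = invgauss.fit(trunc, floc=0)      # (mu, loc, scale)
lognorm_params = lognorm.fit(trunc, floc=0)  # (s, loc, scale)
gamma_params = gamma.fit(trunc, floc=0)      # (a, loc, scale)

# Returns the negative log-likelihood of the truncated part for each distribution
print(compute_LogLikelihood(data, fisk_params, ig_params, lognorm_params, gamma_params))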
def compute_LogLikelihood_ExtData(data, fisk_params, ig_params):
    # Each *_params argument is assumed to be a (prob, scipy_params) pair;
    # the inverse Gaussian parameters are taken from ig_params.
    prob = fisk_params[0]
    fisk_params = fisk_params[1]
    ig_params = ig_params[1]

    num_one = np.sum(data == 1.0)
    trunc_data = data[data != 1.0]

    fisk_trunc_ll = np.sum(-np.log(fisk.pdf(trunc_data, *fisk_params)))
    fisk_nontrunc_ll = -np.log(prob) * num_one
    fisk_ll = fisk_trunc_ll + fisk_nontrunc_ll

    ig_trunc_ll = np.sum(-np.log(invgauss.pdf(trunc_data, *ig_params)))
    ig_nontrunc_ll = -np.log(prob) * num_one
    ig_ll = ig_trunc_ll + ig_nontrunc_ll

    return fisk_ll, ig_ll
def function(intensity, int_intensity_diff, shape):
    return intensity * invgauss.pdf(x=int_intensity_diff, mu=shape)
def preprocess_groundtruth_artificial_noise_balanced(ground_truth_folders, before_frac, windowsize, after_frac, noise_level, sampling_rate, smoothing, omission_list=[], permute=1, maximum_traces=5000000, verbose=3, replicas=1, causal_kernel=0):
    """
    The calcium traces are extracted, brought to a desired 'noise_level' and resampled at the
    'sampling_rate' in the function 'calibrated_ground_truth_artificial_noise()'.
    The function 'preprocess_groundtruth_artificial_noise()' goes through all 'ground_truth_folders'
    and extracts the ground truth in a way that can be used to train the deep network.

    As output, this function creates a large matrix 'X' that contains for each timepoint of each
    calcium trace a vector of length 'windowsize' around the timepoint. The function also creates
    a vector 'Y' that contains the corresponding spikes/non-spikes. Random permutations
    ('permute = 1') undo the original sequence of the timepoints. The number of samples is
    limited to 5 million.
    """
    sub_traces_all = [None]*500
    sub_traces_events_all = [None]*500
    events_all = [None]*500
    neuron_counter = 0
    nbx_datapoints = [None]*500
    dataset_sizes = np.zeros(len(ground_truth_folders),)
    dataset_indices = [None]*500

    # Go through all ground truth data sets and extract re-sampled ground truth
    for dataset_index, training_dataset in enumerate(ground_truth_folders):

        base_folder = os.getcwd()

        # Exception handling ('try') is used here to catch errors that arise if for example
        # some of the datasets contribute zero samples because they do not contain
        # recordings with sufficiently low noise levels (must be lower than 'noise_level')
        # or sufficiently long trials (must be significantly longer than 'window_size').
        try:
            if verbose > 1:
                print('Preprocessing dataset number', dataset_index)

            sub_traces_allX, sub_traces_events_allX, frame_rate, events_allX = calibrated_ground_truth_artificial_noise(
                ground_truth_folders[dataset_index], noise_level, sampling_rate, replicas, omission_list, verbose)

            datapoint_counter = 0
            for k in range(len(sub_traces_allX)):
                try:
                    datapoint_counter += sub_traces_allX[k].shape[1]*sub_traces_allX[k].shape[0]
                except:
                    if verbose > 2:
                        print('No things for k={}'.format(k))

            dataset_sizes[dataset_index] = datapoint_counter

            nbx_datapoints[neuron_counter:neuron_counter+len(sub_traces_allX)] = datapoint_counter*np.ones(len(sub_traces_allX),)
            sub_traces_all[neuron_counter:neuron_counter+len(sub_traces_allX)] = sub_traces_allX
            sub_traces_events_all[neuron_counter:neuron_counter+len(sub_traces_allX)] = sub_traces_events_allX
            events_all[neuron_counter:neuron_counter+len(sub_traces_allX)] = events_allX
            dataset_indices[neuron_counter:neuron_counter+len(sub_traces_allX)] = dataset_index*np.ones(len(sub_traces_allX),)

            neuron_counter += len(sub_traces_allX)

        except:
            sub_traces_allX = None
            dataset_sizes[dataset_index] = np.NaN

        os.chdir(base_folder)

    mininum_traces = 15e6/len(ground_truth_folders)

    # Reduce the number of data points for relatively large data sets to avoid bias
    reduction_factors = dataset_sizes/mininum_traces
    if np.nanmax(reduction_factors) > 1:
        oversampling = 1
    else:
        oversampling = 0

    if verbose > 1:
        print('Reducing ground truth by a factor of approximately '+str(int(3*np.nanmean(reduction_factors)))+'.')

    nbx_datapoints = nbx_datapoints[:neuron_counter]
    sub_traces_all = sub_traces_all[:neuron_counter]
    sub_traces_events_all = sub_traces_events_all[:neuron_counter]
    events_all = events_all[:neuron_counter]
    dataset_indices = dataset_indices[:neuron_counter]

    if verbose > 1:
        print('Number of neurons in the ground truth: '+str(len(sub_traces_events_all)))

    before = int(before_frac*windowsize)
    after = int(after_frac*windowsize)

    if causal_kernel:
        xx = np.arange(0, 199)/sampling_rate
        yy = invgauss.pdf(xx, smoothing/sampling_rate*2.0, 101/sampling_rate, 1)
        ix = np.argmax(yy)
        yy = np.roll(yy, int((99-ix)/1.5))
        causal_smoothing_kernel = yy/np.nansum(yy)

    X = np.zeros((15000000, windowsize,))
    Y = np.zeros((15000000,))

    # For-loop to generate the outputs 'X' and 'Y'
    counter = 0
    for neuron_ix, (sub_traces, sub_traces_events) in enumerate(zip(sub_traces_all, sub_traces_events_all)):
        if sub_traces is not None:
            for trace_index in range(sub_traces.shape[1]):

                single_trace = sub_traces[:, trace_index]
                single_spikes = sub_traces_events[:, trace_index]

                # Optional: Generates ground truth with causally smoothed kernel (see paper for details)
                if causal_kernel:
                    single_spikes = convolve(single_spikes.astype(float), causal_smoothing_kernel, mode='same')
                else:
                    single_spikes = gaussian_filter(single_spikes.astype(float), sigma=smoothing)

                recording_length = np.sum(~np.isnan(single_trace))
                datapoints_used = np.minimum(len(single_spikes)-windowsize, recording_length-windowsize)

                # Discarding (randomly chosen) samples to reduce ground truth dataset size
                if oversampling:
                    datapoints_used_rand = np.random.permutation(datapoints_used)
                    reduce_samples = reduction_factors[int(dataset_indices[neuron_ix])]
                    datapoints_used_rand = datapoints_used_rand[0:int(len(datapoints_used_rand)/(max(reduce_samples, 1)))]
                else:
                    datapoints_used_rand = np.arange(datapoints_used)

                for time_points in datapoints_used_rand:
                    Y[counter, ] = single_spikes[time_points+before]
                    X[counter, :, ] = single_trace[time_points:(time_points+before+after)]
                    counter += 1

    Y = np.expand_dims(Y[:counter], axis=1)
    X = np.expand_dims(X[:counter, :], axis=2)

    # Permute the ordering of the output for improved gradient descent during learning
    if permute == 1:
        p = np.random.permutation(len(X))
        X = X[p, :, :]
        Y = Y[p, :]

    # Maximum of 5e6 training samples
    X = X[:5000000, :, :]
    Y = Y[:5000000, :]

    X = X[np.where(~np.isnan(Y))[0], :, :]
    Y = Y[np.where(~np.isnan(Y))[0], :]

    os.chdir(base_folder)

    if verbose > 1:
        print('Shape of training dataset X: {} Y: {}'.format(X.shape, Y.shape))

    return X, Y
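For intuition about the causal_kernel branch above, the smoothing kernel can be built and plotted on its own; sampling_rate and smoothing below are made-up values, and the construction simply mirrors the lines inside the function:

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import invgauss

sampling_rate = 30.0   # Hz, illustrative
smoothing = 0.2        # illustrative smoothing parameter

xx = np.arange(0, 199) / sampling_rate
yy = invgauss.pdf(xx, smoothing / sampling_rate * 2.0, 101 / sampling_rate, 1)
ix = np.argmax(yy)
yy = np.roll(yy, int((99 - ix) / 1.5))          # shift the peak toward the centre bin
causal_smoothing_kernel = yy / np.nansum(yy)    # normalise to unit sum

plt.plot(xx, causal_smoothing_kernel)
plt.xlabel('Time (s)')
plt.ylabel('Kernel weight')
plt.show()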
               'number of simulations run = ' + str(number_of_simulations) +
               '\np_regain = ' + str(regain_probability) +
               '\np_lose = ' + str(loss_probability),
               ha='right', linespacing=1.8)
    # Adds a legend indicating the number of simulations, p_regain value, and p_lose value.
    # Places this legend right justified near the right edge, and at 65% of the height of the graph.

    if with_normal_distribution_fit:
        from scipy.stats import norm
        norm_values = numpy.linspace(0, bins.max(), steps_to_save_max) - 0.5
        parameter_estimates = norm.fit(data)
        normal_fit = number_of_simulations * norm.pdf(norm_values, *parameter_estimates)
        ax.plot(normal_fit, '--', color='red', label='normal distribution curve fit')
        ax.legend(loc='upper right')
    # If with_normal_distribution_fit is True, generate a linspace with the same dimensions as the histogram.
    # Use these values with scipy.stats.norm to generate a normal curve of best fit as a red dashed line.

    if with_invgauss_distribution_fit:
        from scipy.stats import invgauss
        invgauss_values = numpy.linspace(0, bins.max(), steps_to_save_max) - 0.5
        parameter_estimates = invgauss.fit(data)
        invgauss_fit = number_of_simulations * invgauss.pdf(invgauss_values, *parameter_estimates)
        ax.plot(invgauss_fit, '--', color='blue', label='inverse gaussian distribution curve fit')
        ax.legend(loc='upper right')
    # If with_invgauss_distribution_fit is True, generate a linspace with the same dimensions as the histogram.
    # Use these values with scipy.stats.invgauss to generate an inverse gaussian curve of best fit as a blue dashed line.

    plt.show()
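The same histogram-plus-fit overlay, shown self-contained with synthetic data; every name below is illustrative and not taken from the original script:

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import invgauss

samples = invgauss.rvs(0.5, scale=10.0, size=2000, random_state=1)   # synthetic data

fig, ax = plt.subplots()
counts, bin_edges, _ = ax.hist(samples, bins=40, density=True, alpha=0.4)

grid = np.linspace(bin_edges[0], bin_edges[-1], 400)
params = invgauss.fit(samples, floc=0)   # (mu, loc, scale)
ax.plot(grid, invgauss.pdf(grid, *params), 'b--', label='inverse gaussian distribution curve fit')
ax.legend(loc='upper right')
plt.show()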
c = shape2
b = scale2
y2 = weibull_min.pdf(data2, c, scale=b)
log_likelihood2 = np.sum(np.log(y2))
print("Weibull loglikelihood = " + str(log_likelihood2))
aic2 = -2 * log_likelihood2 + 2 * num_params
print("Weibull AIC = " + str(aic2))

# https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.invgauss.html
# The argument floc=0 ensures that the location is not treated as a free parameter.
# Parameter estimates for generic data
shape3, loc3, scale3 = invgauss.fit(data2, floc=0)
mu3 = shape3
lambda3 = scale3
# Fitting the data with the estimated parameters
y3 = invgauss.pdf(data2, mu3, scale=lambda3)
# Calculate the log-likelihood
log_likelihood3 = np.sum(np.log(y3))
print("Wald loglikelihood = " + str(log_likelihood3))
# Calculate AIC
aic3 = -2 * log_likelihood3 + 2 * num_params
print("Wald AIC = " + str(aic3))

# Plotting qq plots for the three distributions; the parameters are fitted by calling distribution.fit()
sm.qqplot(data2, lognorm, fit=True, loc=0, line='45')
plt.title('Lognorm distribution qq plot')
sm.qqplot(data2, weibull_min, fit=True, loc=0, line='45')
plt.title('Weibull distribution qq plot')
sm.qqplot(data2, invgauss, fit=True, loc=0, line='45')
plt.title('Wald distribution qq plot')
plt.show()
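With several AIC values in hand, the best-fitting model can be picked programmatically; a small sketch using the two scores computed above:

# Lower AIC is better; extend the dict with any other fitted models (e.g. the lognormal score).
aic_scores = {'weibull': aic2, 'wald': aic3}
best_model = min(aic_scores, key=aic_scores.get)
print("Best fit by AIC = " + best_model)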
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import invgauss

# Wald Distribution from scipy: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.invgauss.html
# Wikipedia: https://en.wikipedia.org/wiki/Inverse_Gaussian_distribution

mu1 = 1
mu2 = 3
l1 = 1
l2 = 3
l3 = 0.2

x = np.linspace(0, 5, 1000)

# scale = lambda
y1 = invgauss.pdf(x, mu1, scale=l1)
y2 = invgauss.pdf(x, mu1, scale=l2)
y3 = invgauss.pdf(x, mu1, scale=l3)
y4 = invgauss.pdf(x, mu2, scale=l1)
y5 = invgauss.pdf(x, mu2, scale=l3)

fig, ax = plt.subplots(1, 1)
ax.plot(x, y1, label=r'1: $\mu$ = ' + str(mu1) + r", $\lambda$ = " + str(l1))
ax.plot(x, y2, label=r'2: $\mu$ = ' + str(mu1) + r", $\lambda$ = " + str(l2))
ax.plot(x, y3, label=r'3: $\mu$ = ' + str(mu1) + r", $\lambda$ = " + str(l3))
ax.plot(x, y4, label=r'4: $\mu$ = ' + str(mu2) + r", $\lambda$ = " + str(l1))
ax.plot(x, y5, 'k', label=r'5: $\mu$ = ' + str(mu2) + r", $\lambda$ = " + str(l3))
ax.legend(loc='best', frameon=False)
plt.title("Wald Distribution PDF plot")
plt.show()
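One caveat when reading the legend above: in scipy's parametrisation the distribution mean is mu * scale, so curves that change the scale also change the mean. A quick check:

import numpy as np
from scipy.stats import invgauss

# Curve 2 above uses shape mu1 = 1 with scale l2 = 3; its mean is 1 * 3 = 3, not 1.
mean2 = invgauss.stats(1, scale=3, moments='m')
print(float(mean2), np.isclose(mean2, 1 * 3))   # 3.0 True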
from scipy.stats import invgauss
import matplotlib.pyplot as plt
import numpy as np

# invgauss.pdf(x, mu) = 1 / sqrt(2*pi*x**3) * exp(-(x-mu)**2/(2*x*mu**2))

fig, ax = plt.subplots(1, 1)
mu = 1

# Plot over the central 98% of the distribution; ppf gives the quantiles that bracket the range
x = np.linspace(invgauss.ppf(0.01, mu), invgauss.ppf(0.99, mu), 100)
ax.plot(x, invgauss.pdf(x, mu), 'r-', lw=5, alpha=0.6, label='invgauss pdf')

rv = invgauss(mu)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

r = invgauss.rvs(mu, size=1000)
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()
def run_probability():
    from scipy.stats import norm, invgauss
    import matplotlib.pyplot as plt

    # Naca 63-2-215
    x0 = np.array([0.06343078, 0.1400427, 0.1070076, 0.13289899, -0.01762751, -0.08041535, 0.02156679])
    xl = np.array([0.04343078, 0.0900427, 0.0570076, 0.08289899, -0.06762751, -0.13041535, -0.02843321])
    xu = np.array([0.08343078, 0.1900427, 0.1570076, 0.18289899, 0.03237249, -0.03041535, 0.07156679])
    xopt = np.array([0.04754971, 0.12243828, 0.06323741, 0.15394668, -0.04237265, -0.13041535, 0.07156679])
    xopt = np.array([0.0556, 0.1238, 0.0570, 0.1489, -0.0524, -0.1066, 0.0702])

    #cfdDataPath = r'D:\3. Conferences\201606 - AIAA Aviation 2016\data\results 20160512\AVIATION2016_DOE1.txt'
    cfdDataPath = 'DOE_LHS9_50_fighter.txt'
    pathPDF0 = 'PDF_baseline.txt'
    pathPDF1 = 'PDF_RGVFM_surrogate.txt'
    #pathPDF2 = 'PDF_RGVFM_surrogate.txt'

    fid0 = open(pathPDF0, 'wt')
    fid1 = open(pathPDF1, 'wt')

    obj = ObjectiveFunction(xl, xu, x0, cfdDataPath)
    x0norm = obj.xNormalization.normalize(np.hstack([0., 0, x0]))
    x0norm = x0norm[2:]
    xoptNorm = obj.xNormalization.normalize(np.hstack([0., 0, xopt]))
    xoptNorm = xoptNorm[2:]
    print(obj.objective_function(x0norm, True, True, True))
    print(obj.objective_function(xoptNorm, True, True, True))

    # --- pdf plot section ---
    ns = 200  # number of samples
    Mach = np.random.normal(obj.meanMach, obj.varMach/2.0, ns)
    alpha = np.random.normal(obj.meanAlpha, obj.varAlpha/2.0, ns)
    cl0 = np.zeros(ns)
    cl1 = np.zeros(ns)
    cd0 = np.zeros(ns)
    cd1 = np.zeros(ns)
    cm0 = np.zeros(ns)
    cm1 = np.zeros(ns)
    fid0.write('Mach\talpha\tcl\tcd\tcm\n')
    fid1.write('Mach\talpha\tcl\tcd\tcm\n')
    for i in range(ns):
        cl0[i], cd0[i], cm0[i] = obj.get_analysis(x0, Mach[i], alpha[i])
        cl1[i], cd1[i], cm1[i] = obj.get_analysis(xopt, Mach[i], alpha[i])
        fid0.write('%.8f\t%.8f\t%.8f\t%.8f\t%.8f\n' % (Mach[i], alpha[i], cl0[i], cd0[i], cm0[i]))
        fid1.write('%.8f\t%.8f\t%.8f\t%.8f\t%.8f\n' % (Mach[i], alpha[i], cl1[i], cd1[i], cm1[i]))

    mean0 = np.mean(cd0)
    sigma0 = np.std(cd0)
    mean1 = np.mean(cd1)
    sigma1 = np.std(cd1)
    fid0.write('mean: %.8f\nvariance: %.8f\n' % (mean0, sigma0))
    fid1.write('mean: %.8f\nvariance: %.8f\n' % (mean1, sigma1))
    fid0.close()
    fid1.close()

    xplot0 = np.linspace(min(cd0), max(cd0), 1000)
    xplot1 = np.linspace(min(cd1), max(cd1), 1000)
    pdf0 = invgauss.pdf(xplot0, mean0, sigma0)
    pdf1 = invgauss.pdf(xplot1, mean1, sigma1)
    print(mean0, sigma0)
    print(mean1, sigma1)

    param0 = invgauss.fit(cd0, mean0)
    pdfFit = invgauss.pdf(xplot0, param0[0])

    plt.figure(1)
    # plt.hold(True)
    plt.hist(cd0, bins=20, density=True, alpha=0.3, color='b')
    plt.plot(xplot0, pdfFit, 'b', lw=2.0)
    # plt.figure(3)
    # plt.plot(xplot0,pdf0)
    #
    # #plt.figure(2)
    # #plt.hold(True)
    # plt.hist(cd1, bins=20, normed=True,alpha=0.3, color='r')
    # plt.plot(xplot1, pdf1,'r',lw=2.0)
    # plt.legend(['Baseline','Optimum'],loc='upper right')
    # plt.xlabel('Drag coefficient')
    plt.show()
import numpy as np
from scipy.stats import invgauss
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1)

# Calculate a few first moments:
mu = 0.145
mean, var, skew, kurt = invgauss.stats(mu, moments='mvsk')

# Display the probability density function (``pdf``):
x = np.linspace(invgauss.ppf(0.01, mu), invgauss.ppf(0.99, mu), 100)
ax.plot(x, invgauss.pdf(x, mu), 'r-', lw=5, alpha=0.6, label='invgauss pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.
# Freeze the distribution and display the frozen ``pdf``:
rv = invgauss(mu)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:
vals = invgauss.ppf([0.001, 0.5, 0.999], mu)
np.allclose([0.001, 0.5, 0.999], invgauss.cdf(vals, mu))
# True

# Generate random numbers:
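A minimal continuation of the random-number step, in the same style as the earlier histogram snippet (assuming the fig, ax, and mu defined above are still live):

r = invgauss.rvs(mu, size=1000)
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()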
for v in range(1, nodes_in_net+1):
    single_pds = 1 - math.pow((1-pd), 1/v)
    pds.append(single_pds)
pds_mean = np.mean(pds)

# loop to get the values of pfs
for v in range(1, nodes_in_net+1):
    single_pds = 1 - math.pow((1-pf), 1/v)
    pfs.append(single_pds)
pfs_mean = np.mean(pfs)

# get all q inverse for pds
for i in pds:
    x = invgauss.pdf(i, pds_mean)
    q_inv_pds.append(x)

# get all q inverse for pfs
for i in pfs:
    x = invgauss.pdf(i, pfs_mean)
    q_inv_pfs.append(x)

for i in range(nodes_in_net):
    tsi = math.pow((((q_inv_pfs[i] - q_inv_pds[i]) * (1 + snr)) / snr), 2)
    ts.append(tsi)

ts_sorted = sorted(ts)  # Sort the nodes in ascending order based on TS i
cmu_sum = np.cumsum(ts_sorted)
# loop over the cumulative sum of sensing time of nodes and get the nodes with ST less than minimum sensing time
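If "q inverse" here refers to the inverse of the standard Gaussian Q-function (an assumption based on the variable names only), that quantity is usually obtained from the normal distribution's inverse survival function; a minimal sketch under that assumption:

import numpy as np
from scipy.stats import norm

p = np.array([0.9, 0.95, 0.99])   # illustrative probabilities

# Q(x) = 1 - Phi(x), so Q^{-1}(p) = norm.isf(p) (the inverse survival function)
q_inv = norm.isf(p)
print(q_inv)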
def inv_gaussian_pdf(x, mu=mu_def, rate=rate_def):
    # change to double-param form
    y = (x * mu**2) / rate
    return invgauss.pdf(y, mu)
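For reference, a common way to evaluate the two-parameter IG(mu, lambda) density with scipy maps mu and lambda onto the (shape, scale) arguments directly; this is a sketch of that mapping, not the conversion used above:

import numpy as np
from scipy.stats import invgauss

def ig_pdf_mu_lambda(x, mu, lam):
    # IG(mu, lam) corresponds to scipy's invgauss with shape mu/lam and scale lam,
    # since the scipy mean is shape * scale and lambda equals the scale.
    return invgauss.pdf(x, mu / lam, scale=lam)

x = np.linspace(0.1, 5.0, 5)
print(ig_pdf_mu_lambda(x, mu=1.0, lam=2.0))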