def my_std(N):
    '''
    Finding the standard deviation for each step count in the list of steps
    N - list of step counts
    '''
    stds = np.zeros(len(N))  # initializing the list of standard deviations
    for j, step in enumerate(N):
        guess = np.zeros(100)  # initializing the list of guessed final positions
        for i in range(100):
            guess[i], steps = position(0.5, step)  # guessing final positions
        stds[j] = scipy.nanstd(guess)  # standard deviation for the current step count
    return stds
def threeSigma(a):
    '''
    This function is a simple implementation of the 3-sigma method
    '''
    mu = np.nanmean(a)
    sigma = sp.nanstd(a, ddof=1)
    UpEdge = mu + 3 * sigma
    LowEdge = mu - 3 * sigma
    a1 = np.concatenate(([float('nan')], a[:-1]), axis=0)
    a2 = np.concatenate((a[1:], [float('nan')]), axis=0)
    A = np.transpose(np.array([a1, a, a2]))
    count = 0
    for k, ele in enumerate(a):
        if ele > UpEdge or ele < LowEdge:
            a[k] = np.nanmean(A[k, :])
            count = count + 1
    return a, count
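# Illustrative usage sketch for threeSigma above (added for illustration, not
# part of the original source). It mirrors the same 3-sigma replacement in a
# self-contained form, using np.nanstd(ddof=1) in place of sp.nanstd (assuming
# sp refers to the scipy namespace, whose nan-aware re-exports were removed in
# later SciPy releases). The input array is made up for the demo.
import numpy as np

def three_sigma_demo():
    rng = np.random.default_rng(0)
    a = np.concatenate([rng.normal(1.0, 0.1, 50), [25.0]])  # one injected outlier
    mu, sigma = np.nanmean(a), np.nanstd(a, ddof=1)
    up, low = mu + 3 * sigma, mu - 3 * sigma
    # pad with NaN so each element can be averaged with its two neighbours,
    # exactly as threeSigma does via the stacked [previous, current, next] array
    padded = np.concatenate(([np.nan], a, [np.nan]))
    count = 0
    for k in range(len(a)):
        if a[k] > up or a[k] < low:
            a[k] = np.nanmean(padded[k:k + 3])
            count += 1
    return a, count  # the injected 25.0 is the single replacement here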
def guess(times, p, N):
    '''
    Guessing the last position of the drunk walker, plotting a histogram,
    calculating the standard deviation and testing for normality to know
    whether the distribution is Gaussian or not
    '''
    guess = np.zeros(times)  # list of guessed final positions

    # Calculating
    for i in range(times):
        # getting the guessed final position and the steps for each run
        guess[i], steps = position(p, N)
    # calculating the average of the final positions over all runs
    avg = np.sum(guess) / times

    # Getting the histogram and plotting it
    hist, bin_edges = scipy.histogram(guess)
    plt.title("Histogram for probability=" + str(p) + " and steps=" + str(N))
    plt.bar(bin_edges[:-1], hist, width=5)
    plt.show()

    print("Calculations for", times, "times, with probability =", p, "and steps =", N, ":")
    print("Final position:", avg)

    # Getting the standard deviation
    std = scipy.nanstd(guess)
    print("Standard deviation:", std)

    # Applying the Shapiro test to check whether the distribution is Gaussian
    shp = stats.shapiro(guess)
    if shp[1] > .05:
        print("Shapiro test shows that it is a Gaussian distribution, result:", shp[1])
    else:
        print("Shapiro test shows that it is not a Gaussian distribution, result:", shp[1])
    print()
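# Both my_std and guess above call a position(p, N) helper that is not included
# in this excerpt. The sketch below is a hypothetical stand-in (an assumption,
# not the original implementation): a 1-D random walk of N steps, each +1 with
# probability p and -1 otherwise, returning the final position and the positions
# visited along the way.
import numpy as np

def position(p, N):
    steps = np.where(np.random.rand(N) < p, 1, -1)  # +1 with probability p, else -1
    path = np.cumsum(steps)                         # position after each step
    return path[-1], path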
def profile_metrics(profile_labels, profile_preds, counts_labels, counts_preds,
                    outf_prefix, title, pseudoreps, flank,
                    smooth_observed_profile, smooth_predicted_profile, smooth_preps):
    # profile_preds is in logit space;
    # take the softmax to put it in probability space
    coords = profile_labels.index.tolist()
    chroms = [i[0] for i in coords]
    summits = [i[1] for i in coords]
    profile_labels = profile_labels.values
    profile_preds = profile_preds.values

    # perform smoothing of labels/predictions, if specified
    if smooth_observed_profile == True:
        profile_labels = scipy.ndimage.gaussian_filter1d(profile_labels, 7, axis=1, truncate=(80 / 14))
    if smooth_predicted_profile == True:
        profile_preds = scipy.ndimage.gaussian_filter1d(profile_preds, 7, axis=1, truncate=(80 / 14))
    profile_preds_softmax = softmax(profile_preds, axis=1)

    # get the multinomial nll
    print(profile_labels.shape)
    print(profile_preds.shape)
    print(counts_labels.shape)
    mnnll_vals = profile_multinomial_nll(
        np.expand_dims(np.expand_dims(profile_labels, axis=1), axis=-1),
        np.expand_dims(np.expand_dims(np.log(profile_preds_softmax), axis=1), axis=-1),
        np.expand_dims(np.exp(counts_labels), axis=-1))

    # put the counts in probability space to use jsd
    num_regions = profile_labels.shape[0]
    region_jsd = []
    pseudorep_jsd = []
    shuffled_labels_jsd = []  # shuffled labels vs observed labels
    outf = open(outf_prefix + ".jsd.txt", 'w')
    outf.write('Region\tJSD\tPseudorepJSD\tNLL\n')
    for i in range(num_regions):
        denominator = np.nansum(profile_labels[i, :])
        if denominator != 0:
            cur_profile_labels_prob = profile_labels[i, :] / denominator
        else:
            cur_profile_labels_prob = profile_labels[i, :]
        cur_profile_preds_softmax = profile_preds_softmax[i, :]
        cur_jsd = jensenshannon(cur_profile_labels_prob, cur_profile_preds_softmax)
        region_jsd.append(cur_jsd)

        # get the jsd of the shuffled label with the true label
        shuffled_labels = np.random.permutation(profile_labels[i, :])
        shuffled_labels_prob = shuffled_labels / np.nansum(shuffled_labels)
        shuffled_labels_jsd.append(jensenshannon(cur_profile_labels_prob, shuffled_labels_prob))

        if pseudoreps is not None:
            prep1_vals = np.nan_to_num(pseudoreps[0].values(chroms[i], summits[i] - flank, summits[i] + flank, numpy=True))
            prep2_vals = np.nan_to_num(pseudoreps[1].values(chroms[i], summits[i] - flank, summits[i] + flank, numpy=True))
            if smooth_preps == True:
                prep1_vals = scipy.ndimage.gaussian_filter1d(prep1_vals, 7, truncate=(80 / 14))
                prep2_vals = scipy.ndimage.gaussian_filter1d(prep2_vals, 7, truncate=(80 / 14))
            # normalize
            if np.nansum(prep1_vals) != 0:
                prep1_vals = prep1_vals / np.nansum(prep1_vals)
            if np.nansum(prep2_vals) != 0:
                prep2_vals = prep2_vals / np.nansum(prep2_vals)
            prep_jsd = jensenshannon(prep1_vals, prep2_vals)
            pseudorep_jsd.append(prep_jsd)
        else:
            prep_jsd = None
        outf.write(str(chroms[i]) + '\t' + str(summits[i]) + '\t' + str(cur_jsd) + '\t' + str(prep_jsd) + '\t' + str(mnnll_vals[i]) + '\n')
    outf.close()

    num_bins = 100
    plt.rcParams["figure.figsize"] = 8, 8

    # plot mnnll histogram
    plt.figure()
    n, bins, patches = plt.hist(mnnll_vals, num_bins, facecolor='blue', alpha=0.5, label="Predicted vs Labels")
    plt.xlabel('Multinomial Negative LL Profile Labels and Preds in Probability Space')
    plt.title("MNNLL:" + title)
    plt.legend(loc='best')
    plt.savefig(outf_prefix + ".mnnll.png", format='png', dpi=300)

    # plot jsd histogram
    plt.figure()
    n, bins, patches = plt.hist(region_jsd, num_bins, facecolor='blue', alpha=0.5, label="Predicted vs Labels")
    if prep_jsd is not None:
        n2, bins2, patches2 = plt.hist(pseudorep_jsd, num_bins, facecolor='red', alpha=0.5, label="Pseudoreps")
    n3, bins3, patches3 = plt.hist(shuffled_labels_jsd, num_bins, facecolor='black', alpha=0.5, label='Labels vs Shuffled Labels')
    plt.xlabel('Jensen Shannon Distance Profile Labels and Preds in Probability Space')
    plt.title("JSD Dist.:" + title)
    plt.legend(loc='best')
    plt.savefig(outf_prefix + ".jsd.png", format='png', dpi=300)
    if prep_jsd is not None:
        density_scatter(np.asarray(region_jsd),
                        np.asarray(pseudorep_jsd),
                        xlab='JSD Predict vs Labels',
                        ylab='JSD Pseudoreps',
                        title='JSD vs Pseudoreps:' + title,
                        figtitle=outf_prefix + ".jsd.pseudorep.png")

    # get mean and std
    if len(pseudorep_jsd) > 0:
        return (nanmean(region_jsd), nanstd(region_jsd),
                nanmean(mnnll_vals), nanstd(mnnll_vals),
                nanmean(pseudorep_jsd), nanstd(pseudorep_jsd),
                nanmean(shuffled_labels_jsd), nanstd(shuffled_labels_jsd))
    else:
        return (nanmean(region_jsd), nanstd(region_jsd),
                nanmean(mnnll_vals), nanstd(mnnll_vals),
                None, None,
                nanmean(shuffled_labels_jsd), nanstd(shuffled_labels_jsd))
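# Minimal sketch of the Jensen-Shannon distance call used above (added for
# illustration; the two profiles are made up). jensenshannon comes from
# scipy.spatial.distance, expects non-negative vectors, and normalises them to
# probability distributions internally, so pre-normalising as profile_metrics
# does is harmless.
import numpy as np
from scipy.spatial.distance import jensenshannon

obs = np.array([2.0, 8.0, 15.0, 5.0, 0.0])        # observed per-base counts
pred = np.array([0.05, 0.25, 0.45, 0.20, 0.05])   # predicted probabilities (softmax output)
print(jensenshannon(obs / obs.sum(), pred))        # 0.0 would mean identical profiles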
def initZ(self, pmean, pvar, qmean, qvar, qE=None, qE2=None, covariates=None, scale_covariates=None):
    """Method to initialise the latent variables

    PARAMETERS
    ----------
    pmean:
    pvar:
    qmean:
    qvar:
    qE:
    qE2:
    covariates: nd array
        matrix of covariates with dimensions (nsamples, ncovariates)
    scale_covariates:
    """
    # Initialise the mean of the Q distribution
    if qmean is not None:
        if isinstance(qmean, str):
            if qmean == "random":
                # Random initialisation of the latent variables
                qmean = stats.norm.rvs(loc=0, scale=1, size=(self.N, self.K))
            elif qmean == "orthogonal":
                # Latent variables are initialised randomly but ensuring orthogonality
                pca = sklearn.decomposition.PCA(n_components=self.K, copy=True, whiten=True)
                pca.fit(stats.norm.rvs(loc=0, scale=1, size=(self.N, 9999)).T)
                qmean = pca.components_.T
            elif qmean == "pca":
                # Latent variables are initialised from PCA on the concatenated matrix
                pca = sklearn.decomposition.PCA(n_components=self.K, copy=True, whiten=True)
                pca.fit(s.concatenate(self.data, axis=1).T)
                qmean = pca.components_.T
        elif isinstance(qmean, s.ndarray):
            assert qmean.shape == (self.N, self.K)
        elif isinstance(qmean, (int, float)):
            qmean = s.ones((self.N, self.K)) * qmean
        else:
            print("Wrong initialisation for Z")
            exit()

    # Add covariates
    if covariates is not None:
        assert scale_covariates is not None, "If you use covariates, also define data_opts['scale_covariates']"

        # Select indices for the covariates
        idx_covariates = s.array(range(covariates.shape[1]))

        # Center and scale the covariates to match the prior distribution N(0,1)
        # to-do: this needs to be improved to take the particular mean and var into account
        # covariates[scale_covariates] = (covariates - covariates.mean(axis=0)) / covariates.std(axis=0)
        scale_covariates = s.array(scale_covariates)
        covariates[:, scale_covariates] = (covariates[:, scale_covariates] -
                                           s.nanmean(covariates[:, scale_covariates], axis=0)) / \
                                          s.nanstd(covariates[:, scale_covariates], axis=0)

        # Set the missing values in the covariates to zero
        covariates[s.isnan(covariates)] = 0.
        qmean[:, idx_covariates] = covariates
    else:
        idx_covariates = None

    # Initialise the node
    # self.Z = Constant_Node(dim=(self.N, self.K), value=qmean)
    self.Z = Z_Node(dim=(self.N, self.K),
                    pmean=s.ones((self.N, self.K)) * pmean,
                    pvar=s.ones((self.K,)) * pvar,
                    qmean=s.ones((self.N, self.K)) * qmean,
                    qvar=s.ones((self.N, self.K)) * qvar,
                    qE=qE, qE2=qE2,
                    idx_covariates=idx_covariates)
    self.nodes["Z"] = self.Z
# Need to use underlying numpy arrays for singleton expansion ('broadcasting')
# and form new DataFrame using appropriate column names.
# TODO use DataFrame.sub() instead:
# TODO wcl_foldch = np.log2(wcl[wcl_exp]).sub(np.log2(wcl[wcl_ctrl]))
wcl_foldch = pd.DataFrame(
    np.log2(wcl[wcl_exp]).values - np.log2(wcl[wcl_ctrl]).values,
    columns=wcl_exp, index=names)
wclp_foldch = pd.DataFrame(
    np.log2(wclp[wclp_exp]).values - np.log2(wclp[wclp_ctrl]).values,
    columns=wclp_exp, index=names)
ub_foldch = pd.DataFrame(
    np.log2(ub[ub_exp]).values - np.log2(ub[ub_ctrl]).values,
    columns=ub_exp, index=names)
ubp_foldch = pd.DataFrame(
    np.log2(ubp[ubp_exp]).values - np.log2(ubp[ubp_ctrl]).values,
    columns=ubp_exp, index=names)

wcl_st = (wcl - sp.nanmean(wcl)) / sp.nanstd(wcl)
wclp_st = (wclp - sp.nanmean(wclp)) / sp.nanstd(wclp)
ub_st = (ub - sp.nanmean(ub)) / sp.nanstd(ub)
ubp_st = (ubp - sp.nanmean(ubp)) / sp.nanstd(ubp)
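# sp.nanmean / sp.nanstd above assume an older SciPy whose top-level namespace
# still re-exported NumPy's nan-aware reductions; they are no longer available
# in current SciPy releases. A minimal sketch of the same standardisation with
# the NumPy equivalents (the small DataFrame and its column names are made up
# for the example, not taken from the original data):
import numpy as np
import pandas as pd

wcl_demo = pd.DataFrame({'exp_1': [1.0, 2.0, np.nan, 4.0],
                         'exp_2': [10.0, 12.0, 9.0, 11.0]})
# NaN-aware standardisation over all values, ignoring the missing entry
wcl_demo_st = (wcl_demo - np.nanmean(wcl_demo)) / np.nanstd(wcl_demo)
print(wcl_demo_st)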
def regridding_for_loop(data, modis_data):
    # Regrid the data into 1x1 degree boxes using a "for loop"
    dims_data = len(data.keys())

    # Per-box statistics, initialised to NaN
    stat_keys = ['LWP', 'reff', 'LWP_mean', 'LWP_std', 'LWP_median', 'LWP_skewness',
                 'LWP_kurtosis', 'LWP_all_mean', 'LWP_all_std', 'LWP_all_skewness',
                 'LWP_mean_zeros', 'LWP_std_zeros', 'tau_skewness', 'tau_kurtosis',
                 'CDNC_mean', 'CDNC_std', 'CDNC_median', 're_mean', 'CF_mean',
                 'CF_MODIS_mean', 'FFT_1D_max', 'CF', 'reflectance_mean',
                 'reflectance_std', 'reflectance_skewness', 'albedo_mean',
                 'lat_center', 'lon_center']
    product = {key: np.full(dims_data, np.nan) for key in stat_keys}
    product['f_name'] = [''] * dims_data

    modis_data['tau_CF'] = np.full(dims_data, np.nan)
    modis_data['Cloud_Water_Path_zeros'] = copy.copy(modis_data['Cloud_Water_Path'])
    modis_data['Cloud_Water_Path_zeros'][np.isnan(modis_data['Cloud_Water_Path_zeros'])] = 0
    modis_data['albedo'] = ((1 - 0.85) * modis_data['Cloud_Optical_Thickness']) / (
        2 + (1 - 0.85) * modis_data['Cloud_Optical_Thickness'])

    # Mask out pixels where both tau and re are below 3
    mask_tau = ((modis_data['Cloud_Optical_Thickness'] < 3) *
                (modis_data['Cloud_Effective_Radius'] < 3))
    modis_data['Cloud_Water_Path'][mask_tau] = np.nan
    mask_tau_all = ((modis_data['Cloud_Optical_Thickness_all'] < 3) *
                    (modis_data['Cloud_Effective_Radius_all'] < 3))
    modis_data['Cloud_Water_Path_all'][mask_tau_all] = np.nan

    # Create masks
    Multi_temp = copy.copy(modis_data['Cloud_Multi_Layer_Flag'])
    Multi_temp[np.isnan(Multi_temp)] = 1  # NaN means no retrieval
    Multi_temp[Multi_temp > 1] = 0
    cld_mask_temp = np.zeros(np.shape(modis_data['Cloud_Mask_1km'][:, :, 0]))
    cld_mask_temp[modis_data['Cloud_Mask_1km'][:, :, 0] <= 0] = 1
    cld_temperature_temp = np.zeros(np.shape(modis_data['cloud_top_temperature_1km']))
    cld_temperature_temp[modis_data['cloud_top_temperature_1km'] <= 273] = 1
    # modis_data['MODIS_CF'] = np.zeros(dims_data) * np.nan

    for ff, f in enumerate(range(dims_data)):
        ## Masking
        # Row/column bounds of the current box
        r0, r1 = data[ff][0][0], data[ff][0][-1]
        c0, c1 = data[ff][1][0], data[ff][1][-1]

        # Create tau CF
        tau_temp = modis_data['Cloud_Optical_Thickness'][r0:r1, c0:c1]
        modis_data['tau_CF'][ff] = sum(sum(~np.isnan(tau_temp))) / np.size(tau_temp)
        # modis_data['MODIS_CF'][ff] = np.mean(modis_data['Cloud_Fraction_1km'][r0:r1, c0:c1])

        # Skip boxes with empty index ranges
        if not any(data[ff][0]) or not any(data[ff][1]):
            continue
        # Size of the box
        if data[ff][0].shape[0] < 75:
            continue

        # Cloud multi-layer flag: 1 is single layer, so require at least 95% single-layer pixels
        if np.sum(np.sum(Multi_temp[r0:r1, c0:c1])) / Multi_temp[r0:r1, c0:c1].size < 0.95:
            continue
        # Cloud mask threshold - land is out
        if np.sum(np.sum(cld_mask_temp[r0:r1, c0:c1])) / cld_mask_temp[r0:r1, c0:c1].size > 0.05:
            continue
        # Cloud temperature mask
        if np.sum(np.sum(cld_temperature_temp[r0:r1, c0:c1])) / cld_temperature_temp[r0:r1, c0:c1].size > 0.05:
            continue

        ## Mean and std of the other variables
        product['LWP_mean'][ff] = np.nanmean(modis_data['Cloud_Water_Path'][r0:r1, c0:c1])
        product['LWP_std'][ff] = scipy.nanstd(modis_data['Cloud_Water_Path'][r0:r1, c0:c1])
        product['LWP_median'][ff] = np.nanmedian(modis_data['Cloud_Water_Path'][r0:r1, c0:c1])
        product['LWP_mean_zeros'][ff] = np.nanmean(modis_data['Cloud_Water_Path_zeros'][r0:r1, c0:c1])
        product['LWP_std_zeros'][ff] = scipy.nanstd(modis_data['Cloud_Water_Path_zeros'][r0:r1, c0:c1])
        product['LWP_skewness'][ff] = scipy.stats.skew(
            np.ravel(modis_data['Cloud_Water_Path'][r0:r1, c0:c1]), nan_policy='omit')
        product['LWP_kurtosis'][ff] = scipy.stats.kurtosis(
            np.ravel(modis_data['Cloud_Water_Path'][r0:r1, c0:c1]), nan_policy='omit')
        product['LWP_all_mean'][ff] = np.nanmean(modis_data['Cloud_Water_Path_all'][r0:r1, c0:c1])
        product['LWP_all_std'][ff] = scipy.nanstd(modis_data['Cloud_Water_Path_all'][r0:r1, c0:c1])
        product['LWP_all_skewness'][ff] = scipy.stats.skew(
            np.ravel(modis_data['Cloud_Water_Path_all'][r0:r1, c0:c1]), nan_policy='omit')
        product['tau_skewness'][ff] = scipy.stats.skew(
            np.ravel(modis_data['Cloud_Optical_Thickness'][r0:r1, c0:c1]), nan_policy='omit')
        product['tau_kurtosis'][ff] = scipy.stats.kurtosis(
            np.ravel(modis_data['Cloud_Optical_Thickness'][r0:r1, c0:c1]), nan_policy='omit')
        product['CDNC_mean'][ff] = np.nanmean(modis_data['CDNC'][r0:r1, c0:c1])
        product['CDNC_std'][ff] = scipy.nanstd(modis_data['CDNC'][r0:r1, c0:c1])
        product['CDNC_median'][ff] = np.nanmedian(modis_data['CDNC'][r0:r1, c0:c1])
        product['re_mean'][ff] = np.nanmean(modis_data['Cloud_Effective_Radius'][r0:r1, c0:c1])
        product['CF_mean'][ff] = np.nanmean(modis_data['tau_CF'][ff])
        product['CF_MODIS_mean'][ff] = np.nanmean(modis_data['Cloud_Fraction_1km'][r0:r1, c0:c1])
        product['FFT_1D_max'][ff] = power_spectrum_line(modis_data['Cloud_Water_Path_zeros'][r0:r1, c0:c1])
        product['reflectance_mean'][ff] = np.nanmean(modis_data['Atm_Corr_Refl'][:, :, 0][r0:r1, c0:c1])
        product['reflectance_std'][ff] = scipy.nanstd(modis_data['Atm_Corr_Refl'][:, :, 0][r0:r1, c0:c1])
        product['reflectance_skewness'][ff] = scipy.stats.skew(
            np.ravel(modis_data['Atm_Corr_Refl'][:, :, 0][r0:r1, c0:c1]), nan_policy='omit')
        product['albedo_mean'][ff] = np.nanmean(modis_data['albedo'][r0:r1, c0:c1])
        product['lat_center'][ff] = np.nanmean(modis_data['Latitude_1km'][r0:r1, c0:c1])
        product['lon_center'][ff] = np.nanmean(modis_data['Longitude_1km'][r0:r1, c0:c1])

    # Keep only the boxes for which LWP_mean could be computed
    valid = ~np.isnan(product['LWP_mean'])
    data_scene = {}
    for key in ['LWP_mean', 'LWP_std', 'LWP_median', 'LWP_mean_zeros', 'LWP_std_zeros',
                'LWP_skewness', 'LWP_kurtosis', 'LWP_all_mean', 'LWP_all_std',
                'LWP_all_skewness', 'tau_skewness', 'tau_kurtosis', 'CDNC_mean',
                'CDNC_std', 'CDNC_median', 're_mean', 'CF_mean', 'CF_MODIS_mean',
                'FFT_1D_max', 'reflectance_mean', 'reflectance_std',
                'reflectance_skewness', 'albedo_mean', 'lat_center', 'lon_center']:
        data_scene[key] = product[key][valid]
    data_scene['f_name'] = np.size(data_scene['reflectance_mean']) * modis_data['f_name']
    return data_scene
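# The per-box statistics above repeatedly combine np.nanmean, the removed
# scipy.nanstd, and scipy.stats.skew with nan_policy='omit'. A compact,
# self-contained sketch of that pattern on a made-up 2-D field (np.nanstd is
# the current replacement for scipy.nanstd):
import numpy as np
from scipy import stats

field = np.array([[1.0, 2.0, np.nan],
                  [4.0, 5.0, 6.0],
                  [7.0, np.nan, 9.0]])
box = field[0:2, 0:3]  # a 2x3 sub-box, analogous to one box in the loop above
box_mean = np.nanmean(box)
box_std = np.nanstd(box)
box_skew = stats.skew(np.ravel(box), nan_policy='omit')
print(box_mean, box_std, box_skew)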
    # store the name of the best fit and its p value
    print("Best fitting distribution: " + str(best_dist))
    print("Best p value: " + str(best_p))
    print("Parameters for the best fit: " + str(params[best_dist]))
    return best_dist, best_p, params[best_dist]


x = stats.norm.rvs(loc=1, scale=2, size=100)
y = np.random.uniform(-1, 1, size=1000)
z = np.random.exponential(5, size=1000)

meanN, stdN = 0, 0
for i in range(50):
    meanX, stdX = scipy.mean(x), scipy.nanstd(x)
    meanN += meanX
    stdN += stdX
meanN /= 50
stdN /= 50
print('Mean and standard deviation for normal distribution', meanN, stdN)

meanU, stdU = 0, 0
for i in range(50):
    meanY, stdY = scipy.mean(y), scipy.nanstd(y)
    meanU += meanY
    stdU += stdY
meanU /= 50
stdU /= 50
print('Mean and standard deviation for uniform distribution', meanU, stdU)
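# scipy.mean and scipy.nanstd used above have been removed from the top-level
# scipy namespace in recent releases; np.mean and np.nanstd are the drop-in
# replacements. Since x does not change between iterations, a single call gives
# the same numbers as the 50-iteration average, e.g.:
import numpy as np
from scipy import stats

x_demo = stats.norm.rvs(loc=1, scale=2, size=100)
print('Mean and standard deviation for normal distribution',
      np.mean(x_demo), np.nanstd(x_demo))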
def stat(x):
    return [round(nanmean(x), 5), round(nanstd(x), 5)]
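# Usage sketch for stat above (added for illustration). It assumes nanmean and
# nanstd are the NumPy functions; NaN entries are ignored in both statistics.
from numpy import nanmean, nanstd

print(stat([1.0, 2.0, float('nan'), 4.0]))  # [2.33333, 1.24722]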