def model_summary(original, model):
    print("# FIRST SIX MOMENTS; ORIGINAL-MODEL")
    omean = original.mean()
    mmean = model.mean()
    print(omean, mmean)
    for i in range(2, 7):
        morig = moment(original, i)
        mmodel = moment(model, i)
        print(morig, mmodel)
    print("\n")
    print("# HISTOGRAM ORIGINAL")
    for e, h in histogram(original):
        print(e, h)
    print("\n")
    print("# HISTOGRAM MODEL")
    for e, h in histogram(model):
        print(e, h)
    print("\n")
    print("# AUTOCORRELATION FUNCTION ACF(LAG); ORIGINAL-MODEL")
    original_acf = acf(original)
    model_acf = acf(model)
    for i in range(0, 100):
        print(i, original_acf[i], model_acf[i])
    print("\n")
def motionpattern(kppatch, N_sq):
    k = len(kppatch)
    patch_array = np.zeros(shape=[24, 24, N_sq])
    feature = np.zeros(shape=[k, 24, 24, 3])
    for i in range(0, len(kppatch)):  # for the i-th keypoint
        patch = kppatch[i]
        # For the i-th keypoint we have N_sq frames, so the dimension of patch
        # is 24*24*N_sq. We calculate central moments over the N_sq frames for
        # each of the 24*24 pixels.
        for m in range(0, N_sq):
            patch_array[:, :, m] = patch[m]
        feature[i, :, :, 0] = stats.moment(patch_array, moment=2, axis=2)
        feature[i, :, :, 1] = stats.moment(patch_array, moment=3, axis=2)
        feature[i, :, :, 2] = stats.moment(patch_array, moment=4, axis=2)
    feature = np.reshape(feature, (k, 1728))
    print('shape')
    print(feature.shape)
    return feature
def sb_algorithm(image, k):
    # Casting image into a masked array for further processing
    masked_image = np.ma.masked_array(image, mask=np.zeros(image.shape))
    while True:
        # Calculate number of remaining pixels in image
        N = masked_image.size - np.sum(masked_image.mask)
        # Calculate background statistics
        bkgd_mean = masked_image.mean()
        bkgd_std = masked_image.std()
        bkgd_var = masked_image.var()
        bkgd_mom3 = moment(masked_image.flatten(), 3)
        bkgd_mom4 = moment(masked_image.flatten(), 4)
        s_of_variance = np.sqrt(1 / N * (bkgd_mom4 - (N - 3) / (N - 1) * bkgd_mom3))
        # Remove maximum pixel from background
        if abs(bkgd_var - bkgd_mean) > k * s_of_variance:
            max_index = masked_image.argmax()
            max_index = np.unravel_index(max_index, masked_image.shape)
            masked_image.mask[max_index] = 1
        else:
            break
    return masked_image
def get_stats(obs_dataarray):
    noise_var = obs_dataarray['noise_var'].values
    obs_arr = obs_dataarray['obs'].stack(s=['y', 'x']).values
    m2_biased = moment(obs_arr, moment=2, axis=-1, nan_policy='omit')
    m3_biased = moment(obs_arr, moment=3, axis=-1, nan_policy='omit')
    m4_biased = moment(obs_arr, moment=4, axis=-1, nan_policy='omit')
    m2_unbiased = m2_biased - noise_var
    m3_unbiased = m3_biased
    m4_unbiased = m4_biased - (6 * m2_unbiased * noise_var) - \
        (3 * noise_var ** 2)
    v_biased = m2_biased
    s_biased = m3_biased / m2_biased ** (3 / 2)
    k_biased = (m4_biased / m2_biased ** 2) - 3
    v_unbiased = m2_unbiased
    s_unbiased = m3_unbiased / m2_unbiased ** (3 / 2)
    k_unbiased = (m4_unbiased / m2_unbiased ** 2) - 3
    return xr.Dataset(
        {'m2_biased': (['f'], m2_biased),
         'm3_biased': (['f'], m3_biased),
         'm4_biased': (['f'], m4_biased),
         'm2_unbiased': (['f'], m2_unbiased),
         'm3_unbiased': (['f'], m3_unbiased),
         'm4_unbiased': (['f'], m4_unbiased),
         'v_biased': (['f'], v_biased),
         's_biased': (['f'], s_biased),
         'k_biased': (['f'], k_biased),
         'v_unbiased': (['f'], v_unbiased),
         's_unbiased': (['f'], s_unbiased),
         'k_unbiased': (['f'], k_unbiased)},
        coords={'f': obs_dataarray.coords['f']}
    )
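# A synthetic check of the noise-debiasing relations used in get_stats above.
# This is a minimal sketch (not part of the original project), assuming Gaussian
# noise of known variance added to a "true" field: the corrected second and
# fourth central moments should recover the signal's own moments.
import numpy as np
from scipy.stats import moment

rng = np.random.default_rng(3)
signal = rng.exponential(size=500_000)                   # "true" field, Exp(1)
noise_var = 0.5 ** 2
obs = signal + rng.normal(scale=0.5, size=signal.size)   # noisy observation

m2 = moment(obs, 2)
m4 = moment(obs, 4)
m2_unbiased = m2 - noise_var
m4_unbiased = m4 - 6 * m2_unbiased * noise_var - 3 * noise_var ** 2

print(m2_unbiased, moment(signal, 2))   # both ~1 (variance of Exp(1))
print(m4_unbiased, moment(signal, 4))   # both ~9 (4th central moment of Exp(1))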
def curtosi(V, m='from_file'):
    if m == 'from_file':
        M4 = np.mean(np.power(V, 4))
        M2 = np.mean(np.power(V, 2))
        return 1. - M4 / (3. * M2 * M2)
    else:
        return (moment(V, 4) / moment(V, 2)**2) - 3
def get_features(sig, freq):
    """This function computes statistics for the given signal @sig.

    For information on statistical moments:
    https://en.wikipedia.org/wiki/Moment_(mathematics)

    :param sig: an array containing points that pertain to the signal
    :param freq: the frequency of the signal in Hertz
    """
    s = Signal(sig, freq)
    # Frequency domain (fd)
    fd = s.to_freq_domain()
    max_freq_idx = np.argmax(fd.amps)  # Frequency with highest amplitude
    max_freq = fd.fs[max_freq_idx]
    min_freq_idx = np.argmin(fd.amps)  # Frequency with lowest amplitude
    min_freq = fd.fs[min_freq_idx]
    # print "fd.fs", fd.fs
    # print "freqs: min: %f, max: %f" % (min_freq, max_freq)
    # Normalize frequency domain histogram
    nbins = 10
    normalized_fd = normalize_frequencies(fd, nbins)
    # import pdb; pdb.set_trace()
    feat_array = np.array([
        np.mean(sig),
        stats.moment(sig, 2, axis=0),
        stats.moment(sig, 3, axis=0),
        stats.moment(sig, 4, axis=0),
        np.max(sig),
        np.min(sig),
        max_freq,
        min_freq
    ])
    # return feat_array
    return normalized_fd
def describe(data):
    """Return summary statistics of a set of data"""
    mean = np.mean(data)
    var = moment(data, 2)
    skew = moment(data, 3) / var**1.5
    kurt = moment(data, 4) / var**2
    return (mean, var, skew, kurt)
def describe(data):
    """Return summary statistics of a set of data"""
    mean = sum(data) / len(data)
    var = moment(data, 2)
    skew = moment(data, 3) / var**1.5
    kurt = moment(data, 4) / var**2
    return (mean, var, skew, kurt)
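# Minimal usage sketch for the describe() helpers above (an illustration, not
# from the original projects). moment(data, 2) is the biased sample variance,
# so the derived skewness and kurtosis match scipy's biased estimators.
import numpy as np
from scipy.stats import moment, skew as sp_skew, kurtosis as sp_kurtosis

rng = np.random.default_rng(0)
data = rng.normal(loc=2.0, scale=3.0, size=10_000)

mean, var, skw, kurt = describe(data)
assert np.isclose(skw, sp_skew(data, bias=True))
assert np.isclose(kurt, sp_kurtosis(data, fisher=False, bias=True))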
def test_moment(self):
    dense_table, dense_not_inst_table, original_data = self._gen_table_data()
    summary_obj = MultivariateStatisticalSummary(dense_table, error=0,
                                                 stat_order=4, bias=False)
    header = dense_table.schema['header']
    from scipy import stats
    moment_3 = stats.moment(original_data, 3, axis=0)
    moment_4 = stats.moment(original_data, 4, axis=0)
    skewness = stats.skew(original_data, axis=0, bias=False)
    kurtosis = stats.kurtosis(original_data, axis=0, bias=False)
    summary_moment_3 = summary_obj.get_statics("moment_3")
    summary_moment_4 = summary_obj.get_statics("moment_4")
    static_skewness = summary_obj.get_statics("skewness")
    static_kurtosis = summary_obj.get_statics("kurtosis")
    # print(f"moment: {summary_moment_4}, moment_2: {moment_4}")
    for idx, col_name in enumerate(header):
        self.assertTrue(
            self._float_equal(summary_moment_3[col_name], moment_3[idx]))
        self.assertTrue(
            self._float_equal(summary_moment_4[col_name], moment_4[idx]))
        self.assertTrue(
            self._float_equal(static_skewness[col_name], skewness[idx]))
        self.assertTrue(
            self._float_equal(static_kurtosis[col_name], kurtosis[idx]))
def main(currPath, inputFileName, showPlots): inPath = currPath + '\\Inputs' dataHeader = 'Depth' # read in the inputs settings = common.readSettingsCSV(inPath + '\\' + inputFileName) common.showSettings(settings) runID = settings['runID'] dataFileName = settings['dataFileName'] distributionType = settings['distribution'] minAtZero = checkTrue(settings['fixMinAtZero']) xTitle = settings['xTitle'] outPath = currPath + '\\Reports' if os.path.isdir(outPath) == False: # create directory if it doesn't exist os.mkdir(outPath) # read in the data data = common.readCSVwithHeaders(inPath + '\\' + dataFileName) data = np.array(data[dataHeader]) # create the distribution from the data with default initialisation (MLE for most distributions) dist = Distribution(data, distributionType, minAtZero) # check fit on probability paper [sqrErr, mean, stdDev, lowerBound] = probPlotSqrErr( data, dist, distributionType, xTitle, 'MLE', runID, outPath, showPlots) # output: [sqrErr, mean, stdDev, lowerBound] print(sqrErr, mean, stdDev**2, lowerBound) # fit distribution and check fit again on probability paper fitType = 'MOM' isConverged = dist.fit( data, fit=fitType) # fit='MLE' # fit='MOM' # fit='quantiles' if isConverged: [sqrErr, mean, stdDev, lowerBound] = probPlotSqrErr( data, dist, distributionType, xTitle, fitType, runID, outPath, showPlots) # output: [sqrErr, mean, stdDev, lowerBound] print(sqrErr, mean, stdDev**2, lowerBound) print('dist.moments(): ', dist.moments()) print('dist central moments: ', dist.distObj.stats(moments='mvs')) print('data central moments: ', stats.moment(data, moment=1), stats.moment(data, moment=2), stats.moment(data, moment=3)) print('data non-central moments:', moment(data, 1), moment(data, 2), moment(data, 3)) print('data mvs: ', np.mean(data), np.var(data, ddof=1), stats.skew(data)) # try creating a sample from the fit distribution sample = dist.ppf(np.random.random(1E6)) print('sample mvs: ', np.mean(sample), np.var(sample, ddof=1), stats.skew(sample)) print('data mean stdDev min: ', np.mean(data), np.std(data, ddof=1), np.min(data)) print('dist mean stdDev min: ', mean, stdDev, lowerBound) else: print('distribution fit did not converge') return
def moment(self, k):
    if all(is_scalar(x) for x in self):
        return stats.moment(np.array(self), k)
    elif get_dimension(self) > 0:
        return tuple(stats.moment(np.array(self), k, 0))
    else:
        raise Exception(
            "I don't know how to find the moment of these values.")
def getStats(curr_filter):
    mean = torch.mean(curr_filter)
    std = torch.std(curr_filter)
    mom3 = moment(curr_filter, 3, None)
    mom4 = moment(curr_filter, 4, None)
    num_e = curr_filter.numel()
    return mean, std, mom3, mom4, num_e
def plot_summary(data, col_name):
    var = data[col_name].values
    summary_df = pd.DataFrame({'min': [np.min(var)], 'mean': [np.mean(var)],
                               'D1': [np.percentile(var, 10)],
                               'Q1': [np.percentile(var, 25)],
                               'Interquartile Range': [ss.iqr(var)],
                               'median': [np.median(var)], 'Mode': [ss.mode(var)],
                               'Q3': [np.percentile(var, 75)],
                               'D9': [np.percentile(var, 90)],
                               'max': [np.max(var)], 'variance': [np.var(var)],
                               'Coefficient of variation': [ss.variation(var)],
                               '2nd central moment': [ss.moment(var, moment=2)],
                               '3rd central moment': [ss.moment(var, moment=3)],
                               'kurtosis': [ss.kurtosis(var)], 'skewness': [ss.skew(var)],
                               'length': len(var)}).T
    summary_df.columns = ['statistics_value']
    summary_df = round(summary_df, 3)
    fig = go.Figure(data=[go.Table(
        header=dict(values=list(['statistic', 'values']),
                    fill_color='royalblue',
                    align='center',
                    font=dict(color='white', size=18)),
        cells=dict(values=[summary_df.index, summary_df.values],
                   fill_color='rgb(107, 174, 214)',
                   align='left',
                   font=dict(color='black', size=14)))
    ])
    fig.update_layout(title='statistic summary of univariate data', plot_bgcolor='white',
                      paper_bgcolor='white', font_color='black',
                      width=600, height=800, font=dict(size=18), title_x=0.5)
    return fig.show()


def plot_histogram(df, col_name):
    fig = px.histogram(df, x=col_name, nbins=30)
    fig.add_annotation(x=np.percentile(df[col_name], 25), y=0, text="Q1", showarrow=True, arrowhead=1)
    fig.add_annotation(x=np.percentile(df[col_name], 50), y=0, text="Median", showarrow=True, arrowhead=1)
    fig.add_annotation(x=np.percentile(df[col_name], 75), y=0, text="Q3", showarrow=True, arrowhead=1)
    fig.add_annotation(x=np.percentile(df[col_name], 10), y=0, text="D1", showarrow=True, arrowhead=1)
    fig.add_annotation(x=np.percentile(df[col_name], 90), y=0, text="D9", showarrow=True, arrowhead=1)
    fig.update_layout(title='Univariate data histogram', plot_bgcolor='white',
                      paper_bgcolor='white', font_color='black',
                      width=800, height=800, font=dict(size=18), title_x=0.5)
    return fig.show()
def moments(self, data):
    mean = np.mean(data)
    var = np.var(data)
    skew = moment(data, 3) / var**1.5
    kurt = moment(data, 4) / var**2
    mind = min(data)
    maxd = max(data)
    std = var**.5
    return (mean, std, skew, kurt, mind, maxd)
def _beta(k, dist):
    from scipy.stats import moment
    if k % 2 == 1:
        k = (k - 1) / 2
        beta_r = moment(dist, 3) * moment(dist, 2*k + 3) / (moment(dist, 2)**(k + 3))
    elif k % 2 == 0:
        k = k / 2
        beta_r = moment(dist, 2*k + 2) / (moment(dist, 2)**(k + 1))
    return beta_r
def betta(data: np.ndarray, k: float) -> np.ndarray or None:
    if k % 2 == 1:
        k = (k - 1) / 2
        return moment(data, 3) * moment(data, 2 * k + 3) / (moment(data, 2)**(k + 3))
    elif k % 2 == 0:
        k = k / 2
        return moment(data, 2 * k + 2) / (moment(data, 2)**(k + 1))
    else:
        return None
def aggregation(nbrvals, dummy_flag=False, d=2):
    res = {'min': np.min(nbrvals, axis=1),
           'max': np.max(nbrvals, axis=1),
           'mean': np.mean(nbrvals, axis=1),
           'std': np.std(nbrvals, axis=1),
           'sum': np.sum(nbrvals, axis=1),
           '3rdmom': moment(nbrvals, moment=3, axis=1),
           '4thmom': moment(nbrvals, moment=4, axis=1)}
    if dummy_flag:
        inival = np.zeros(d)
        res = {'min': inival, 'max': inival, 'mean': inival, 'std': inival,
               'sum': inival, '3rdmom': inival, '4thmom': inival}
    return res
def test_moment(self):
    """moment(a, p) = mean((a - mean(a, axis=0))**p, axis=0)"""
    y = stats.moment(self.testcase, 1)
    assert_approx_equal(y, 0.0, 10)
    y = stats.moment(self.testcase, 2)
    assert_approx_equal(y, 1.25)
    y = stats.moment(self.testcase, 3)
    assert_approx_equal(y, 0.0)
    y = stats.moment(self.testcase, 4)
    assert_approx_equal(y, 2.5625)
def normalization_classification(batch_data_list, types_list):
    normalized_data = []
    normalization_parameters = []
    for i in range(len(types_list)):
        observed_data = batch_data_list[:, i]
        if types_list[i]['type'] == 'real':
            # We transform the data to a gaussian with mean 0 and std 1
            data_mean = np.mean(observed_data)
            data_var = moment(observed_data, 2)
            data_var = np.clip(data_var, 1e-6, 1e20)
            data_std = np.sqrt(data_var)
            aux_X = preprocessing.scale(observed_data)
            normalized_data.append(aux_X)
            normalization_parameters.append([data_mean, data_std])
        # When using log-normal
        elif types_list[i]['type'] == 'pos':
            # We transform the log of the data to a gaussian with mean 0 and std 1
            observed_data = observed_data
            data_mean = np.mean(observed_data)
            data_var = moment(observed_data, 2)
            data_var = np.clip(data_var, 1e-6, 1e20)  # Avoid zero values
            data_std = np.sqrt(data_var)
            aux_X = preprocessing.scale(observed_data)
            normalized_data.append(aux_X)
            normalization_parameters.append([data_mean, data_std])
        elif types_list[i]['type'] == 'count':
            # Input log of the data
            observed_data = observed_data
            data_mean = np.mean(observed_data)
            data_var = moment(observed_data, 2)
            data_var = np.clip(data_var, 1e-6, 1e20)  # Avoid zero values
            data_std = np.sqrt(data_var)
            aux_X = preprocessing.scale(observed_data)
            normalized_data.append(aux_X)
            normalization_parameters.append([data_mean, data_std])
        else:
            # Don't normalize the categorical and ordinal variables
            normalized_data.append(observed_data)
            normalization_parameters.append([0.0, 1.0])  # No normalization here
    return normalized_data, normalization_parameters
def test_moment(self):
    for n in self.get_n():
        x, y, xm, ym = self.generate_xy_sample(n)
        r = stats.moment(x)
        rm = stats.mstats.moment(xm)
        assert_almost_equal(r, rm, 10)
        r = stats.moment(y)
        rm = stats.mstats.moment(ym)
        assert_almost_equal(r, rm, 10)
def computeStats(yy):
    temp = []
    temp.append(yy.mean())
    temp.append(yy.var())
    temp.append(yy.std())
    temp.append(np.median(yy))
    temp.append(stats.skew(yy))
    temp.append(stats.kurtosis(yy))
    temp.append(stats.moment(yy, 3))
    temp.append(stats.moment(yy, 4))
    return (temp)
def box_error_of_error(self):
    var = np.var(self.dd_box, axis=1)
    std = np.std(self.dd_box, axis=1)
    u2 = stats.moment(self.dd_box, moment=2, axis=1)
    u4 = stats.moment(self.dd_box, moment=4, axis=1)
    N = self.dd_box.shape[1]
    var_var = (N - 1.0)**2.0 / N**3.0 * u4 - (N - 1.0) * (N - 3.0) * u2**2.0 / N**3.0
    std_var = np.sqrt(var_var)
    std_std = 0.5 / std * std_var
    return std_std / self.wp_mean
def get_stats(data, noise_err=None, noise_err_npix=None):
    """
    Calculate variance, skewness and kurtosis of the data with an option to
    propagate noise error. The flattened data array is used in the
    calculation. The noise is handled analytically, assuming the "true" data
    is corrupted by some random noise with standard deviation of `noise_err`.
    Propagated error of the kurtosis is not implemented at the moment.

    Parameters
    ----------
    data : array-like
        Data to calculate statistics.
    noise_err : float or array-like, optional
        Noise error to propagate to the statistics. If array-like, each noise
        error in the array will be propagated individually, returning
        multiple propagated errors for each noise error.
    noise_err_npix : float, optional
        Assumed number of independent npix in the noise calculation.
        Default is every pixel is independent.

    Returns
    -------
    ndarray([min, max, mean, variance, skewness, kurtosis,
             (noise1, var_err1, skew_err1, noise2, var_err2, skew_err2,
              noise3, var_err3, skew_err3, ... (if any))])
    """
    npix = np.size(data)
    dmin = np.min(data)
    dmax = np.max(data)
    dmean = np.mean(data)
    m2 = moment(data, moment=2, axis=None)
    m3 = moment(data, moment=3, axis=None)
    m4 = moment(data, moment=4, axis=None)
    skew = m3 / m2 ** (3. / 2.)
    kurt = (m4 / m2 ** 2) - 3.
    stat_vals = (dmin, dmax, dmean, m2, skew, kurt)
    if noise_err is not None:
        if noise_err_npix is None:
            noise_err_npix = npix
        m6 = moment(data, moment=6, axis=None)
        m2_err, skew_err, kurt_err = propagate_noise_error(
            noise_err, m2, m3, m4, m6, noise_err_npix
        )
        prop_noise_err = np.vstack(
            (noise_err, m2_err, skew_err, kurt_err)
        ).T.ravel()
        out = np.hstack((stat_vals, prop_noise_err))
    else:
        out = np.array(stat_vals)
    return out
def calc_statistical_moments(signal):
    m = np.mean(signal)
    v = np.var(signal)
    m3 = moment(signal, moment=3)
    m4 = moment(signal, moment=4)
    if v > 0:
        sk = m3 / (math.sqrt(math.pow(v, 3)))
        kurt = (m4 / math.pow(v, 2)) - 3
    else:
        sk = 0
        kurt = 0
    return m, v, sk, kurt
def setDataAndCalc(self, _data, showSimRes=True, N=1000000):
    # data must be standardized
    self.mean = np.mean(_data)
    self.std = np.std(_data)
    std_data = (_data - self.mean) / self.std
    self.skew = moment(std_data, 3)
    self.kurt = moment(std_data, 4)
    exkurt = self.kurt - 3.0  # subtract 3 for a standard distribution -> excess kurtosis
    self.min = min(_data)
    self.max = max(_data)
    self.mmnts = [self.mean, self.std, self.skew, self.kurt, self.min, self.max]
    self._endInitCalc(self.skew, exkurt, showSimRes, N)
def get_kurtosis(signal):
    """
    Kurtosis of signal.

    :param signal: input signal.
    :type signal: array
    :returns: kurtosis
    """
    μ_2 = moment(signal, 2)
    μ_4 = moment(signal, 4)
    k_u = μ_4 / μ_2**2
    return k_u
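# Quick sanity check for get_kurtosis above (a sketch, assuming numpy and the
# snippet's scipy.stats.moment import are available): mu_4 / mu_2**2 is the
# non-excess kurtosis, so it should be near 3 for Gaussian noise and near 1.8
# for a uniform distribution.
import numpy as np

rng = np.random.default_rng(42)
print(get_kurtosis(rng.normal(size=100_000)))           # ~3.0
print(get_kurtosis(rng.uniform(-1, 1, size=100_000)))   # ~1.8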
def Ricci_moment_c(img, out_dir, type): img = cv2.imread(img) R_img = Ricci_img_c(img) width, height = R_img.shape R_reshape_img = np.reshape(R_img, (width * height)) Moment1 = moment(R_reshape_img, moment=1) Moment2 = moment(R_reshape_img, moment=2) Moment3 = moment(R_reshape_img, moment=3) Moment4 = moment(R_reshape_img, moment=4) plt.figure(figsize=(30, 30)) plt.title('Org'), plt.imshow(img) plt.savefig( str(out_dir + '-ORG' + type + '-' + 'COMB' + '-' + str(uuid.uuid4()) + '.png')) plt.figure(figsize=(30, 30)) plt.title('Ricci image'), plt.imshow(R_img) plt.savefig( str(out_dir + type + '-RIMG' + '-' + 'COMB' + '-' + str(uuid.uuid4()) + '.png')) plt.figure(figsize=(30, 30)) plt.title('Hist for Normal with Geometry weight'), plt.ylim( ymax=9000), plt.xlim(-100, 900) plt.hist(R_reshape_img, 100) for area in [ 'Moment 1 = ' "%.2f" % Moment1, 'Moment 2 = ' "%.2f" % Moment2, 'Moment 3 = ' "%.2f" % Moment3, 'Moment 4 = ' "%.2f" % Moment4 ]: plt.scatter([], [], label=str(area)) plt.legend(bbox_to_anchor=(1.05, 1), loc=2, title='Set of Moments') plt.savefig( str(out_dir + type + '-HIST' + '-' + 'COMB' + '-' + str(uuid.uuid4()) + '.png')) plt.close() return Moment1, Moment2, Moment3, Moment4
def calc_moments(timeseries_file, moment):
    """Returns nth moment (3 for skewness, 4 for kurtosis) of timeseries
    (list of values; one per timeseries).

    Keyword arguments:
    timeseries_file -- text file with white space separated timepoints in rows
    """
    timeseries = np.genfromtxt(timeseries_file)
    m2 = stats.moment(timeseries, 2, axis=0)
    m3 = stats.moment(timeseries, moment, axis=0)
    zero = m2 == 0
    return np.where(zero, 0, m3 / m2 ** (moment / 2.0))
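# Usage sketch for calc_moments above, assuming a whitespace-separated text file
# of timepoints as the docstring describes; "timeseries.txt" is a hypothetical
# scratch file created only for this illustration.
import numpy as np

rng = np.random.default_rng(0)
ts = rng.exponential(size=(200, 3))        # 200 timepoints, 3 timeseries
np.savetxt("timeseries.txt", ts)

print(calc_moments("timeseries.txt", 3))   # per-column skewness, m3 / m2**1.5
print(calc_moments("timeseries.txt", 4))   # per-column kurtosis, m4 / m2**2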
def core(values: np.ndarray, year: str) -> None:
    # Compute stats
    N = len(values)
    mean = np.mean(values)
    median = np.median(values)
    std = np.std(values)
    _min, _max = np.min(values), np.max(values)
    acfs = sm.tsa.acf(values, fft=False)
    moment_3 = moment(values, 3) / (std**3)
    moment_4 = moment(values, 4) / (std**4) - 3
    print(
        f"{year} & {N} & {mean:0.5f} & {median:0.5f} & {std:0.5f} & {_min:0.5f} & {_max:0.5f} & {moment_3:0.5f} & {moment_4:0.5f} \\\\"
    )
def get_stats(dataarray):
    arr = dataarray.stack(s=['y', 'x']).values
    m2 = moment(arr, moment=2, axis=-1, nan_policy='omit')
    m3 = moment(arr, moment=3, axis=-1, nan_policy='omit')
    m4 = moment(arr, moment=4, axis=-1, nan_policy='omit')
    v = m2
    s = m3 / m2 ** (3 / 2)
    k = (m4 / m2 ** 2) - 3
    return xr.Dataset(
        {'m2': (['f'], m2),
         'm3': (['f'], m3),
         'm4': (['f'], m4),
         'v': (['f'], v),
         's': (['f'], s),
         'k': (['f'], k)},
        coords={'f': dataarray.coords['f']}
    )
def skew(df):
    """
    How to read the skew value?
    g > 0 : Skewed to right
    g = 0 : Symmetric
    g < 0 : Skewed to left
    -------
    To convert Excel skewness to a real one, please consider the function
    "convert_excel_skew(G, n)"
    """
    m2 = moment(df, moment=2)
    m3 = moment(df, moment=3)
    skew_f = m3 / pow(pow(m2, 0.5), 3)
    return skew_f
def summary_stats(collection: Dict[str, pd.DataFrame]) -> None:
    """ Create summary statistics for prices/returns in each day of week. """
    days = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]
    days_short = ["Mon", "Tue", "Wed", "Thu", "Fri"]
    print("Day, N, mean, std, moment")
    for d in days:
        values = collection[d].dropna().values
        print("{} & {} & {:0.3f} & {:0.3f} & {:0.3f} & {:0.3f}\\\\".format(
            d, len(collection[d].dropna()), np.mean(values), np.std(values),
            moment(values, moment=3) / (np.std(values)**3),
            moment(values, moment=4) / (np.std(values)**4) - 3))
def doane_bin(data):
    n = data.count()
    # print "doane", n
    if n == 0 or n == 1:
        return 1
    else:
        std = np.std(data)
        g_1 = abs(s.moment(data, 3) / s.moment(data, 2))
        std_g_1 = Decimal(6 * (n - 2)) / Decimal((n + 1) * (n + 2))
        std_g_1 = math.sqrt(std_g_1)
        bins = round(1 + np.log2(n) + np.log2(1 + g_1 / std_g_1))
        # debug
        # print "n ", n, " std ", std, " g_1 ", g_1, " std_g_1 ", std_g_1, " bins "
        return bins
def features_compute(Pxx):
    # compute full band spectral entropy
    Pxx_hat = Pxx / np.sum(Pxx)  # normalize the psd
    print(sum(Pxx_hat))
    ent = entropy(Pxx_hat, axis=0)
    # compute moments of spectrum
    m1 = np.mean(Pxx)
    m2 = np.std(Pxx)
    m3 = moment(Pxx, moment=3)
    m4 = moment(Pxx, moment=4)
    return ent, m1, m2, m3, m4
def test_moments_normal_distribution(self):
    np.random.seed(32149)
    data = np.random.randn(12345)
    moments = []
    for n in [1, 2, 3, 4]:
        moments.append(stats.kstat(data, n))
    expected = [0.011315, 1.017931, 0.05811052, 0.0754134]
    assert_allclose(moments, expected, rtol=1e-4)

    # test equivalence with `stats.moment`
    m1 = stats.moment(data, moment=1)
    m2 = stats.moment(data, moment=2)
    m3 = stats.moment(data, moment=3)
    assert_allclose((m1, m2, m3), expected[:-1], atol=0.02, rtol=1e-2)
def kurtosis(df):
    """
    How to read the kurtosis value?
    K > 3 : Leptokurtic (Narrow-tall)
    K = 3 : Mesokurtic (Regular)
    K < 3 : Platykurtic (Wide-low)
    -------
    To convert Excel kurtosis to a real one, please consider the function
    "convert_excel_kurtosis(K, n)"
    """
    m2 = moment(df, moment=2)
    m4 = moment(df, moment=4)
    kurtosis_f = m4 / pow(m2, 2)
    return kurtosis_f
def build_robust_fl_dist_with_stats( fragment_lengths ): """Trim outliers from a numpy array of fragment lengths. First, we estimate the mean and sd on the trimmed data. Then we use the initial estimate to truncate at +/- NUM_SDS SD's Calculate statistics for use in read_group distribution clustering """ NUM_SDS = 4 sorted_fls = numpy.sort( fragment_lengths ) ## find the initial estiamte of the mean and sd # if the list is long, trim the top and bottom 5% if len( sorted_fls ) > 40: fifteen_percent_cnt = int( len( sorted_fls )*.15 ) guess = sorted_fls[ fifteen_percent_cnt:-fifteen_percent_cnt ] mn = numpy.mean( guess ) sd = numpy.std( guess ) lower_bnd = max( 0, mn - NUM_SDS*sd ) upper_bnd = mn + NUM_SDS*sd # if the list is too short, just include everything else: lower_bnd = sorted_fls[0] upper_bnd = sorted_fls[-1] new_fls = sorted_fls[ (sorted_fls > lower_bnd) & (sorted_fls < upper_bnd) ] if len( new_fls ) == 0: print sorted_fls print lower_bnd print upper_bnd print >> sys.stderr, "WARNING: There aren't any fragments after filtering." new_fls = sorted_fls lower_bnd = new_fls[0] upper_bnd = new_fls[-1] # calculate mean, standard deviation and skew as well as associated SEs from scipy.stats import skew, moment, sem from math import sqrt mn = numpy.mean( new_fls ) sd = numpy.std( new_fls ) skw = skew( new_fls ) n = len( new_fls ) # calculate SE for mean, sd and skew se_m = sd / sqrt(n) se_sd = sqrt( (2 / float(n**2 - n)) * moment( new_fls, 4) ) se_sk = sqrt( float( 6*n*(n-1) )/float( (n-2) * (n+1) * (n-3) ) ) #assert skw - se_sk < 0, 'skew: %f skew_standard_error: %f' % (skew, se_sk) stats = ( (mn, sd, skw) , (se_m, se_sd, se_sk) ) # build the empirical distribution emp_dist = numpy.zeros( upper_bnd - lower_bnd + 1 ) for fl in new_fls: emp_dist[ fl - lower_bnd ] += 1 emp_dist = emp_dist/emp_dist.sum() # build the fl dist object fl_dist = FlDist( int(lower_bnd), int(upper_bnd), emp_dist, stats ) return fl_dist
def moment_per_channel(self, data, n):
    """ Calculate the Nth moment per channel.

    A moment is a specific quantitative measure of the shape of a set of
    points. It is often used to calculate coefficients of skewness and
    kurtosis due to its close relationship with them.
    """
    return stats.moment(data, moment=n, axis=data.ndim - 1)
def statxture(pixels):
    """computes a variety of texture stats from the image histogram.
    See Digital Image Processing Using MATLAB, ch. 11"""
    average_gray_level = np.mean(pixels)
    average_contrast = np.std(pixels)
    H = histogram(pixels)[0]
    H = H / (1. * len(pixels))
    L = len(H)
    d = (L - 1.)**2
    normvar = np.var(pixels) / d
    smoothness = 1. - 1. / (1. + normvar)
    third_moment = moment(pixels, 3) / d
    uniformity = np.sum(H**2)
    eps = np.finfo(float).eps
    entropy = 0. - np.sum(H * np.log2(H + eps))
    return average_gray_level, average_contrast, smoothness, \
        third_moment, uniformity, entropy
def generate_fleishman_from_collection(data, N=10000):
    mean = np.mean(data)
    std = moment(data, 2)**0.5
    std_data = (data - mean) / std
    coeff = fit_fleishman_from_standardised_data(std_data)
    return (generate_fleishman(-coeff[1], *coeff, N=N)) * std + mean
def get_average(filename): with fits.open(filename) as fits_object: hdulist = [elem.header for elem in fits_object if elem.__class__.__name__ == 'ImageHDU'] # Get info from the first amplifier header. first_seg = hdulist[0] seg_length = first_seg['NAXIS1']-1 # 544 for default image seg_width = first_seg['NAXIS2']-1 # 2048 for default image DATASEC = getCoord(first_seg['DATASEC']) # Assume 'DETSIZE' is same for all segments/amplifiers. # return an array of size 16 with noise of each extension noise = np.zeros(16) for r in range(1, 17): # 16 extensions, from 1 to 16 pixel_sum = 0 pixel_count = 0 fitsdata = fits_object[r].data upper_region = fitsdata[DATASEC[3]:seg_width, DATASEC[0]:DATASEC[1]].flatten() pre_post = np.append(fitsdata[0:seg_width, 0:DATASEC[0]].flatten(), fitsdata[0:seg_length, DATASEC[1]:seg_length].flatten()) overscan = np.append(pre_post, upper_region) noise[r-1] = np.sqrt(sp.moment(overscan, 2)) #for i in range(0, 10): # for j in range(0, 2047): # pixel_sum += fitsdata[j, i] #pixel_sum += sum(map(sum, fitsdata[0:seg_width, 0:DATASEC[0]])) #prescan region #pixel_sum += sum(map(sum, fitsdata[0:seg_length, DATASEC[1]:seg_length])) # postscan region #pixel_sum += sum(map(sum, fitsdata[DATASEC[3]:seg_width, DATASEC[0]:DATASEC[1]])) # upper region #pixel_count = (DATASEC[0] * DATASEC[3] + DATASEC[1] * DATASEC[3]) + (DATASEC[1] - DATASEC[0]) * (seg_width - DATASEC[3]) #avg = np.float64(pixel_sum/pixel_count) return noise
def cumulant(data, n=1, standardized=False):
    """
    Calculates cumulants with moments from the scipy.stats package.
    scipy.stats doesn't use the histogram method.
    """
    if n > 6 or n < 1:
        raise ValueError("cumulants only supported for 1<=n<=6")
    n = int(n)
    mu = {}
    for k in range(1, n + 1):
        mu[k] = stats.moment(data, moment=k)
        if k > 2:
            mu[k] = mu[k]
    if standardized and n > 2:
        mu[2] = 1
    if n == 1:
        return np.mean(data)
    elif n == 2:
        return mu[2]
    elif n == 3:
        return mu[3]
    elif n == 4:
        return mu[4] - 3 * mu[2]**2
    elif n == 5:
        return mu[5] - 10 * mu[3] * mu[2]
    elif n == 6:
        return mu[6] - 15 * mu[4] * mu[2] - 10 * mu[3]**2 + 30 * mu[2]**3
    else:
        raise ValueError("Should not be here.")
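# Rough check of cumulant() above (a sketch, assuming numpy and scipy.stats are
# importable as in the snippet): for a large standard-normal sample the first
# two cumulants approach 0 and 1, and every higher cumulant approaches 0.
import numpy as np

rng = np.random.default_rng(1)
data = rng.normal(size=200_000)
for n in range(1, 7):
    print(n, cumulant(data, n))   # ~0, ~1, ~0, ~0, ~0, ~0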
def test_moment():
    x = np.random.random(1000)
    moments = ['zeroth', 'mean', 'var', 'skew', 'kurt']
    for i, m in enumerate(moments):
        calc = moment(x, order=i)
        lib = stats.moment(x, moment=i)
        assert calc == approx(lib), m + ' failed'
def handle_msg(self, msg):
    val = msg.data
    self.vals.append(val)
    l = list(self.vals)
    moments = []
    for i in range(1, 11):
        moments.append(stats.moment(l, moment=i))
    msg = Float64MultiArray(
        MultiArrayLayout([MultiArrayDimension('moments', 10, 1)], 1),
        moments)
    self.pub.publish(msg)
def moment_dice_metric(x, y, **kwargs):
    """
    kwargs: fourthmoment - True/False. False by default.
    """
    nx = float(len(x))
    ny = float(len(y))
    Exi = x.mean()
    sigmaxi = math.pow(moment(x, 2), 1 / 2.0)   # central_moment(x, Exi, 2)
    sxi = math.pow(abs(moment(x, 3)), 1 / 3.0)  # central_moment(x, Exi, 3)
    kxi = math.pow(moment(x, 4), 1 / 4.0)       # central_moment(x, Exi, 4)
    Eyi = y.mean()
    sigmayi = math.pow(moment(y, 2), 1 / 2.0)   # central_moment(y, Eyi, 2)
    syi = math.pow(abs(moment(y, 3)), 1 / 3.0)  # central_moment(y, Eyi, 3)
    kyi = math.pow(moment(y, 4), 1 / 4.0)       # central_moment(y, Eyi, 4)
    fourthmoment = False
    if "fourthmoment" in kwargs:
        fourthmoment = kwargs["fourthmoment"]
    if fourthmoment == True:
        fx1 = np.array([Exi, sigmaxi, sxi, kxi])
        fy1 = np.array([Eyi, sigmayi, syi, kyi])
    else:
        fx1 = np.array([Exi, sigmaxi, sxi])
        fy1 = np.array([Eyi, sigmayi, syi])
    return dice(fx1, fy1)
def extract(self, data): # data array is no longer zero meaned by default! kurtosis = st.kurtosis(data, axis=1) skew = st.skew(data, axis=1) # remeber skew is slightly f****d by 0 mean? # maybe tailed is better? std = np.std(data, axis=1) coastline = self._coastline(data) norm_std_data = data/np.std(data, axis = 1)[:,None] norm_coastline = self._coastline(norm_std_data) max_val = np.max(data, axis=1) all_std_data = data/np.std(data, axis = 0) pos_crossings = self._zero_crossings(all_std_data-4) moment6 = st.moment(data,6,axis=1) #3print variation[:,None].shape ''' features = np.hstack((#kurtosis[:,None], skew[:,None], std[:,None], coastline[:,None], norm_coastline[:,None], #max_val[:,None], pos_crossings[:,None], )) ''' features = np.hstack(( std[:,None], norm_coastline[:,None], coastline[:,None], pos_crossings[:,None], moment6[:,None], skew[:,None], )) print features.shape Inan = np.where(np.isnan(features)) Iinf = np.where(np.isinf(features)) features[Inan] = 0 features[Iinf] = 0 features = self._normalize(features) return features
def get_moments(pred_array, num_Obs):
    first = np.mean(pred_array)
    second = moment(pred_array, moment=2)
    third = moment(pred_array, moment=3)
    return [first, second, third]
def nongaussian_parameter(self):
    moment2poten2 = (moment(self.trajectory, moment=2, axis=0))**2
    moment4 = moment(self.trajectory, moment=4, axis=0)
    nongaussianparamter = (1 / 3.) * moment4 / moment2poten2 - 1
    return nongaussianparamter
def measure_complexity(cls, graph):
    # Calculate the second order moment of the degree.
    degrees = graph.degree(range(0, graph.vcount()))
    return stats.moment(degrees, 2)
plt.show() # de Vaucouleurs ratio_d_A = map(truediv, eD2_A1, eD1_A1) angle_d_A = [0.5*orientation(x) for x in ratio_d_A] ratio_d_B = map(truediv, eD2_B1, eD1_B1) angle_d_B = [0.5*orientation(x) for x in ratio_d_B] plt.hist(angle_d_A, bins=60, color='r') plt.hist(angle_d_B, bins=60, color='b', alpha=0.7) plt.xlabel("Angle (degrees)") plt.title("Angle of Orientation of Ellipticity Histogram (de Vaucouleurs)") plt.show() # Mean and Moment (Skewness) a = numpy.mean(angle_e_A) # Mean b = numpy.mean(angle_e_B) c = numpy.mean(angle_d_A) d = numpy.mean(angle_d_B) print('Means (EXP A/B, DEV A/B):', a, b, c, d) x = moment(angle_e_A, moment=3) # 3rd moment (skewness) y = moment(angle_e_B, moment=3) z = moment(angle_d_A, moment=3) w = moment(angle_d_B, moment=3) print('Moment (EXP A/B, DEV A/B):', x, y, z, w)
def apply(self, dataset):
    axis = dataset.data.ndim - 1
    output = []
    for i in range(0, self.num_moments + 1):
        output.append(moment(dataset.data, i, axis=axis))
    return numpy.array(output)
Fittype="Sequential", passWSIndexToFunction=1, CreateOutput=1, OutputCompositeMembers=1, ConvolveMembers=1) ##################### Fitting has ended here. Some analysis below ##################### ########################################################################################## ## Find the Area, FWHM, skew, and kurtosis for each spectrum of the signal, then plot ## ########################################################################################## dE = signal.dataX(0)[1]-signal.dataX(0)[0] #energy bin s=list() for i in range(signal.getNumberHistograms()): y = signal.dataY(i) # intensities along the energy for spectrum with workspace index "i" s.append([dE*y.sum(), 2.355*stc.moment(y,2), stc.skew(y), stc.kurtosis(y)]) s=numpy.array(s).transpose() qvalues=signal.getAxis(1).extractValues() #plot mean, FWHM, skew, and kurtosis, in this order for i in range(4): plot(qvalues,s[i]) """Viewing the results: The sequential fitting produces the following workspaces: fitSeq_Workspaces: a set of data workspaces containing the curves (data,model,residuals,components) for each spectrum fitSeq_Parameters: a set of tables containin the optimized fitting parameters for each spectrum fitSeq: a composite table of all tables of the previous spectrum """ ################################ ## Plotting one of the fits ## ################################
def run(params): bin_width, filter_bandwidth, theta, shift, \ signal_field, noise_field, noise_multiplier = params # Get file path signal_dir = '/scratch/pkittiwi/fg1p/signal_map/bin{:.2f}/' \ 'fbw{:.2f}/theta{:.1f}/shift{:d}' \ .format(bin_width, filter_bandwidth, theta, shift) noise_dir = '/scratch/pkittiwi/fg1p/noise_map/bin{:.2f}/' \ 'fbw{:.2f}/theta{:.1f}/shift{:d}' \ .format(bin_width, filter_bandwidth, theta, shift) output_dir = '/scratch/pkittiwi/fg1p/stats_mc/obsn{:.1f}/bin{:.2f}/' \ 'fbw{:.2f}/theta{:.1f}/shift{:d}/s{:03d}' \ .format(noise_multiplier, bin_width, filter_bandwidth, theta, shift, signal_field) signal_file = '{:s}/signal_map_bin{:.2f}_fbw{:.2f}_' \ 'theta{:.1f}_shift{:d}_{:03d}.nc' \ .format(signal_dir, bin_width, filter_bandwidth, theta, shift, signal_field) noise_file = '{:s}/noise_map_bin{:.2f}_fbw{:.2f}_' \ 'theta{:.1f}_shift{:d}_{:03d}.nc' \ .format(noise_dir, bin_width, filter_bandwidth, theta, shift, noise_field) output_file = '{:s}/stats_mc_obsn{:.1f}_bin{:.2f}_fbw{:.2f}_' \ 'theta{:.1f}_shift{:d}_{:03d}_{:03d}.nc' \ .format(output_dir, noise_multiplier, bin_width, filter_bandwidth, theta, shift, signal_field, noise_field) # Load data signal = xr.open_dataarray(signal_file) noise = xr.open_dataarray(noise_file) mask = xr.open_dataarray('/scratch/pkittiwi/fg1p/hera331_fov_mask.nc') for key, values in noise.coords.items(): signal.coords[key] = values mask.coords[key] = values signal, noise, mask = xr.align(signal, noise, mask) # Make observation signal = signal.where(mask == 1).stack(s=('x', 'y')) noise = noise.where(mask == 1).stack(s=('x', 'y')) * noise_multiplier obs = signal + noise # Get noise variance noise_var = moment(noise.values, moment=2, axis=-1, nan_policy='omit') # Get biased moments m2_biased = moment(obs.values, moment=2, axis=-1, nan_policy='omit') m3_biased = moment(obs.values, moment=3, axis=-1, nan_policy='omit') m4_biased = moment(obs.values, moment=4, axis=-1, nan_policy='omit') # Get unbiased moments m2_unbiased = m2_biased - noise_var m3_unbiased = m3_biased m4_unbiased = m4_biased - (6 * m2_unbiased * noise_var) - \ (3 * noise_var ** 2) # Get biased vsk v_biased = m2_biased s_biased = m3_biased / m2_biased ** (3 / 2) k_biased = (m4_biased / m2_biased ** 2) - 3 # Get unbiased vsk v_unbiased = m2_unbiased s_unbiased = m3_unbiased / m2_unbiased ** (3 / 2) k_unbiased = (m4_unbiased / m2_unbiased ** 2) - 3 # Save output out = xr.Dataset( {'m2_biased': (['f'], m2_biased), 'm3_biased': (['f'], m3_biased), 'm4_biased': (['f'], m4_biased), 'm2_unbiased': (['f'], m2_unbiased), 'm3_unbiased': (['f'], m3_unbiased), 'm4_unbiased': (['f'], m4_unbiased), 'v_biased': (['f'], v_biased), 's_biased': (['f'], s_biased), 'k_biased': (['f'], k_biased), 'v_unbiased': (['f'], v_unbiased), 's_unbiased': (['f'], s_unbiased), 'k_unbiased': (['f'], k_unbiased)}, coords={'f': noise.coords['f']}, attrs={ 'signal_field': signal_field, 'noise_field': noise_field, 'noise_multiplier': noise_multiplier, 'bin_width': bin_width, 'filter_bandwidth': filter_bandwidth, 'theta': theta, 'shift': shift } ) os.makedirs(output_dir, exist_ok=True) out.to_netcdf(output_file) print( 'Finish. signal_file = {:s}. noise_file = {:s}. output_file = {:s}.' .format(signal_file, noise_file, output_file) )
def test_moment4(self):
    self.assertTrue(np.allclose(sml.matrix(m1).moment(moment=4, axis=None),
                                moment(m1, moment=4, axis=None)))
def main(): parser = argparse.ArgumentParser() parser.add_argument("-i", "--infile", required=True, help="Tabular file.") parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.") parser.add_argument("--sample_one_cols", help="Input format, like smi, sdf, inchi") parser.add_argument("--sample_two_cols", help="Input format, like smi, sdf, inchi") parser.add_argument("--sample_cols", help="Input format, like smi, sdf, inchi,separate arrays using ;") parser.add_argument("--test_id", help="statistical test method") parser.add_argument( "--mwu_use_continuity", action="store_true", default=False, help="Whether a continuity correction (1/2.) should be taken into account.", ) parser.add_argument( "--equal_var", action="store_true", default=False, help="If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.", ) parser.add_argument( "--reta", action="store_true", default=False, help="Whether or not to return the internally computed a values." ) parser.add_argument("--fisher", action="store_true", default=False, help="if true then Fisher definition is used") parser.add_argument( "--bias", action="store_true", default=False, help="if false,then the calculations are corrected for statistical bias", ) parser.add_argument("--inclusive1", action="store_true", default=False, help="if false,lower_limit will be ignored") parser.add_argument( "--inclusive2", action="store_true", default=False, help="if false,higher_limit will be ignored" ) parser.add_argument("--inclusive", action="store_true", default=False, help="if false,limit will be ignored") parser.add_argument( "--printextras", action="store_true", default=False, help="If True, if there are extra points a warning is raised saying how many of those points there are", ) parser.add_argument( "--initial_lexsort", action="store_true", default="False", help="Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.", ) parser.add_argument("--correction", action="store_true", default=False, help="continuity correction ") parser.add_argument( "--axis", type=int, default=0, help="Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)", ) parser.add_argument( "--n", type=int, default=0, help="the number of trials. This is ignored if x gives both the number of successes and failures", ) parser.add_argument("--b", type=int, default=0, help="The number of bins to use for the histogram") parser.add_argument("--N", type=int, default=0, help="Score that is compared to the elements in a.") parser.add_argument("--ddof", type=int, default=0, help="Degrees of freedom correction") parser.add_argument("--score", type=int, default=0, help="Score that is compared to the elements in a.") parser.add_argument("--m", type=float, default=0.0, help="limits") parser.add_argument("--mf", type=float, default=2.0, help="lower limit") parser.add_argument("--nf", type=float, default=99.9, help="higher_limit") parser.add_argument( "--p", type=float, default=0.5, help="The hypothesized probability of success. 0 <= p <= 1. 
The default value is p = 0.5", ) parser.add_argument("--alpha", type=float, default=0.9, help="probability") parser.add_argument("--new", type=float, default=0.0, help="Value to put in place of values in a outside of bounds") parser.add_argument( "--proportiontocut", type=float, default=0.0, help="Proportion (in range 0-1) of total data set to trim of each end.", ) parser.add_argument( "--lambda_", type=float, default=1.0, help="lambda_ gives the power in the Cressie-Read power divergence statistic", ) parser.add_argument( "--imbda", type=float, default=0, help="If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.", ) parser.add_argument("--base", type=float, default=1.6, help="The logarithmic base to use, defaults to e") parser.add_argument("--dtype", help="dtype") parser.add_argument("--med", help="med") parser.add_argument("--cdf", help="cdf") parser.add_argument("--zero_method", help="zero_method options") parser.add_argument("--dist", help="dist options") parser.add_argument("--ties", help="ties options") parser.add_argument("--alternative", help="alternative options") parser.add_argument("--mode", help="mode options") parser.add_argument("--method", help="method options") parser.add_argument("--md", help="md options") parser.add_argument("--center", help="center options") parser.add_argument("--kind", help="kind options") parser.add_argument("--tail", help="tail options") parser.add_argument("--interpolation", help="interpolation options") parser.add_argument("--statistic", help="statistic options") args = parser.parse_args() infile = args.infile outfile = open(args.outfile, "w+") test_id = args.test_id nf = args.nf mf = args.mf imbda = args.imbda inclusive1 = args.inclusive1 inclusive2 = args.inclusive2 sample0 = 0 sample1 = 0 sample2 = 0 if args.sample_cols != None: sample0 = 1 barlett_samples = [] for sample in args.sample_cols.split(";"): barlett_samples.append(map(int, sample.split(","))) if args.sample_one_cols != None: sample1 = 1 sample_one_cols = args.sample_one_cols.split(",") if args.sample_two_cols != None: sample_two_cols = args.sample_two_cols.split(",") sample2 = 1 for line in open(infile): sample_one = [] sample_two = [] cols = line.strip().split("\t") if sample0 == 1: b_samples = columns_to_values(barlett_samples, line) if sample1 == 1: for index in sample_one_cols: sample_one.append(cols[int(index) - 1]) if sample2 == 1: for index in sample_two_cols: sample_two.append(cols[int(index) - 1]) if test_id.strip() == "describe": size, min_max, mean, uv, bs, bk = stats.describe(map(float, sample_one)) cols.append(size) cols.append(min_max) cols.append(mean) cols.append(uv) cols.append(bs) cols.append(bk) elif test_id.strip() == "mode": vals, counts = stats.mode(map(float, sample_one)) cols.append(vals) cols.append(counts) elif test_id.strip() == "nanmean": m = stats.nanmean(map(float, sample_one)) cols.append(m) elif test_id.strip() == "nanmedian": m = stats.nanmedian(map(float, sample_one)) cols.append(m) elif test_id.strip() == "kurtosistest": z_value, p_value = stats.kurtosistest(map(float, sample_one)) cols.append(z_value) cols.append(p_value) elif test_id.strip() == "variation": ra = stats.variation(map(float, sample_one)) cols.append(ra) elif test_id.strip() == "itemfreq": freq = stats.itemfreq(map(float, sample_one)) for list in freq: elements = ",".join(map(str, list)) cols.append(elements) elif test_id.strip() == "nanmedian": m = 
stats.nanmedian(map(float, sample_one)) cols.append(m) elif test_id.strip() == "variation": ra = stats.variation(map(float, sample_one)) cols.append(ra) elif test_id.strip() == "boxcox_llf": IIf = stats.boxcox_llf(imbda, map(float, sample_one)) cols.append(IIf) elif test_id.strip() == "tiecorrect": fa = stats.tiecorrect(map(float, sample_one)) cols.append(fa) elif test_id.strip() == "rankdata": r = stats.rankdata(map(float, sample_one), method=args.md) cols.append(r) elif test_id.strip() == "nanstd": s = stats.nanstd(map(float, sample_one), bias=args.bias) cols.append(s) elif test_id.strip() == "anderson": A2, critical, sig = stats.anderson(map(float, sample_one), dist=args.dist) cols.append(A2) for list in critical: cols.append(list) cols.append(",") for list in sig: cols.append(list) elif test_id.strip() == "binom_test": p_value = stats.binom_test(map(float, sample_one), n=args.n, p=args.p) cols.append(p_value) elif test_id.strip() == "gmean": gm = stats.gmean(map(float, sample_one), dtype=args.dtype) cols.append(gm) elif test_id.strip() == "hmean": hm = stats.hmean(map(float, sample_one), dtype=args.dtype) cols.append(hm) elif test_id.strip() == "kurtosis": k = stats.kurtosis(map(float, sample_one), axis=args.axis, fisher=args.fisher, bias=args.bias) cols.append(k) elif test_id.strip() == "moment": n_moment = stats.moment(map(float, sample_one), n=args.n) cols.append(n_moment) elif test_id.strip() == "normaltest": k2, p_value = stats.normaltest(map(float, sample_one)) cols.append(k2) cols.append(p_value) elif test_id.strip() == "skew": skewness = stats.skew(map(float, sample_one), bias=args.bias) cols.append(skewness) elif test_id.strip() == "skewtest": z_value, p_value = stats.skewtest(map(float, sample_one)) cols.append(z_value) cols.append(p_value) elif test_id.strip() == "sem": s = stats.sem(map(float, sample_one), ddof=args.ddof) cols.append(s) elif test_id.strip() == "zscore": z = stats.zscore(map(float, sample_one), ddof=args.ddof) for list in z: cols.append(list) elif test_id.strip() == "signaltonoise": s2n = stats.signaltonoise(map(float, sample_one), ddof=args.ddof) cols.append(s2n) elif test_id.strip() == "percentileofscore": p = stats.percentileofscore(map(float, sample_one), score=args.score, kind=args.kind) cols.append(p) elif test_id.strip() == "bayes_mvs": c_mean, c_var, c_std = stats.bayes_mvs(map(float, sample_one), alpha=args.alpha) cols.append(c_mean) cols.append(c_var) cols.append(c_std) elif test_id.strip() == "sigmaclip": c, c_low, c_up = stats.sigmaclip(map(float, sample_one), low=args.m, high=args.n) cols.append(c) cols.append(c_low) cols.append(c_up) elif test_id.strip() == "kstest": d, p_value = stats.kstest( map(float, sample_one), cdf=args.cdf, N=args.N, alternative=args.alternative, mode=args.mode ) cols.append(d) cols.append(p_value) elif test_id.strip() == "chi2_contingency": chi2, p, dof, ex = stats.chi2_contingency( map(float, sample_one), correction=args.correction, lambda_=args.lambda_ ) cols.append(chi2) cols.append(p) cols.append(dof) cols.append(ex) elif test_id.strip() == "tmean": if nf is 0 and mf is 0: mean = stats.tmean(map(float, sample_one)) else: mean = stats.tmean(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(mean) elif test_id.strip() == "tmin": if mf is 0: min = stats.tmin(map(float, sample_one)) else: min = stats.tmin(map(float, sample_one), lowerlimit=mf, inclusive=args.inclusive) cols.append(min) elif test_id.strip() == "tmax": if nf is 0: max = stats.tmax(map(float, sample_one)) else: max = 
stats.tmax(map(float, sample_one), upperlimit=nf, inclusive=args.inclusive) cols.append(max) elif test_id.strip() == "tvar": if nf is 0 and mf is 0: var = stats.tvar(map(float, sample_one)) else: var = stats.tvar(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(var) elif test_id.strip() == "tstd": if nf is 0 and mf is 0: std = stats.tstd(map(float, sample_one)) else: std = stats.tstd(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(std) elif test_id.strip() == "tsem": if nf is 0 and mf is 0: s = stats.tsem(map(float, sample_one)) else: s = stats.tsem(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(s) elif test_id.strip() == "scoreatpercentile": if nf is 0 and mf is 0: s = stats.scoreatpercentile( map(float, sample_one), map(float, sample_two), interpolation_method=args.interpolation ) else: s = stats.scoreatpercentile( map(float, sample_one), map(float, sample_two), (mf, nf), interpolation_method=args.interpolation ) for list in s: cols.append(list) elif test_id.strip() == "relfreq": if nf is 0 and mf is 0: rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b) else: rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b, (mf, nf)) for list in rel: cols.append(list) cols.append(low_range) cols.append(binsize) cols.append(ex) elif test_id.strip() == "binned_statistic": if nf is 0 and mf is 0: st, b_edge, b_n = stats.binned_statistic( map(float, sample_one), map(float, sample_two), statistic=args.statistic, bins=args.b ) else: st, b_edge, b_n = stats.binned_statistic( map(float, sample_one), map(float, sample_two), statistic=args.statistic, bins=args.b, range=(mf, nf), ) cols.append(st) cols.append(b_edge) cols.append(b_n) elif test_id.strip() == "threshold": if nf is 0 and mf is 0: o = stats.threshold(map(float, sample_one), newval=args.new) else: o = stats.threshold(map(float, sample_one), mf, nf, newval=args.new) for list in o: cols.append(list) elif test_id.strip() == "trimboth": o = stats.trimboth(map(float, sample_one), proportiontocut=args.proportiontocut) for list in o: cols.append(list) elif test_id.strip() == "trim1": t1 = stats.trim1(map(float, sample_one), proportiontocut=args.proportiontocut, tail=args.tail) for list in t1: cols.append(list) elif test_id.strip() == "histogram": if nf is 0 and mf is 0: hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b) else: hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b, (mf, nf)) cols.append(hi) cols.append(low_range) cols.append(binsize) cols.append(ex) elif test_id.strip() == "cumfreq": if nf is 0 and mf is 0: cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b) else: cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b, (mf, nf)) cols.append(cum) cols.append(low_range) cols.append(binsize) cols.append(ex) elif test_id.strip() == "boxcox_normmax": if nf is 0 and mf is 0: ma = stats.boxcox_normmax(map(float, sample_one)) else: ma = stats.boxcox_normmax(map(float, sample_one), (mf, nf), method=args.method) cols.append(ma) elif test_id.strip() == "boxcox": if imbda is 0: box, ma, ci = stats.boxcox(map(float, sample_one), alpha=args.alpha) cols.append(box) cols.append(ma) cols.append(ci) else: box = stats.boxcox(map(float, sample_one), imbda, alpha=args.alpha) cols.append(box) elif test_id.strip() == "histogram2": h2 = stats.histogram2(map(float, sample_one), map(float, sample_two)) for list in h2: cols.append(list) elif test_id.strip() == 
"ranksums": z_statistic, p_value = stats.ranksums(map(float, sample_one), map(float, sample_two)) cols.append(z_statistic) cols.append(p_value) elif test_id.strip() == "ttest_1samp": t, prob = stats.ttest_1samp(map(float, sample_one), map(float, sample_two)) for list in t: cols.append(list) for list in prob: cols.append(list) elif test_id.strip() == "ansari": AB, p_value = stats.ansari(map(float, sample_one), map(float, sample_two)) cols.append(AB) cols.append(p_value) elif test_id.strip() == "linregress": slope, intercept, r_value, p_value, stderr = stats.linregress( map(float, sample_one), map(float, sample_two) ) cols.append(slope) cols.append(intercept) cols.append(r_value) cols.append(p_value) cols.append(stderr) elif test_id.strip() == "pearsonr": cor, p_value = stats.pearsonr(map(float, sample_one), map(float, sample_two)) cols.append(cor) cols.append(p_value) elif test_id.strip() == "pointbiserialr": r, p_value = stats.pointbiserialr(map(float, sample_one), map(float, sample_two)) cols.append(r) cols.append(p_value) elif test_id.strip() == "ks_2samp": d, p_value = stats.ks_2samp(map(float, sample_one), map(float, sample_two)) cols.append(d) cols.append(p_value) elif test_id.strip() == "mannwhitneyu": mw_stats_u, p_value = stats.mannwhitneyu( map(float, sample_one), map(float, sample_two), use_continuity=args.mwu_use_continuity ) cols.append(mw_stats_u) cols.append(p_value) elif test_id.strip() == "zmap": z = stats.zmap(map(float, sample_one), map(float, sample_two), ddof=args.ddof) for list in z: cols.append(list) elif test_id.strip() == "ttest_ind": mw_stats_u, p_value = stats.ttest_ind( map(float, sample_one), map(float, sample_two), equal_var=args.equal_var ) cols.append(mw_stats_u) cols.append(p_value) elif test_id.strip() == "ttest_rel": t, prob = stats.ttest_rel(map(float, sample_one), map(float, sample_two), axis=args.axis) cols.append(t) cols.append(prob) elif test_id.strip() == "mood": z, p_value = stats.mood(map(float, sample_one), map(float, sample_two), axis=args.axis) cols.append(z) cols.append(p_value) elif test_id.strip() == "shapiro": W, p_value, a = stats.shapiro(map(float, sample_one), map(float, sample_two), args.reta) cols.append(W) cols.append(p_value) for list in a: cols.append(list) elif test_id.strip() == "kendalltau": k, p_value = stats.kendalltau( map(float, sample_one), map(float, sample_two), initial_lexsort=args.initial_lexsort ) cols.append(k) cols.append(p_value) elif test_id.strip() == "entropy": s = stats.entropy(map(float, sample_one), map(float, sample_two), base=args.base) cols.append(s) elif test_id.strip() == "spearmanr": if sample2 == 1: rho, p_value = stats.spearmanr(map(float, sample_one), map(float, sample_two)) else: rho, p_value = stats.spearmanr(map(float, sample_one)) cols.append(rho) cols.append(p_value) elif test_id.strip() == "wilcoxon": if sample2 == 1: T, p_value = stats.wilcoxon( map(float, sample_one), map(float, sample_two), zero_method=args.zero_method, correction=args.correction, ) else: T, p_value = stats.wilcoxon( map(float, sample_one), zero_method=args.zero_method, correction=args.correction ) cols.append(T) cols.append(p_value) elif test_id.strip() == "chisquare": if sample2 == 1: rho, p_value = stats.chisquare(map(float, sample_one), map(float, sample_two), ddof=args.ddof) else: rho, p_value = stats.chisquare(map(float, sample_one), ddof=args.ddof) cols.append(rho) cols.append(p_value) elif test_id.strip() == "power_divergence": if sample2 == 1: stat, p_value = stats.power_divergence( map(float, sample_one), map(float, 
sample_two), ddof=args.ddof, lambda_=args.lambda_ ) else: stat, p_value = stats.power_divergence(map(float, sample_one), ddof=args.ddof, lambda_=args.lambda_) cols.append(stat) cols.append(p_value) elif test_id.strip() == "theilslopes": if sample2 == 1: mpe, met, lo, up = stats.theilslopes(map(float, sample_one), map(float, sample_two), alpha=args.alpha) else: mpe, met, lo, up = stats.theilslopes(map(float, sample_one), alpha=args.alpha) cols.append(mpe) cols.append(met) cols.append(lo) cols.append(up) elif test_id.strip() == "combine_pvalues": if sample2 == 1: stat, p_value = stats.combine_pvalues( map(float, sample_one), method=args.med, weights=map(float, sample_two) ) else: stat, p_value = stats.combine_pvalues(map(float, sample_one), method=args.med) cols.append(stat) cols.append(p_value) elif test_id.strip() == "obrientransform": ob = stats.obrientransform(*b_samples) for list in ob: elements = ",".join(map(str, list)) cols.append(elements) elif test_id.strip() == "f_oneway": f_value, p_value = stats.f_oneway(*b_samples) cols.append(f_value) cols.append(p_value) elif test_id.strip() == "kruskal": h, p_value = stats.kruskal(*b_samples) cols.append(h) cols.append(p_value) elif test_id.strip() == "friedmanchisquare": fr, p_value = stats.friedmanchisquare(*b_samples) cols.append(fr) cols.append(p_value) elif test_id.strip() == "fligner": xsq, p_value = stats.fligner(center=args.center, proportiontocut=args.proportiontocut, *b_samples) cols.append(xsq) cols.append(p_value) elif test_id.strip() == "bartlett": T, p_value = stats.bartlett(*b_samples) cols.append(T) cols.append(p_value) elif test_id.strip() == "levene": w, p_value = stats.levene(center=args.center, proportiontocut=args.proportiontocut, *b_samples) cols.append(w) cols.append(p_value) elif test_id.strip() == "median_test": stat, p_value, m, table = stats.median_test( ties=args.ties, correction=args.correction, lambda_=args.lambda_, *b_samples ) cols.append(stat) cols.append(p_value) cols.append(m) cols.append(table) for list in table: elements = ",".join(map(str, list)) cols.append(elements) outfile.write("%s\n" % "\t".join(map(str, cols))) outfile.close()