def descStats(data):
    """ Compute descriptive statistics of data """
    dataList = list(data)
    logDataList = list(N.log10(dataList))
    desc = dict()
    if len(dataList) == 0:
        desc['mean'] = 0
        desc['median'] = 0
        desc['logMean'] = 0
        desc['logMedian'] = 0
    elif len(dataList) < 2:
        desc['mean'] = dataList[0]
        desc['median'] = dataList[0]
        desc['logMean'] = logDataList[0]
        desc['logMedian'] = logDataList[0]
    else:
        desc['mean'] = mean(dataList)
        desc['median'] = median(dataList)
        desc['logMean'] = mean(logDataList)
        desc['logMedian'] = median(logDataList)
    if len(dataList) < 3:
        desc['stdev'] = 0
        desc['sterr'] = 0
        desc['logStdev'] = 0
        desc['logSterr'] = 0
    else:
        desc['stdev'] = std(dataList)
        desc['sterr'] = stderr(dataList)
        desc['logStdev'] = std(logDataList)
        desc['logSterr'] = stderr(logDataList)
    return desc
def estimate_rz(psr, T, show=0, device='/XWIN'):
    """
    estimate_rz(psr, T, show=0, device='/XWIN'):
        Return estimates of a pulsar's average Fourier freq ('r')
        relative to its nominal Fourier freq, as well as its
        Fourier f-dot ('z'), both in bins.
        'psr' is a psrparams structure describing the pulsar.
        'T' is the length of the observation in sec.
        'show' if true, displays plots of 'r' and 'z'.
        'device' is the device to plot to if 'show' is true.
    """
    from scipy.stats import mean
    startE = keplars_eqn(psr.orb.t, psr.orb.p, psr.orb.e, 1.0E-15)
    numorbpts = int(T / psr.orb.p + 1.0) * 1024 + 1
    dt = T / (numorbpts - 1)
    E = dorbint(startE, numorbpts, dt, psr.orb)
    z = z_from_e(E, psr, T)
    r = T / p_from_e(E, psr) - T / psr.p
    if show:
        times = Num.arange(numorbpts) * dt
        Pgplot.plotxy(r, times, labx='Time',
                      laby='Fourier Frequency (r)', device=device)
        if device == '/XWIN':
            print 'Press enter to continue:'
            i = raw_input()
        Pgplot.nextplotpage()
        Pgplot.plotxy(z, times, labx='Time',
                      laby='Fourier Frequency Derivative (z)', device=device)
        Pgplot.closeplot()
    return (mean(r), mean(z))
def mapsswe(x, y):
    # Paired-difference t statistic and two-sided p-value for x vs. y.
    xm = mean(x)
    ym = mean(y)
    s = 0.
    n = 0.
    for xi, yi in izip(x, y):
        s += ((xi - yi) - (xm - ym))**2
        n += 1
    t_stat = sqrt(n) * abs(xm - ym) / sqrt(s / (n - 1.))
    p_value = t.sf(t_stat, n - 1) * 2
    return t_stat, p_value
def test_basic(self):
    a = [3, 4, 5, 10, -3, -5, 6]
    af = [3., 4, 5, 10, -3, -5, -6]
    Na = len(a)
    Naf = len(af)
    mn1 = 0.0
    for el in a:
        mn1 += el / float(Na)
    assert_almost_equal(stats.mean(a), mn1, 11)
    mn2 = 0.0
    for el in af:
        mn2 += el / float(Naf)
    assert_almost_equal(stats.mean(af), mn2, 11)
def test_2d(self):
    a = [[1.0, 2.0, 3.0],
         [2.0, 4.0, 6.0],
         [8.0, 12.0, 7.0]]
    A = array(a)
    N1, N2 = (3, 3)
    mn1 = zeros(N2, dtype=float)
    for k in range(N1):
        mn1 += A[k, :] / N1
    assert_almost_equal(stats.mean(a, axis=0), mn1, decimal=13)
    assert_almost_equal(stats.mean(a), mn1, decimal=13)
    mn2 = zeros(N1, dtype=float)
    for k in range(N2):
        mn2 += A[:, k]
    mn2 /= N2
    assert_almost_equal(stats.mean(a, axis=1), mn2, decimal=13)
def test_z(self):
    """ not in R, so used
        (10 - mean(testcase, axis=0)) / sqrt(var(testcase) * 3 / 4)
    """
    y = stats.z(self.testcase, stats.mean(self.testcase))
    assert_almost_equal(y, 0.0)
def determine_whiten(self):
    # Pilot run that determines the scale of each statistic (whitening).
    print('[ABC] pilot run: whitening')
    # Record current settings
    whiten = self.hyperparams.whiten
    num_sim = self.num_sim
    # Do simulation
    self.hyperparams.whiten = False
    self.num_sim = self.hyperparams.pilot_run_N
    self.simulate()
    # Compute mean & covariance matrix
    stats = self.stats
    stats = np.mat(stats)
    mean = stats.mean(axis=0)
    cov = (stats - mean).T * (stats - mean) / stats.shape[0]
    self.COV = cov
    self.mean = mean
    # Save results
    utils_os.save_object(self.save_dir, 'pilot_run_whiten_cov.npy', self.COV)
    utils_os.save_object(self.save_dir, 'pilot_run_whiten_mean.npy', self.mean)
    # Recover settings
    self.hyperparams.whiten = whiten
    self.num_sim = num_sim
def nanmean(x):
    """Find the mean of x ignoring nans.

    fixme: should be fixed to work along an axis.
    """
    x = _asarray1d(x).copy()
    y = compress(isfinite(x), x)
    return mean(y)
def _remove_outliers(data):
    """Drop points more than 3 standard deviations from the mean."""
    if len(data) < 2:
        return data
    lmean = mean(data)
    limstdev = 3 * std(data)
    return [item for item in data if abs(item - lmean) < limstdev]
def input_gps_height(data, df):
    x = len(data)
    for i in range(x):
        if df.ix[i, 'gps_height'] == 0:
            neighbors = euclid_knn.kneighbors(data[i], return_distance=False)
            neighbors = neighbors.flatten().tolist()
            temp = stats.mean(df.ix[neighbors, 'gps_height'])[0][0]
            df.ix[i, 'gps_height'] = int(temp)
def nanmean(x, axis=-1):
    """Compute the mean over the given axis ignoring nans.
    """
    x, axis = _chk_asarray(x, axis)
    x = x.copy()
    Norig = x.shape[axis]
    factor = 1.0 - sum(isnan(x), axis) * 1.0 / Norig
    putmask(x, isnan(x), 0)
    return mean(x, axis) / factor
def downsample(signal, factor):
    try:
        from scipy.stats import nanmean as mean
    except ImportError:
        from numpy import mean
    signal = np.array(signal)
    xs = signal.shape[0]
    # Trim so the length is an exact multiple of the decimation factor.
    signal = signal[:xs - (xs % int(factor))]
    result = mean(np.concatenate([[signal[i::factor] for i in range(factor)]]),
                  axis=0)
    return result
def regression(self):
    """ Perform linear regression """
    stats = BasicStats()
    n = len(self.data['x'])
    x2_sum = stats.sum_squares(self.data['x'])
    x_sum2 = sum(self.data['x']) * sum(self.data['x'])
    x_sum = sum(self.data['x'])
    y_sum = sum(self.data['y'])
    xy_sum = stats.sum_xy(self.data)
    # Normal-equation form of the least-squares estimates.
    self.alpha = (y_sum * x2_sum - x_sum * xy_sum) / (n * x2_sum - x_sum2)
    self.beta = (n * xy_sum - x_sum * y_sum) / (n * x2_sum - x_sum2)
    # Equivalent covariance/variance form; these overwrite the values above.
    self.beta = stats.cov(self.data['x'], self.data['y']) / stats.var(self.data['x'])
    self.alpha = stats.mean(self.data['y']) - self.beta * stats.mean(self.data['x'])
def reindex(self):
    periods = self.periods
    period = self.series[-periods:]
    sma = None
    if len(period) == periods:
        try:
            sma = mean(period)
        except (TypeError, IndexError):
            pass
    self.append(sma)
def reindex(self):
    periods = self.periods
    period = self.series[-periods:]
    vol = None
    if len(period) == periods:
        try:
            vol = std(period) / mean(period)
            vol *= 100
        except TypeError:
            pass
    self.append(vol)
def get_statistics_from_diffs(diffs):
    the_mean = st.mean(diffs)
    return {
        'min': min(diffs),
        'max': max(diffs),
        'mean': the_mean,
        'median': st.median(diffs),
        'stdev': st.stdev(diffs, the_mean),
        'q1': np.percentile(diffs, 25),
        'q3': np.percentile(diffs, 75)
    }
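# A brief usage sketch for get_statistics_from_diffs() above, assuming the
# module-level imports its body implies (`import statistics as st`,
# `import numpy as np`); the helper name and the diff values are invented
# for illustration.
def _diff_stats_example():
    diffs = [0.5, 1.2, 0.9, 1.1, 0.7, 1.4, 0.8]
    summary = get_statistics_from_diffs(diffs)
    # summary['q1'] / summary['q3'] are the 25th / 75th percentiles computed
    # by numpy; the rest come from the statistics module.
    return summary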
def table_bootpack(table, bin_size, n_bootstraps, seed=8472):
    new_table = data.Table()
    for head, subtable in misc.sorted_groupby(table, key=lambda r: r.corr.shape):
        subtable = data.Table(subtable)
        stacked_corrs = numpy.stack(subtable['corr'], axis=0)
        bootpacks = data.BootPack(
            stats.mean(stacked_corrs),
            stats.bootstrap(stats.bin_(stacked_corrs, bin_size),
                            n_bootstraps, seed=seed))
        new_table.extend(
            data.Record(record, corr=bootpack, bin_size=bin_size,
                        n_bootstraps=n_bootstraps)
            for record, bootpack in zip(subtable, bootpacks))
    return new_table
def _calc_basic_statistics(self):
    """This function determines the mean and the standard deviation of the
    data sample. Furthermore, several other simple properties are determined.
    """
    self.mean = stats.mean(self._data_samples)
    self.geom_mean = stats.geomean(self._data_samples)
    self.median = stats.median(self._data_samples)
    self.std_dev = stats.stddev(self._data_samples)
    self.min = min(self._data_samples)
    self.max = max(self._data_samples)
def __init__(self, samples):
    self.samples = numpy.asarray(samples)
    self.N = len(samples)
    self.median = stats.median(samples)
    self.min = numpy.amin(samples)
    self.max = numpy.amax(samples)
    self.mean = stats.mean(samples)
    self.std = stats.std(samples)
    self.var = self.std**2.
    self.skew = stats.skew(samples)
    self.kurtosis = stats.kurtosis(samples)
    self.range = self.max - self.min
def calcola_af(AF):
    try:
        AF.remove('.')
    except:
        print(AF)
        return '.'
    try:
        return stats.mean(AF)
    except:
        print(AF)
        return '.'
def downsample_cube(myarr, factor, ignoredim=0):
    """
    Downsample a 3D array by averaging over *factor* pixels on the last two
    axes.
    """
    if ignoredim > 0:
        myarr = myarr.swapaxes(0, ignoredim)
    zs, ys, xs = myarr.shape
    crarr = myarr[:, :ys - (ys % int(factor)), :xs - (xs % int(factor))]
    dsarr = mean(numpy.concatenate([[crarr[:, i::factor, j::factor]
                                     for i in range(factor)]
                                    for j in range(factor)]), axis=0)
    if ignoredim > 0:
        dsarr = dsarr.swapaxes(0, ignoredim)
    return dsarr
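# A small sanity-check sketch for downsample_cube() above, assuming
# `import numpy` and a module-level `mean` that accepts an `axis` keyword
# (e.g. numpy.mean), as the function body implies; the helper name is
# illustrative only.
def _downsample_cube_example():
    cube = numpy.arange(2 * 8 * 8, dtype=float).reshape(2, 8, 8)
    small = downsample_cube(cube, 4)
    # Each 4x4 block of the last two axes is replaced by its average,
    # so the result has shape (2, 2, 2).
    return small.shape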
def test_mean_simple(self):
    self.assertEqual(2, stats.mean([1, 2, 3]))
    self.assertAlmostEqual(2, stats.mean([1.0, 2.0, 3.0]))
    self.assertAlmostEqual(25, stats.mean(self._integers))
    self.assertAlmostEqual(25, stats.mean(self._floats))
    self.assertAlmostEqual(25 + 2.31, stats.mean(self._floats2))
    self.assertAlmostEqual(27.295918367, stats.mean(self._mixed))
def _getcs(self, data):
    if self.c:
        c = data[self.c]
        #print data, c
        sigma, mu = std(c), mean(c)
        c = N.clip(c, mu - 2 * sigma, mu + 2 * sigma)
        c = (max(c) - c) / (max(c) - min(c))
    else:
        c = 'b'
    if self.s:
        s = data[self.s]
    else:
        s = 10.0
    return c, s
def interp(self, x):
    """ Average multiple values at the edges of a numpy array to use for
        extrapolation.  This method only works for extrapolation.
    """
    if numpy.alltrue(numpy.logical_and(x < self._x[0], x > self._x[-1])):
        msg = "end_average() only works for extrapolation. Some of the " \
              "values in x fall between the endpoints (x[0], x[-1]) of " \
              "the x numpy.array."
        raise ValueError(msg)
    # Find the average y value within index_interval of both the start and
    # end of the data set.
    indices = (self._x[0] + self.index_interval,
               self._x[-1] - self.index_interval)
    first, last = numpy.searchsorted(self._x, indices)
    y_low = stats.mean(self._y[:first])
    y_hi = stats.mean(self._y[last:])
    dist_low = abs(x - self._x[0])
    dist_hi = abs(x - self._x[-1])
    y = numpy.choose(dist_low > dist_hi, (y_low, y_hi))
    return y
def confidence(samples, confidence_level):
    """This function determines the confidence interval for a given set of
    samples, as well as the mean, the standard deviation, and the size of the
    confidence interval as a percentage of the mean.
    From javastats by Andy Georges.
    """
    mean = stats.mean(samples)
    sdev = stats.std(samples)
    n = len(samples)
    df = n - 1
    t = distributions.t.ppf((1 + confidence_level) / 2.0, df)
    interval = (interval_low, interval_high) = (mean - t * sdev / math.sqrt(n),
                                                mean + t * sdev / math.sqrt(n))
    interval_size = interval_high - interval_low
    interval_percentage = interval_size / mean * 100.0
    return (interval, mean, sdev, interval_percentage)
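# A minimal usage sketch for confidence() above. It assumes the module-level
# imports the function body implies (an older scipy where `stats.mean` and
# `stats.std` exist, `from scipy.stats import distributions`, `import math`);
# the helper name and sample values are invented for illustration.
def _confidence_example():
    samples = [10.1, 9.8, 10.3, 10.0, 9.9, 10.2]
    interval, mean, sdev, pct = confidence(samples, 0.95)
    # `interval` is the (low, high) 95% confidence interval around `mean`,
    # and `pct` is its width as a percentage of the mean.
    return interval, mean, sdev, pct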
def test_mean_vs_numpy(self):
    self.assertEqual(numpy.mean([1, 2, 3]), stats.mean([1, 2, 3]))
    self.assertAlmostEqual(numpy.mean([1.0, 2.0, 3.0]),
                           stats.mean([1.0, 2.0, 3.0]))
    self.assertAlmostEqual(numpy.mean(self._integers), stats.mean(self._integers))
    self.assertAlmostEqual(numpy.mean(self._floats), stats.mean(self._floats))
    self.assertAlmostEqual(numpy.mean(self._floats2), stats.mean(self._floats2))
    self.assertAlmostEqual(numpy.mean(self._mixed), stats.mean(self._mixed))
def skewsField(sample, field):
    """
    Checks whether the value of field in the passed in sample is
    significantly different from the value of field for the rest of the
    samples under consideration.
    """
    savedSamples = samples.sampleList[:]
    samples.sampleList.remove(sample)
    try:
        flds = samples.getAllFlds(field)
        mean = stats.mean(flds)
        stddev = stats.std(flds)
        val = sample[field]
        if stddev == 0:
            devs = 0
        else:
            devs = abs(val - mean) / stddev
    finally:
        # we should be fixing the sample list even when I crash!
        samples.sampleList = savedSamples
    if len(samples.sampleList) < 3:
        qual = confidence.Validity.plaus
    elif len(samples.sampleList) < 6:
        qual = confidence.Validity.prob
    else:
        qual = confidence.Validity.sound
    conf = __getConfidence((.5, 1, 2, 3, 5), devs, qual)
    samples.sampleList.sort(key=lambda x: samples.extractField(x, field))
    plot = __getPlot('id', field)
    plot.plotLine(0, mean)
    plot.plotLine(0, mean - stddev)
    plot.plotLine(0, mean + stddev)
    plot.plotLine(0, sample[field])
    return SimResult(conf,
                     str(sample) + " has a different " + field +
                     " from other samples",
                     str(sample) + "'s value for " + field + ' is ' +
                     str(devs) + ' standard deviations from the mean',
                     plot)
def bootstrap(data, num_samples, statistic, alpha):
    """Returns the results from num_samples bootstrap samples for an input
    test statistic, its standard deviation, and its 100*(1-alpha)% confidence
    level interval."""
    # Generate the indices for the required number of resamplings
    # (with replacement).
    idx = npr.randint(0, len(data), (num_samples, len(data)))
    # Generate the multiple resampled data sets from the original one.
    samples = data[idx]
    # Apply the 'statistic' function to each of the resampled data sets and
    # sort the resulting statistics in increasing order.
    stats = np.sort(statistic(samples, 1))
    stat = stats.mean()
    # The values of the computed statistic at the lower and upper percentiles
    # specified by the alpha parameter are, by definition, the boundaries of
    # the confidence interval for that alpha.  E.g. alpha=0.05 ==> 95% CI.
    low_ci = stats[int((alpha / 2.0) * num_samples)]
    high_ci = stats[int((1 - alpha / 2.0) * num_samples)]
    #sd = np.std(stat)
    # To include Bessel's correction for unbiased standard deviation:
    sd = np.sqrt(len(data) / (len(data) - 1)) * np.std(stats)
    return stat, sd, low_ci, high_ci
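# An illustrative sketch for bootstrap() above, assuming the module-level
# imports its body implies (`import numpy as np`, `import numpy.random as npr`);
# the helper name and the synthetic data set are invented for illustration.
def _bootstrap_example():
    np.random.seed(0)
    data = np.random.normal(loc=5.0, scale=2.0, size=500)
    stat, sd, low_ci, high_ci = bootstrap(data, 10000, np.mean, 0.05)
    # `stat` is the mean of the resampled means, `sd` their spread, and
    # (low_ci, high_ci) the 95% percentile confidence interval.
    return stat, sd, low_ci, high_ci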
def main():
    if len(sys.argv) != 3:
        printUsage()
    executable = sys.argv[1]
    no_times = int(sys.argv[2])
    cmd = "time ./%s" % executable
    times = zeros(no_times, 'f')
    for i in range(no_times):
        t = time()
        os.system(cmd)
        t_store = time() - t
        print "t_store = ", t_store
        times[i] = t_store
    print "Mean time for operation = ", mean(times)
    print "Standard deviation for operation = ", std(times)
def reindex(self):
    try:
        last = self[-1]
    except (IndexError, ):
        self.append(None)
        return
    periods = self.periods
    ema = None
    if last is None:
        try:
            period = self.series[-periods:]
            if len(period) == periods:
                ema = mean(period)
        except (TypeError, ):
            pass
    else:
        pt = self.series[-1]
        k = self.k / (periods + 1)
        ema = last + (k * (pt - last))
    self.append(ema)
def autocorrelation(series, k=1, biased=True):
    """Returns the autocorrelation of order 'k' and the corresponding
    two-tailed p-value. (Inspired by CLM pp. 45-47.)

    @param series: The series on which to compute the autocorrelation
    @param k: The order at which to compute the autocorrelation
    @param biased: If False, rho_k will be corrected according to Fuller (1976)
    @return: rho_k, pvalue
    """
    T = len(series)
    mu = mean(series)
    sigma = var(series)
    # Centered observations
    obs = series - mu
    lagged = lag(obs, k)
    truncated = obs[:-k]
    assert len(lagged) == len(truncated)
    # Multiplied by 'T' for numerical stability
    gamma_k = T * add.reduce(truncated * lagged)  # Numerator
    gamma_0 = T * add.reduce(obs * obs)           # Denominator
    rho_k = gamma_k / gamma_0
    if rho_k > 1.0:
        rho_k = 1.0  # Correct for numerical errors
    # The standard normal random variable
    Z = sqrt(T) * rho_k
    # Bias correction?
    if not biased:
        rho_k += (1 - rho_k**2) * (T - k) / (T - 1)**2
        Z = rho_k * T / sqrt(T - k)
    # The two-tailed p-value is twice the probability that the value of a
    # standard normal r.v. turns out to be greater than the absolute value of Z.
    pvalue = 2 * (1 - norm.cdf(abs(Z)))
    assert pvalue >= 0.0 and pvalue <= 1.0
    return rho_k, pvalue
def fit_gaussians(data, initial_params, errs, profnm):
    numparams = len(initial_params)
    numgaussians = (len(initial_params) - 1) / 3
    # Generate the parameter structure
    parinfo = []
    params0 = []
    for ii in range(numparams):
        params0.append(initial_params[ii])
        parinfo.append({'value': initial_params[ii], 'fixed': 0,
                        'limited': [0, 0], 'limits': [0., 0.]})
    other_args = {'data': data, 'errs': errs}
    # Now fit it
    mpfit_out = mpfit.mpfit(fit_function, params0, functkw=other_args,
                            parinfo=parinfo, quiet=1)
    fit_params = mpfit_out.params
    fit_errs = mpfit_out.perror
    # Degrees of freedom
    dof = len(data) - len(fit_params)
    # Chi-squared for the model fit
    chi_sq = mpfit_out.fnorm
    print "------------------------------------------------------------------"
    print "Multi-Gaussian Fit by pygaussfit.py of '%s'" % profnm
    print "------------------------------------------------------------------"
    print "mpfit status:", mpfit_out.status
    print "gaussians:", numgaussians
    print "DOF:", dof
    print "chi-sq: %.2f" % chi_sq
    print "reduced chi-sq: %.2f" % (chi_sq / dof)
    residuals = data - gen_gaussians(fit_params, len(data))
    print "residuals mean: %.3g" % mean(residuals)
    print "residuals stdev: %.3g" % std(residuals)
    print "--------------------------------------"
    print " const = %.5f +/- %.5f" % (fit_params[0], fit_errs[0])
    for ii in range(numgaussians):
        print " phas%d = %.5f +/- %.5f" % (ii + 1, fit_params[1 + ii * 3], fit_errs[1 + ii * 3])
        print " fwhm%d = %.5f +/- %.5f" % (ii + 1, fit_params[2 + ii * 3], fit_errs[2 + ii * 3])
        print " ampl%d = %.5f +/- %.5f" % (ii + 1, fit_params[3 + ii * 3], fit_errs[3 + ii * 3])
    print "--------------------------------------"
    return fit_params, fit_errs, chi_sq, dof
def histo_plotter(x, SPECS):
    # The histogram of the data
    n, bins, patches = hist(x, 50, normed=0)
    setp(patches, 'facecolor', 'g', 'alpha', 0.75)
    for i in range(len(SPECS)):
        out_of_spec = 0
        for j in range(len(bins)):
            if bins[j] <= SPECS[i]:
                setp(patches[j], 'facecolor', 'r', 'alpha', 0.75)
                out_of_spec = out_of_spec + n[j]
        out_summary = "#of Samples Below " + str(SPECS[i]) + " :" + str(out_of_spec) + \
                      "\n From " + str(len(x)) + " Samples "
        fp3.write(out_summary)
        #print patches
    # Add a 'best fit' line
    mu = stats.mean(x)
    sigma = stats.std(x)
    maxfreq = max(n)
    minval = min(x)
    out_summary2 = "Minimum Value: " + fix(minval, 3)
    fp3.write(out_summary2)
    # print x, bins, n, mu, sigma
    y = normpdf(bins, mu, sigma)
    l = plot(bins, y, 'r--')
    #y = normpdf( bins)
    #l = plot(bins, n, 'r--')
    setp(l, 'linewidth', 1)
    xlabel('Clearance')
    ylabel('Count')
    title(r'$\rm{Histogram\ of\ Clearance}$')
    axis([bins[0], bins[49], 0.0, maxfreq])
    text(.01 + bins[0], .9 * maxfreq, out_summary, color='r')
    text(.01 + bins[0], .8 * maxfreq, out_summary2, color='b')
    grid(True)
    #savefig('histogram_demo',dpi=72)
    show()
def filter_extrange2(anal):
    meanAll = mean(flattened(anal.rawData))
    meanAllL = mean(flattened(anal.rawDataL))
    meanAllR = mean(flattened(anal.rawDataR))
    deleted = list()
    for ind in range(anal.shape[0]):
        meanInd = mean(flattened(anal.rawData[ind]))
        meanIndL = mean(flattened(anal.rawDataL[ind]))
        meanIndR = mean(flattened(anal.rawDataR[ind]))
        if abs(meanAll - meanInd) > meanAll / 6:
            deleted.append(ind)
        elif abs(meanAllL - meanIndL) > meanAllL / 6:
            deleted.append(ind)
        elif abs(meanAllR - meanIndR) > meanAllR / 6:
            deleted.append(ind)
    deleted = N.unique(deleted)
    print "Deleted individuals: %d out of %d" % (len(deleted), anal.rawData.shape[0])
    # N.delete returns a new array, so assign the results back.
    anal.rawData = N.delete(anal.rawData, deleted, axis=0)
    anal.rawDataL = N.delete(anal.rawDataL, deleted, axis=0)
    anal.rawDataR = N.delete(anal.rawDataR, deleted, axis=0)
def extract(self):
    return stats.mean(self.flux_data)
def test_ravel(self):
    a = rand(5, 3, 5)
    A = 0
    for val in ravel(a):
        A += val
    assert_almost_equal(stats.mean(a, axis=None), A / (5 * 3.0 * 5))
from scipy import stats
import sys

ephemerides = ReadEphemeridesLog(sys.argv[1])

comps = []
for e in ephemerides:
    c = ComparePysolarToUSNO(e)
    comps.append(c)

az_errors = [c.az_error for c in comps]
alt_errors = [c.alt_error for c in comps]

print '---------------------'
print 'Azimuth stats'
print 'Mean error: ' + str(stats.mean(az_errors))
print 'Std dev: ' + str(stats.std(az_errors))
print 'Min error: ' + str(stats.tmin(az_errors, None))
print 'Max error: ' + str(stats.tmax(az_errors, None))
print '----------------------'
print 'Altitude stats'
print 'Mean error: ' + str(stats.mean(alt_errors))
print 'Std dev: ' + str(stats.std(alt_errors))
print 'Min error: ' + str(stats.tmin(alt_errors, None))
print 'Max error: ' + str(stats.tmax(alt_errors, None))

WriteComparisonsToCSV(comps, 'pysolar_v_usno.csv')
def test_meanROUND(self):
    y = stats.mean(ROUND)
    assert_approx_equal(y, 4.500000000)
def featurescalculator(sigbufs, n):
    Desface = 12000
    x = 0 + Desface  # offset, in samples, where the exam starts
    aumento = 0
    # Number of seconds in the exam
    segundos = int((len(sigbufs[3, :]) - Desface) / 200)
    # Feature matrix
    Features = np.empty(((n - 5) * segundos, 32))
    for a in np.arange(segundos):
        for i in np.arange(n - 5):  # number of channels
            warnings.filterwarnings("ignore")
            # Time domain
            minimo = scipy.stats.tmin(sigbufs[i, x:x + 200])
            maximo = scipy.stats.tmax(sigbufs[i, x:x + 200])
            kurto = scipy.stats.kurtosis(sigbufs[i, x:x + 200])
            energ = energia(sigbufs[i, x:x + 200])
            sha = shannon(sigbufs[i, x:x + 200])
            # DWT
            cA5, cD4, cD3, cD2, cD1 = pywt.wavedec(sigbufs[i, x:x + 200], 'db4', level=4)
            varianzaA5 = stats.variance(cA5)
            energA5 = energia(cA5)
            shaA5 = shannon(cA5)
            actiA5 = hjorth(cA5)
            varianzaD4 = stats.variance(cD4)
            energD4 = energia(cD4)
            rD4 = renyi(cD4)
            shaD4 = shannon(cD4)
            EHD4 = hurst(cD4)
            actiA4 = hjorth(cD4)
            varianzaD3 = stats.variance(cD3)
            desviacionD3 = stats.stdev(cD3)
            energD3 = energia(cD3)
            rD3 = renyi(cD3)
            apenD3 = ApEn(cD3, 2, 3)
            shaD3 = shannon(cD3)
            minimoD2 = scipy.stats.tmin(cD2)
            maximoD2 = scipy.stats.tmax(cD2)
            desviacionD2 = stats.stdev(cD2)
            kurtoD2 = scipy.stats.kurtosis(cD2)
            energD2 = energia(cD2)
            rD2 = renyi(cD2)
            shaD2 = shannon(cD2)
            minimoD1 = scipy.stats.tmin(cD1)
            maximoD1 = scipy.stats.tmax(cD1)
            rD1 = renyi(cD1)
            # FFT
            nee = len(sigbufs[i, x:x + 200])  # size
            Y = fft(sigbufs[i, x:x + 200]) / nee
            Yn = abs(Y)
            mediaf = stats.mean(Yn)
            #print (signal_labels[i])
            Features[i + aumento] = [
                minimo, maximo, kurto, energ, sha, varianzaA5, energA5, shaA5,
                actiA5, varianzaD4, energD4, rD4, shaD4, EHD4, actiA4,
                varianzaD3, desviacionD3, energD3, rD3, apenD3, shaD3,
                minimoD2, maximoD2, desviacionD2, kurtoD2, energD2, rD2,
                shaD2, minimoD1, maximoD1, rD1, mediaf
            ]
            #Labels = signal_labels[i]
            #print (Labels)
        x = x + 200
        aumento = aumento + 18  # 16 -- n-2, 21 -- n-4, 15 -- n-3, 19 -- n-4, 43 -- n-8
    return Features
def clust_main():
    parent = "/home/ethan/hiv/papers/jidletter/"
    outmi = open(parent + 'sumary.mi', 'w')
    outmi.write('freq,cut,p.clu,mean.clu,med.clu,std.clu,act.pri\n')
    out3 = open(parent + 'sumary.30y', 'w')
    out3.write('freq,cut,p.clu,mean.clu,med.clu,std.clu,act.pri\n')
    inf_mi, inf_3 = {}, {}
    clu_mi, clu_3 = {}, {}
    cuts = [6]
    cuts.extend([(x + 1) * 12 for x in range(19)])
    for freq in np.linspace(0.05, 1.0, 20):
        inf_mi[freq], inf_3[freq] = [], []
    infile = open(parent + "pkl/full/" + "lin0.pkl.full", 'r')
    data = cPickle.load(infile)
    infile.close()
    c_mi = data['clu_mi']
    c_3 = data['clu_30y']
    for inst in c_mi:
        freq = inst[0]
        cut = inst[1]
        if not clu_mi.has_key(freq):
            clu_mi[freq] = {}
        if not clu_mi[freq].has_key(cut):
            clu_mi[freq][cut] = []
    for inst in c_3:
        freq = inst[0]
        cut = inst[1]
        if not clu_3.has_key(freq):
            clu_3[freq] = {}
        if not clu_3[freq].has_key(cut):
            clu_3[freq][cut] = []
    for file in os.listdir(parent + "pkl/full/"):
        print file
        infile = open(parent + "pkl/full/" + file, 'r')
        data = cPickle.load(infile)
        infile.close()
        history = data['history']
        smp_mi = data['samples_maxinc']
        smp_3 = data['samples_30y']
        c_mi = data['clu_mi']
        c_3 = data['clu_30y']
        for freq in np.linspace(0.05, 1.0, 20):
            for mi in smp_mi[freq]:
                inf_mi[freq].append(infectors_stage(history, mi))
            for th in smp_3[freq]:
                inf_3[freq].append(infectors_stage(history, th))
        for inst in c_mi:
            freq = inst[0]
            cut = inst[1]
            for i, tok in enumerate(inst):
                if i > 1:
                    clu_mi[freq][cut].append(int(tok))
        for inst in c_3:
            freq = inst[0]
            cut = inst[1]
            for i, tok in enumerate(inst):
                if i > 1:
                    clu_3[freq][cut].append(int(tok))
    for k, v in inf_mi.items():
        pcount = 0
        for tok in v:
            if tok == 'p':
                pcount += 1
        sk = str(k)
        sk = sk + '00000000000000'
        inf_mi[sk[0:8]] = float(pcount) / len(v)
    for k, v in inf_3.items():
        pcount = 0
        for tok in v:
            if tok == 'p':
                pcount += 1
        sk = str(k)
        sk = sk + '00000000000000'
        inf_3[sk[0:8]] = float(pcount) / len(v)
    for k, v in clu_mi.items():
        for k2, v2 in v.items():
            prclust = pr_clustering(v2)
            mean = stats.mean(v2)
            median = stats.median(v2)
            std = stats.tstd(v2)
            outmi.write('%s,%s,%f,%f,%f,%f,%f\n' %
                        (k, k2, prclust, mean, median, std, inf_mi[k]))
    for k, v in clu_3.items():
        for k2, v2 in v.items():
            prclust = pr_clustering(v2)
            mean = stats.mean(v2)
            median = stats.median(v2)
            std = stats.tstd(v2)
            out3.write('%s,%s,%f,%f,%f,%f,%f\n' %
                       (k, k2, prclust, mean, median, std, inf_3[k]))
import scipy.stats as stats
import statistics as st  # assumed: 'st.mean'/'st.variance' below need the stdlib statistics module
import constante
from funciones import normal_por_aceptacion_rechazo

z = normal_por_aceptacion_rechazo(media=35, de=5)
hist_data = [z]
# Plot the data
fig = ff.create_distplot(hist_data, [""], bin_size=.01, curve_type='normal')
fig['layout'].update(title='Normal empirica vs Normal de python')
py.plot(fig, filename='normal empirica vs normal de python')

# Show the sample and theoretical mean, variance, and mode
media = st.mean(z)
varianza = st.variance(z)
moda = max(set(z), key=z.count)
print("Media muestral: {0} Varianza muestral: {1} Moda muestral: {2}".format(media, varianza, moda))
print("Media teorica: {0} Varianza teorica: {1} Moda teorica: {2}".format(35, 5 * 5, 35))

# RESPUESTA 5
from funciones import gcl_uniforme
import constante

# Generate uniform random numbers
x_n = constante.SEMILLA
uniformes = []
empiricos = []
for _ in range(constante.CANT_EXPERIMENTOS):