def normalized_diff_mean_power(multarray, labels, smoother_size=(5, 5)):
    lls = np.array(labels)  # make sure this is an array
    # convert to log scale
    arr0 = np.log(multarray[lls == 0])
    arr1 = np.log(multarray[lls == 1])
    m0 = np.nanmean(arr0, axis=0)
    m1 = np.nanmean(arr1, axis=0)
    smoother = np.ones(smoother_size)
    smoother = smoother / np.sum(smoother)
    v0 = np.nanvar(arr0, axis=0, ddof=1)
    v1 = np.nanvar(arr1, axis=0, ddof=1)
    v0 = convolve2d(np.nan_to_num(v0), smoother, mode='same')
    v1 = convolve2d(np.nan_to_num(v1), smoother, mode='same')
    s0 = np.sqrt(v0)
    s1 = np.sqrt(v1)
    n0 = np.sum(lls == 0)
    n1 = np.sum(lls == 1)
    numer = m0 - m1 + 0.5 * (s0 ** 2 - s1 ** 2)
    denom = np.sqrt((s0 ** 2 / n0) + (s1 ** 2 / n1) +
                    (s0 ** 4 / (n0 - 1)) + (s1 ** 4 / (n1 - 1)))
    return numer / denom
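# Not part of the snippet above: a minimal, self-contained demo of the
# np.nanvar behavior these examples rely on. NaNs are skipped, `axis` selects
# the reduction axis, and ddof=1 gives the unbiased sample variance, as in the
# ddof=1 calls above.
import numpy as np

a = np.array([[1.0, np.nan, 3.0],
              [4.0, 5.0, 6.0],
              [7.0, 8.0, np.nan]])
print(np.nanvar(a))                  # variance over all finite values
print(np.nanvar(a, axis=0))          # per column, NaNs skipped independently
print(np.nanvar(a, axis=0, ddof=1))  # sample variance: [9., 4.5, 4.5]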
def calc_stresses(self, beamvel, beamAng):
    """
    Calculate the stresses from the difference in the beam variances.

    Reference: Stacey, Monosmith and Burau; (1999) JGR [104]
    "Measurements of Reynolds stress profiles in unstratified tidal flow"
    """
    fac = 4 * np.sin(self['config']['beam_angle'] * deg2rad) * \
        np.cos(self['config']['beam_angle'] * deg2rad)
    # Note: Stacey defines the beams incorrectly for Workhorse ADCPs.
    # According to the workhorse coordinate transformation
    # documentation, the instrument's:
    #     x-axis points from beam 1 to 2, and
    #     y-axis points from beam 4 to 3.
    # Therefore:
    stress = ((np.nanvar(self.reshape(beamvel[0]), axis=-1) -
               np.nanvar(self.reshape(beamvel[1]), axis=-1)) +
              1j * (np.nanvar(self.reshape(beamvel[2]), axis=-1) -
                    np.nanvar(self.reshape(beamvel[3]), axis=-1))
              ) / fac
    if self.config.orientation == 'up':
        # This comes about because, when the ADCP is 'up', the u
        # and w velocities need to be multiplied by -1 (equivalent
        # to adding pi to the roll). See the coordinate
        # transformation documentation for more info.
        #
        # The uw (real) component has two minus signs, but the vw (imag)
        # component only has one, therefore:
        stress.imag *= -1
    stress *= rotate.inst2earth_heading(self)
    if self.props['coord_sys'] == 'principal':
        stress *= np.exp(-1j * self.props['principal_angle'])
    return stress.real, stress.imag
def plot_profile_TKE_wind(synth):
    fig, ax = plt.subplots()
    colors = get_colors(synth)
    c = 0
    for key, value in synth.items():  # Python 3: dict.iteritems -> dict.items
        for v in value:
            scase = str(key).zfill(2)
            sleg = str(v).zfill(2)
            synthfile = base_dir + 'c' + scase + '/leg' + sleg + '.cdf'
            U = read_synth(synthfile, 'F2U')
            V = read_synth(synthfile, 'F2V')
            Z = read_synth(synthfile, 'z')
            x = []
            y = []
            for n, z in enumerate(Z[1:15]):
                u = U[:, :, n + 1]
                v = V[:, :, n + 1]
                u_var = np.nanvar(u)
                v_var = np.nanvar(v)
                TKE = (u_var + v_var) / 2.
                x.append(TKE)
                y.append(z)
            label = 'Case: ' + scase + ' Leg: ' + sleg
            ax.plot(x, y, '-', label=label, color=colors[c])
            ax.set_ylim([0, 4])
            ax.set_xlabel('TKE [m2 s^-2]')
            ax.set_ylabel('Altitude MSL [km]')
            c += 1
    plt.suptitle('Spatial TKE at P3 synth levels')
    plt.draw()
    plt.legend()
def plot_profile_variance(dbz, vvel, ht, ax, case, ncases):
    dbz_variance = []
    vvel_variance = []
    count_gates = []

    global ti
    global n
    global colors

    if n == 0:
        # colors = sns.color_palette('hls', ncases)
        colors = sns.color_palette('Paired', ncases)

    for i in range(len(ht)):
        dbz_variance.append(np.nanvar(dbz[i, :]))
        vvel_variance.append(np.nanvar(vvel[i, :]))
        count_gates.append(vvel[i, :].size - np.sum(np.isnan(vvel[i, :])))

    inid = datetime(*(reqdates[case]['ini'] + [0, 0]))
    endd = datetime(*(reqdates[case]['end'] + [0, 0]))
    ti.append('\nCase ' + case + ': ' + inid.strftime('%Y-%b %dT%H:%M') +
              endd.strftime(' - %dT%H:%M UTC'))

    if n < 7:
        marker = 'None'
        # marker = 'o'
    else:
        marker = 'o'

    dbzv = [0, 180]
    vvelv = [0, 6]

    if np.any(ax):
        ax[0].plot(dbz_variance, ht, marker=marker, color=colors[n])
        ax[1].plot(vvel_variance, ht, marker=marker, color=colors[n])
        ax[2].plot(count_gates, ht, marker=marker, color=colors[n],
                   label='case ' + case)
        n += 1
    else:
        fig, ax = plt.subplots(1, 3, sharey=True, figsize=(12, 8))
        ax[0].plot(dbz_variance, ht, color=colors[n])
        ax[1].plot(vvel_variance, ht, color=colors[n])
        ax[2].plot(count_gates, ht, color=colors[n], label='case ' + case)
        ax[0].set_ylabel('Height MSL [km]')
        ax[0].set_xlabel('Reflectivity [dBZ^2]')
        ax[1].set_xlabel('Vertical velocity [m2 s^-2]')
        ax[2].set_xlabel('Count good gates')
        ax[0].set_xlim(dbzv)
        ax[1].set_xlim(vvelv)
        n += 1
        return ax

    if n == ncases and ncases == 4:
        plt.suptitle('SPROF time variance' + ''.join(ti))
        plt.subplots_adjust(top=0.85, left=0.05, right=0.95, wspace=0.05)
        ax[2].legend(loc='lower left')
    elif n == ncases and ncases > 4:
        plt.suptitle('SPROF time variance')
        plt.subplots_adjust(top=0.9, left=0.05, right=0.95, wspace=0.06)
        ax[2].legend()

    plt.draw()
def test_nanvar(self):
    tgt = np.var(self.mat)
    for mat in self.integer_arrays():
        assert_equal(np.nanvar(mat), tgt)

    tgt = np.var(mat, ddof=1)
    for mat in self.integer_arrays():
        assert_equal(np.nanvar(mat, ddof=1), tgt)
def test_nanvar(eng):
    original = arange(24).reshape((2, 3, 4)).astype(float64)
    data = fromlist(list(original), engine=eng)
    assert allclose(data.nanvar().shape, (1, 3, 4))
    assert allclose(data.nanvar().toarray(), nanvar(original, axis=0))

    original[0, 2, 3] = nan
    original[1, 0, 2] = nan
    original[1, 2, 2] = nan
    data = fromlist(list(original), engine=eng)
    assert allclose(data.nanvar().shape, (1, 3, 4))
    assert allclose(data.nanvar().toarray(), nanvar(original, axis=0))
def bayes_precision(x, y, distribution='normal', posterior_width=0.08,
                    num_iters=25000, inference='sampling'):
    """ Bayes precision computation.

    :param x: sample of a treatment group
    :type  x: pd.Series or list (array-like)
    :param y: sample of a control group
    :type  y: pd.Series or list (array-like)
    :param distribution: name of the KPI distribution model, which assumes a
        Stan model file with the same name exists
    :type  distribution: str
    :param posterior_width: the stopping criterion, threshold of the posterior width
    :type  posterior_width: float
    :param num_iters: number of iterations of bayes sampling
    :type  num_iters: int
    :param inference: sampling or variational inference method for approximating the posterior
    :type  inference: str

    :return: results of type EarlyStoppingTestStatistics (without p-value and stat. power)
    :rtype:  EarlyStoppingTestStatistics
    """
    # note: the format arguments are ordered to match the message placeholders
    # (procedure, treatment size, control size, distribution)
    logger.info("Started running bayes precision with {} procedure, treatment group of size {}, "
                "control group of size {}, {} distribution.".format(inference, len(x), len(y), distribution))

    traces, n_x, n_y, mu_x, mu_y = _bayes_sampling(x, y, distribution=distribution,
                                                   num_iters=num_iters, inference=inference)
    trace_normalized_effect_size = get_trace_normalized_effect_size(distribution, traces)
    trace_absolute_effect_size = traces['delta']

    credible_mass = 0.95
    left_out = 1.0 - credible_mass
    p1 = round(left_out / 2.0, 5)
    p2 = round(1.0 - left_out / 2.0, 5)
    credible_interval_delta = HDI_from_MCMC(trace_absolute_effect_size, credible_mass)
    credible_interval_delta_normalized = HDI_from_MCMC(trace_normalized_effect_size, credible_mass)

    stop = credible_interval_delta_normalized[1] - credible_interval_delta_normalized[0] < posterior_width

    treatment_statistics = SampleStatistics(int(n_x), float(mu_x), float(np.nanvar(x)))
    control_statistics = SampleStatistics(int(n_y), float(mu_y), float(np.nanvar(y)))
    variant_statistics = BaseTestStatistics(control_statistics, treatment_statistics)

    logger.info("Finished running bayes precision with {} procedure, treatment group of size {}, "
                "control group of size {}, {} distribution.".format(inference, len(x), len(y), distribution))

    return EarlyStoppingTestStatistics(variant_statistics.control_statistics,
                                       variant_statistics.treatment_statistics,
                                       float(mu_x - mu_y),
                                       dict([(p * 100, v) for p, v in zip([p1, p2], credible_interval_delta)]),
                                       None, None, stop)
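# A hedged sketch of the stopping rule used above: HDI_from_MCMC is an external
# helper, so the sorted-window HDI below is a stand-in for it. Stopping occurs
# once the 95% highest-density interval of the normalized effect-size trace is
# narrower than posterior_width.
import numpy as np

def hdi(samples, credible_mass=0.95):
    s = np.sort(samples)
    k = int(np.floor(credible_mass * len(s)))
    widths = s[k:] - s[:len(s) - k]   # widths of all candidate intervals
    i = np.argmin(widths)             # narrowest interval containing the mass
    return s[i], s[i + k]

trace = np.random.normal(0.0, 0.01, 25000)  # stand-in for the sampled trace
lo, hi = hdi(trace)
print(hi - lo < 0.08)  # True: interval narrower than posterior_width, so stop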
def test_nanvar(eng):
    arr = array([arange(8), arange(8)]).astype(float64)
    data = fromarray(arr, engine=eng)
    val = data.nanvar().toarray()
    expected = nanvar(data.toarray(), axis=0)
    assert allclose(val, expected)
    assert str(val.dtype) == 'float64'

    arr[0, 4] = nan
    arr[1, 3] = nan
    arr[1, 4] = nan
    data = fromarray(arr, engine=eng)
    val = data.nanvar().toarray()
    expected = nanvar(data.toarray(), axis=0)
    assert allclose(val, expected, equal_nan=True)
    assert str(val.dtype) == 'float64'
def test_GWAS(self):
    Y = np.genfromtxt(self._liverPhenos)

    # Loading npdump and first 1000 snps for speed
    K = np.load(self._liverKinshipMatrix)
    snps = np.load(self._liver1000SNPFile).T

    # variances across the rows ignoring NaN, used to check which SNPs were
    # not polymorphic across the given individuals
    vars = np.nanvar(snps, axis=0)

    TS, PS = lmm.GWAS(Y, snps, K, REML=True, refit=True)

    # SNPs that are not polymorphic (in the given individuals being tested)
    # will have variance 0; this check ensures that only these SNPs have a
    # return value of NaN
    for i in range(len(PS)):
        self.assertTrue(not math.isnan(PS[i]) or vars[i] == 0,
                        "NaN found in results corresponding to polymorphic SNP")

    results = np.array([TS, PS])
    ansKey = np.load(self._liverTestFile)

    # these results include np.nan values, so allclose cannot be used; also
    # the results are similar with each run but do vary, so we can only check
    # for similarity to a precision of about 1e-06
    for i in range(results.shape[0]):
        for j in range(results.shape[1]):
            a = results[i, j]
            b = ansKey[i, j]
            self.assertTrue((np.isnan(a) and np.isnan(b)) or abs(a - b) < 1e-06,
                            "Mismatch on values: " + str(a) + " and " + str(b))
def c(self, P, h, bw):
    """Calculate the sill"""
    c = np.nanvar(P[:, 2])
    if h == 0:
        return c
    else:
        return c - self.semivarh(P, h, bw)
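# Hedged usage sketch for the sill helper above, assuming P is an (N, 3) array
# of (x, y, value) points: the sill is the overall variance of the observed
# field, which is exactly what the method returns at lag h == 0.
import numpy as np

rng = np.random.default_rng(0)
P = np.column_stack((rng.uniform(0, 100, 200),     # x coordinates
                     rng.uniform(0, 100, 200),     # y coordinates
                     rng.normal(10.0, 2.0, 200)))  # observed values
print(np.nanvar(P[:, 2]))  # sill estimate, ~4 for a std-2 field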
def compute(self, today, assets, out, close):
    # get returns dataset
    returns = ((close - np.roll(close, 1, axis=0)) /
               np.roll(close, 1, axis=0))[1:]
    # get index of benchmark
    benchmark_index = np.where((assets == 8554) == True)[0][0]
    # get returns of benchmark
    benchmark_returns = returns[:, benchmark_index]
    # prepare X matrix (x_is - x_bar)
    X = benchmark_returns
    X_bar = np.nanmean(X)
    X_vector = X - X_bar
    X_matrix = np.tile(X_vector, (len(returns.T), 1)).T
    # prepare Y matrix (y_is - y_bar)
    Y_bar = np.nanmean(close, axis=0)
    Y_bars = np.tile(Y_bar, (len(returns), 1))
    Y_matrix = returns - Y_bars
    # prepare variance of X
    X_var = np.nanvar(X)
    # multiply X matrix and Y matrix and sum (dot product),
    # then divide by variance of X; this gives the MLE of Beta
    out[:] = (np.sum((X_matrix * Y_matrix), axis=0) / X_var) / (len(returns))
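# Toy check (plain numpy, not the pipeline API above) of the estimator: for
# y = 2 * x + noise, covariance-over-variance should recover a beta near 2.
# The snippet's trailing "/ len(returns)" is mirrored by computing the
# covariance as a plain sum and dividing by n afterwards.
import numpy as np

rng = np.random.default_rng(2)
x = rng.normal(0.0, 0.01, 500)             # benchmark returns
y = 2.0 * x + rng.normal(0.0, 0.001, 500)  # asset returns
beta = (np.sum((x - x.mean()) * (y - y.mean())) / np.nanvar(x)) / len(x)
print(beta)  # ~2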
def cal_stats(in_fc, col_names):
    """Calculate stats for an array of double types, with nodata (nan, None)
    in the column.

    Requires
    --------
    in_fc : input featureclass or table
    col_names : the columns... numeric (floating point, double)

    Notes
    -----
    see the args tuple for examples of nan functions::

        np.nansum(b, axis=0)  # by column
        np.nansum(b, axis=1)  # by row
        c_nan = np.count_nonzero(~np.isnan(b), axis=0)  # count nan if needed
    """
    a = arcpy.da.FeatureClassToNumPyArray(in_fc, col_names)  # "*")
    b = a.view(np.float).reshape(len(a), -1)
    if len(a.shape) == 1:
        ax = 0
    else:
        ax = [1, 0][True]  # ax = [1, 0][colwise]; colwise = True
    mask = np.isnan(b)
    cnt = np.sum(~mask, axis=ax, dtype=np.intp, keepdims=False)
    n_sum = np.nansum(b, axis=0)
    n_mean = np.nanmean(b, axis=0)
    n_var = np.nanvar(b, axis=0)
    n_std = np.nanstd(b, axis=0)
    sk, kurt = skew_kurt(b, avg=n_mean, var_x=n_var, std_x=n_std,
                         col=True, mom='both')
    args = (col_names, cnt, n_sum, np.nanmin(b, axis=0), np.nanmax(b, axis=0),
            np.nanmedian(b, axis=0), n_mean, n_std, n_var, sk, kurt)
    return col_names, args
def _fit_model(self, fcol, dis):
    """Determine the best fit for one feature column given a distribution name.

    Parameters
    ----------
    fcol: feature column, array
    dis: distribution name, String

    Returns
    ----------
    function: fit model with feature as argument
    """
    if dis == 'ratio':
        itfreq = itemfreq(fcol)
        uniqueVars = itfreq[:, 0]
        freq = itfreq[:, 1]
        rat = freq / sum(freq)
        rat = dict(zip(uniqueVars, rat.T))
        func = lambda x: self.funcs[dis](x, rat)
    if dis == 'poisson':
        lamb = np.nanmean(fcol, axis=0)
        func = lambda x: self.funcs[dis](x, lamb)
    if dis == 'norm':
        sigma = np.nanvar(fcol, axis=0)
        theta = np.nanmean(fcol, axis=0)
        func = lambda x: self.funcs[dis](x, sigma, theta)
    return np.vectorize(func)
def cal_stats(a):
    """Calculate stats for an array of double types, with nodata (nan, None)
    in the column.

    Notes
    -----
    see the args tuple for examples of nan functions::

        >>> np.nansum(b, axis=0)  # by column
        >>> np.nansum(b, axis=1)  # by row
        >>> c_nan = np.count_nonzero(~np.isnan(b), axis=0)  # count nan if needed
    """
    if len(a.shape) == 1:
        ax = 0
    else:
        ax = [1, 0][True]  # ax = [1, 0][colwise]; colwise = True
    mask = np.isnan(a)
    n = len(a)
    cnt = np.sum(~mask, axis=ax, dtype=np.intp, keepdims=False)
    n_sum = np.nansum(a, axis=0)
    n_min = np.nanmin(a, axis=0)
    n_max = np.nanmax(a, axis=0)
    n_mean = np.nanmean(a, axis=0)
    n_med = np.nanmedian(a, axis=0)
    n_std = np.nanstd(a, axis=0)
    n_var = np.nanvar(a, axis=0)
    col_names = ['N', 'n', 'sum', 'min', 'max', 'mean',
                 'median', 'std', 'var', 'skew', 'kurt']
    sk, kurt = skew_kurt(a, avg=n_mean, var_x=n_var, std_x=n_std,
                         col=True, mom='both')
    args = [n, cnt, n_sum, n_min, n_max, n_mean, n_med, n_std, n_var, sk, kurt]
    z = list(zip(col_names, args))
    s = "".join(["\n{:<6} {}".format(*i) for i in z])
    return s
def std(values, errors):
    """Takes two numpy arrays: values, and the errors in those values.
    Estimates the grand standard deviation to use as the error value.
    Not accurate for multiple rounds of averaging.
    """
    var1 = np.nanvar(values, ddof=1)       # variance in the values
    var2 = np.nanmean(np.square(errors))   # mean-square of input errors
    return np.sqrt(var1 + var2)
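# Quick numeric check of the grand-standard-deviation formula above (one round
# of averaging assumed): total variance = sample variance of the values plus
# the mean square of the per-value errors, with NaNs ignored throughout.
import numpy as np

values = np.array([10.1, 9.8, 10.4, np.nan, 10.0])
errors = np.array([0.2, 0.3, 0.2, np.nan, 0.25])
grand_sd = np.sqrt(np.nanvar(values, ddof=1) + np.nanmean(np.square(errors)))
print(grand_sd)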
def _compute_zr2011_dataframe(self):
    """
    Get the dataframe needed for the mid-range temperatures, and add a
    mass bin for 6000 K using the gyrochronology relation.
    """
    # Read in the dataframe from disk.
    df = pd.read_csv('data/velocity_pdfs.csv', header=1)

    # Compute equatorial velocities for a 6000 K star at this age.
    teff = np.ones_like(self.age) * 6000.0
    v_eq = self._gyro_velocities(teff, self.age).to(u.km/u.s).value
    v_eq[v_eq > 500] = np.nan  # Remove unphysical vsini values.

    # Calculate approximate maxwellian parameters from the velocities.
    alpha = np.sqrt(np.nanvar(v_eq) * np.pi / (3*np.pi - 8))
    l = np.nanmedian(v_eq) - 2*alpha*np.sqrt(2/np.pi)

    # Add a row to the dataframe with this information
    df.loc[df.index.max()+1] = [1.0, 1.24, 0, 25, 100, alpha*np.sqrt(2), l]

    # Calculate a few more columns for the dataframe
    df['mid_mass'] = (df.mass_high + df.mass_low) / 2.0
    df['slow_alpha'] = df.slow_mu / np.sqrt(2)
    df['fast_alpha'] = df.fast_mu / np.sqrt(2)
    df['slow_frac'] /= 100.0
    df['fast_frac'] /= 100.0

    # Sort so that interpolation works
    df = df.sort_values(by='mid_mass').reset_index()

    return df
def autocorrelation_hourly(data):
    from matplotlib.pyplot import plot, xlabel, ylabel, show
    from numpy import nanmean, nanvar, mean, multiply, arange

    # We choose 7 days, plus or minus 6 hours, as the possible periodicity
    # in traffic.
    START_PERIOD = 7 * 24 - 6
    END_PERIOD = 7 * 24 + 6

    V = replace_placeholder(data, value=nanmean(data))
    # We don't take the variance of entries that we replaced with nanmean.
    sigma2 = nanvar(data)
    autocorr_dict = {period: 0 for period in range(START_PERIOD, END_PERIOD + 1)}
    Deviations = V - nanmean(V, axis=0)
    for period in range(START_PERIOD, END_PERIOD + 1):
        autocorr = nanmean([multiply(Deviations[t], Deviations[t + period])
                            for t in range(len(V) - period)]) / sigma2
        autocorr_dict[period] = autocorr
        print(period)

    # Peaks in the plot correspond to high autocorrelation, i.e. a strong
    # periodicity trend.
    plot(arange(START_PERIOD, END_PERIOD + 1),
         [autocorr_dict[period] for period in range(START_PERIOD, END_PERIOD + 1)],
         'o-')
    ylabel('Average autocorrelation over full links')
    xlabel('Assumed period of data (in hours)')
    show()
    # legend(bbox_to_anchor=(1.35, 0.95))
    return None
def test_var():
    out = df.i32.reshape((2, 2, 5)).var(axis=2).T
    eq(c.points(df, 'x', 'y', ds.var('i32')), out)
    eq(c.points(df, 'x', 'y', ds.var('i64')), out)
    out = np.nanvar(df.f64.reshape((2, 2, 5)), axis=2).T
    eq(c.points(df, 'x', 'y', ds.var('f32')), out)
    eq(c.points(df, 'x', 'y', ds.var('f64')), out)
def get_FR_stats(hdf, save=False, return_=False, plot=True):
    eps = 10**-12
    sc = hdf.root.task[:]['spike_counts']
    mn = np.nanmean(sc[:, :, 0], axis=0)
    vr = np.nanvar(sc[:, :, 0], axis=0)
    ff = vr / (mn + eps)

    if plot:
        f, ax = plt.subplots()
        ax.hist(np.mean(sc[:, :, 0], axis=0))
        ax.set_title('Hist. of Mean FR.')
        ax.set_xlabel('FR')
        ax.set_ylabel('Counts')

        f2, ax2 = plt.subplots()
        try:
            ax2.hist(ff)
        except:
            print('error FF: ', ff)  # Python 3 print
        ax2.set_title('Hist. of Fano Factor')
        ax2.set_xlabel('Fano Factor')
        ax2.set_ylabel('Counts')

    if save:
        f.savefig(hdf.filename[:-4] + '_mnFR.png', format='png')
        f2.savefig(hdf.filename[:-4] + '_FF.png', format='png')

    if return_:
        return np.mean(sc[:, :, 0], axis=0), ff
def ExponentialTransformErrVarShapingFactor(data, comparedata, G=10):
    """
    This function uses the variance of the error terms between observed and
    simulated data as a base to calculate the likelihood.

    .. math::

        p = -G \\cdot Var(E(x))

    The factor `G` comes from the DREAMPar model, so this factor can be
    changed according to the used model. For more details see also:
    http://onlinelibrary.wiley.com/doi/10.1029/95WR03723/epdf.

    `Usage:` Maximizing the likelihood value guides to the best model.

    :param data: observed measurements as a numerical list
    :type data: list
    :param comparedata: simulated data from a model which should fit the original data somehow
    :type comparedata: list
    :param G: DREAMPar model parameter `G`
    :type G: float

    :return: the p value as a likelihood
    :rtype: float
    """
    __standartChecksBeforeStart(data, comparedata)
    errArr = np.array(__calcSimpleDeviation(data, comparedata))
    return -G * np.nanvar(errArr)
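# A minimal sketch of the shaping-factor likelihood above in plain numpy: the
# private __standartChecksBeforeStart / __calcSimpleDeviation helpers are
# bypassed and the residuals E(x) = data - comparedata are formed directly.
import numpy as np

data = np.array([1.2, 1.5, 1.9, 2.3])
comparedata = np.array([1.1, 1.6, 1.9, 2.6])
G = 10
p = -G * np.nanvar(data - comparedata)  # p = -G * Var(E(x))
print(p)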
def computeFisherScore(data, class_ass, nb_classes):
    '''
    The Fisher Score assigns a rank to each of the features, with the goal of
    finding the subset of features of the data such that, in the data space
    spanned by the selected features, the distances between data points in
    different classes are as large as possible and the distances between data
    points in the same class are as small as possible.

    Input
        - data: matrix of inputs, size N x M, where N is the number of trials
          and M is the number of features
        - class_ass: array of class assignments, size 1 x N, where N is the
          number of trials
        - nb_classes: number of classes
    Output
        - Fscores: array of scores, size 1 x M, for each of the features
    '''
    num_trials, num_features = data.shape
    within_class_mean = np.zeros([nb_classes, num_features])  # mean for each feature within each class
    within_class_var = np.zeros([nb_classes, num_features])   # variance for each feature within each class
    num_points_within_class = np.zeros([1, nb_classes])       # number of points within each class

    for i in range(nb_classes):
        in_class = np.ravel(np.nonzero(class_ass == i))
        num_points_within_class[0, i] = len(in_class)
        class_data = data[in_class, :]  # extract trials classified as belonging to this class
        within_class_mean[i, :] = np.nanmean(class_data, axis=0)  # length of mean vector should equal M, the number of features
        within_class_var[i, :] = np.nanvar(class_data, axis=0)

    between_class_mean = np.asmatrix(np.mean(within_class_mean, axis=0))
    between_class_mean = np.dot(np.ones([nb_classes, 1]), between_class_mean)

    Fscores = np.dot(num_points_within_class,
                     np.square(within_class_mean - between_class_mean)) / \
        np.dot(num_points_within_class, within_class_var)

    return Fscores
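# Hedged usage sketch for computeFisherScore: two Gaussian classes that are
# separated only along feature 0, so its Fisher score should dwarf that of
# feature 1.
import numpy as np

rng = np.random.default_rng(1)
class0 = np.column_stack((rng.normal(0.0, 1.0, 100), rng.normal(0.0, 1.0, 100)))
class1 = np.column_stack((rng.normal(5.0, 1.0, 100), rng.normal(0.0, 1.0, 100)))
data = np.vstack((class0, class1))
class_ass = np.array([0] * 100 + [1] * 100)
print(computeFisherScore(data, class_ass, 2))  # feature 0 score >> feature 1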
def get_stats(a):
    """Computes mean, D_T or D_R, and standard error for a list."""
    a = np.asarray(a)
    n = a.shape[-1]
    keepdims = a.ndim > 1
    M = np.nanmean(a, -1, keepdims=keepdims)
    # c = a - M
    # variance = np.einsum('...j,...j->...', c, c)/n
    variance = np.nanvar(a, -1, keepdims=keepdims, ddof=1)
    SE = np.sqrt(variance) / sqrt(n - 1)
    SK = skew(a, -1, nan_policy='omit')
    KU = kurtosis(a, -1, nan_policy='omit')
    SK_t = skewtest(a, -1, nan_policy='omit')
    KU_t = kurtosistest(a, -1, nan_policy='omit')
    if keepdims:
        SK = SK[..., None]
        KU = KU[..., None]
    else:
        SK = float(SK)
        KU = float(KU)
    stat = {'mean': M,
            'var': variance,
            'std': SE,
            'skew': SK,
            'skew_test': float(SK_t.statistic),
            'kurt': KU,
            'kurt_test': float(KU_t.statistic)}
    print('\n'.join(['{:>10}: {: .4f}'.format(k, v) for k, v in stat.items()]))
    return stat
def fit_cols(self, attributes, x, n_vals):
    """
    Return `EuclideanColumnsModel` with stored means and variances
    for normalization and imputation.
    """
    def nowarn(msg, cat, *args, **kwargs):
        if cat is RuntimeWarning and (
                msg == "Mean of empty slice"
                or msg == "Degrees of freedom <= 0 for slice"):
            if self.normalize:
                raise ValueError("some columns have no defined values")
        else:
            orig_warn(msg, cat, *args, **kwargs)

    self.check_no_discrete(n_vals)
    # catch_warnings resets the registry for "once", while avoiding this
    # warning would be annoying and slow, hence patching
    orig_warn = warnings.warn
    with patch("warnings.warn", new=nowarn):
        means = np.nanmean(x, axis=0)
        vars = np.nanvar(x, axis=0)
    if self.normalize and not vars.all():
        raise ValueError("some columns are constant")
    return EuclideanColumnsModel(
        attributes, self.impute, self.normalize, means, vars)
def plot_profile_variance_dbz(synth):
    fig, ax = plt.subplots()
    colors = get_colors(synth)
    c = 0
    for key, value in synth.items():  # Python 3: dict.iteritems -> dict.items
        for v in value:
            scase = str(key).zfill(2)
            sleg = str(v).zfill(2)
            synthfile = base_dir + 'c' + scase + '/leg' + sleg + '.cdf'
            DBZ = read_synth(synthfile, 'MAXDZ')
            Z = read_synth(synthfile, 'z')
            x = []
            y = []
            for n, z in enumerate(Z[1:15]):
                dbz = DBZ[:, :, n + 1]
                # dbz[dbz < 15] = np.nan
                x.append(np.nanvar(dbz))
                # zz = 10**(dbz / 10.)
                # x.append(np.nanvar(zz))  # similar to dbz but in linear scale
                y.append(z)
            label = 'Case: ' + scase + ' Leg: ' + sleg
            ax.plot(x, y, '-', label=label, color=colors[c])
            ax.set_ylim([0, 4])
            ax.set_xlim([0, 70])
            ax.set_xlabel('Reflectivity variance [dBZ^2]')
            ax.set_ylabel('Altitude MSL [km]')
            c += 1
    plt.suptitle('Spatial variance at P3 synth levels')
    plt.draw()
    plt.legend()
def plot_profile_variance_wind(synth):
    fig, ax = plt.subplots()
    colors = get_colors(synth)
    c = 0
    for key, value in synth.items():  # Python 3: dict.iteritems -> dict.items
        for v in value:
            scase = str(key).zfill(2)
            sleg = str(v).zfill(2)
            synthfile = base_dir + 'c' + scase + '/leg' + sleg + '.cdf'
            U = read_synth(synthfile, 'F2U')
            V = read_synth(synthfile, 'F2V')
            Z = read_synth(synthfile, 'z')
            x = []
            y = []
            for n, z in enumerate(Z[1:15]):
                u = U[:, :, n + 1]
                v = V[:, :, n + 1]
                wdir = (np.arctan2(u, v) * 180 / np.pi) + 180.
                x.append(np.nanvar(wdir))
                y.append(z)
            label = 'Case: ' + scase + ' Leg: ' + sleg
            ax.plot(x, y, '-', label=label, color=colors[c])
            ax.set_ylim([0, 4])
            ax.set_xlim([0, 700])
            ax.set_xlabel('Wind direction variance [deg^2]')
            ax.set_ylabel('Altitude MSL [km]')
            c += 1
    plt.suptitle('Spatial variance at P3 synth levels')
    plt.draw()
    plt.legend()
def nanvar(self):
    """
    Compute the variance across images ignoring the NaNs
    """
    if self.mode == 'spark':
        return self._constructor(self.values.nanvar(axis=0, keepdims=True))
    else:
        return self._constructor(expand_dims(nanvar(self.values, axis=0), axis=0))
def nanvar(self):
    """
    Compute the variance across records
    """
    if self.mode == 'spark':
        return self._constructor(self.values.nanvar(axis=self.baseaxes, keepdims=True))
    else:
        return self._constructor(expand_dims(nanvar(self.values, axis=self.baseaxes),
                                             axis=self.baseaxes[0]))
def test_nanvar_with_ddof(self):
    x = np.random.uniform(0, 10, (20, 100))
    np.fill_diagonal(x, np.nan)
    for axis in [None, 0, 1]:
        np.testing.assert_almost_equal(
            np.nanvar(x, axis=axis, ddof=10),
            nanvar(csr_matrix(x), axis=axis, ddof=10),
        )
def NashSutcliffeEfficiencyShapingFactor(data, comparedata, G=10):
    """
    This function uses the ratio of the variance of the error terms between
    observed and simulated data to the variance of the observed data as a base
    to calculate the likelihood, and transforms the values with the logarithm.

    .. math::

        p = G \\cdot \\log\\left(1 - \\frac{Var(E(x))}{Var(Y)}\\right)

    The factor `G` comes from the DREAMPar model, so this factor can be
    changed according to the used model. For more details see also:
    http://onlinelibrary.wiley.com/doi/10.1029/95WR03723/epdf.

    `Usage:` Maximizing the likelihood value guides to the best model.
    If the function returns NAN, then this calculation method cannot be used,
    or `comparedata` is too far away from `data`.

    :param data: observed measurements as a numerical list
    :type data: list
    :param comparedata: simulated data from a model which should fit the original data somehow
    :type comparedata: list
    :param G: DREAMPar model parameter `G`
    :type G: float

    :return: the p value as a likelihood
    :rtype: float
    """
    __standartChecksBeforeStart(data, comparedata)
    errArr = np.array(__calcSimpleDeviation(data, comparedata))

    if np.nanvar(data) == 0.0:
        warnings.warn("[NashSutcliffeEfficiencyShapingFactor] realized that the variance of the "
                      "data is zero. Therefore no likelihood calculation is possible")
        return np.NAN
    else:
        ratio = np.nanvar(errArr) / np.nanvar(data)
        if ratio > 1:
            warnings.warn("[NashSutcliffeEfficiencyShapingFactor]: The ratio between residual "
                          "variation and observation variation is bigger than one and therefore "
                          "we cannot calculate the likelihood. Please use another function which "
                          "fits this data and/or model")
            return np.NAN
        else:
            return G * np.log(1 - ratio)
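# The same kind of toy residuals pushed through the Nash-Sutcliffe shaping
# factor: the ratio Var(E)/Var(Y) must stay below 1 for the logarithm to be
# defined, which is exactly what the warnings above guard against.
import numpy as np

data = np.array([1.2, 1.5, 1.9, 2.3])
comparedata = np.array([1.1, 1.6, 1.9, 2.6])
G = 10
ratio = np.nanvar(data - comparedata) / np.nanvar(data)
print(G * np.log(1 - ratio) if ratio < 1 else np.nan)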
def test_var():
    out = xr.DataArray(df.i32.values.reshape((2, 2, 5)).var(axis=2, dtype='f8').T,
                       coords=coords, dims=dims)
    assert_eq(c.points(ddf, 'x', 'y', ds.var('i32')), out)
    assert_eq(c.points(ddf, 'x', 'y', ds.var('i64')), out)
    out = xr.DataArray(np.nanvar(df.f64.values.reshape((2, 2, 5)), axis=2).T,
                       coords=coords, dims=dims)
    assert_eq(c.points(ddf, 'x', 'y', ds.var('f32')), out)
    assert_eq(c.points(ddf, 'x', 'y', ds.var('f64')), out)
def causal_snp(n=10):
    f = '../data/arabi.bim'
    d = pd.read_csv(f, sep='\t', header=None)
    for j in range(n):
        r = np.random.choice(d[1], 10)
        with open('causal_{}.snplist'.format(j), 'w+') as f:
            for i in r:
                f.write(i + '\n')

    print('into gene_expr')
    bxy = args['bxy']
    bzx = args['bzx']
    rzx2 = args['rzx2']
    rxy2 = args['rxy2']
    c = args['c']
    rc2 = args['rc2']
    if causal_lst:
        m = 1200
        # n = np.random.randint(30, 100)
        n = 600
        r1 = np.random.choice(tmp['SNP'], n, replace=False)
        p = np.array(tmp[tmp['SNP'].isin(r1)]['MAF'])
        w = binomial(1, p)
        r2 = np.random.choice(tmp['SNP'], m - n, replace=False)
        p2 = np.array(tmp[tmp['SNP'].isin(r2)]['MAF'])
        w2 = binomial(1, p2)
        with open('tmp/w_{}'.format(ind), 'w+') as f:
            for i in w:
                f.write(str(i) + '\n')
        with open('tmp/w2_{}'.format(ind), 'w+') as f:
            for i in w2:
                f.write(str(i) + '\n')
        with open('tmp/p_{}'.format(ind), 'w+') as f:
            # f.write(' '.join(str(i)) + '\n')
            for i in p:
                f.write(str(i) + '\n')
        with open('tmp/r1_{}'.format(ind), 'w+') as f:
            for i in r1:
                f.write(str(i) + '\n')
        with open('tmp/r2_{}'.format(ind), 'w+') as f:
            for i in r2:
                f.write(str(i) + '\n')
    if causal_files:
        r1 = np.array([i.strip() for i in open(causal_files[0]).readlines()])
        r2 = np.array([i.strip() for i in open(causal_files[1]).readlines()])
        p = np.array([float(i.strip()) for i in open(causal_files[2]).readlines()])
        w = np.array([int(i.strip()) for i in open(causal_files[3]).readlines()])
        w2 = np.array([int(i.strip()) for i in open(causal_files[4]).readlines()])
        n = len(r1)
        m = len(r2)
        print(r1, r2, p, w, w2)
        # print(w.shape)
        # print(p.shape)

    z = (w - 2 * p) / (np.sqrt(2 * p * (1 - p)))
    z_bzx = z * bzx
    w_bzx = w * bzx
    var_z_bzx = np.nanvar(z_bzx)
    var_w_bzx = np.nanvar(w_bzx)
    if not rc2:
        rc2 = uniform(0, 0.5)
    sigmac_2 = var_z_bzx * rc2 / rzx2
    # c = normal(0, sigmac_2, n)
    if not c:
        c = normal(0, sigmac_2)
    # ezx = normal(0, var_w_bzx * (1 / rzx2 - 1) - sigmac_2, n)
    ezx = normal(0, var_w_bzx * (1 / rzx2 - 1) - sigmac_2)
    # print(z_bzx.shape)
    # print(c.shape)
    # print(ezx.shape)
    x = z_bzx + c + ezx
    x = sum(x)
    lst = list(r1) + list(r2)
    geno_012 = list(w) + list(w2)
    print(len(geno_012))
    print(len(lst))
    # causal = dict(zip(lst, geno_012))
    causal = pd.DataFrame()
    causal['012'] = geno_012
    causal['SNP'] = lst
    ref = 'geno_ref.txt'
    ref = pd.read_csv(ref)
    geno_GCTA = pd.merge(causal, ref, left_on='SNP', right_on='SNP')
    # print(geno_GCTA.head())
    pool = Pool(30)
    r = []
    r2 = []
    for i in range(geno_GCTA.shape[0]):
        r.append(pool.apply_async(gene_expr_worker, [geno_GCTA.iloc[i, :]]))
    for i in r:
        r2.append(i.get())
    geno_GCTA = pd.concat(r2, axis=1).T
    print(geno_GCTA.head())
    # geno_GCTA = geno_GCTA.apply(gene_expr_worker, axis=1)
    geno_GCTA = geno_GCTA.sort_values(by='SNP')
    print(geno_GCTA.head())
    out = ''.join(geno_GCTA['new'])
    print(x, out[:10])
    return (x, out, sigmac_2, c)
visual_util.plot_base_prediction(
    base_pred=base_pred_dict,
    model_names=ensemble_model_names,
    X_valid=X_valid, y_valid=y_valid_mean,
    title_size=16, legend_size=12, y_range=[-2.5, 2.5],
    save_addr=os.path.join(
        _SAVE_ADDR_PREFIX,
        "{}/ensemble_base_model_fit_no_data.png".format(family_name)))

""" 3.5.2. visualize: ensemble posterior predictive mean """
posterior_mean_mu = np.nanmean(ensemble_mean_val, axis=0)
posterior_mean_cov = np.nanvar(ensemble_mean_val, axis=0)

posterior_resid_cov = np.nanvar(ensemble_resid_valid_sample, axis=0)
posterior_dist_cov = np.nanvar(ensemble_sample_val, axis=0) - posterior_resid_cov

posterior_mean_median = np.nanmedian(ensemble_mean_val, axis=0)
posterior_mean_quantiles = [
    np.percentile(ensemble_mean_val,
                  [100 - (100 - q) / 2, (100 - q) / 2], axis=0)
    for q in [68, 95, 99]
]

visual_util.gpr_1d_visual(
    posterior_mean_mu,
    pred_cov=posterior_mean_cov,
    X_train=X_test,
def getStatistics(windows):
    mean = np.nanmean(windows, axis=0)
    var = np.nanvar(windows, axis=0)
    return mean, var
DT = Te - Tp
PacT = T2 - T1

# ---------------------------------------------- #
# AVERAGE AND VARIANCE T FIELD
# ---------------------------------------------- #

Tav = T[0]
for i in range(0, len(T[0])):
    for j in range(0, len(T[0, 0])):
        Tav[i, j] = np.nanmean(T[:, i, j])

Tvar = T[1]
for i in range(0, len(T[0])):
    for j in range(0, len(T[0, 0])):
        Tvar[i, j] = np.nanvar(T[:, i, j])

# ---------------------------------------------- #
# REMOVE SEASONALITY
# ---------------------------------------------- #

Dtau = []
DTmeans = []
Temppart = []
Monthsing = np.linspace(1, 12, 12)
for i in range(0, 12):
    meantje = np.nanmean(tauvec[np.arange(i, 36, 12)])
    Dtau.append(meantje)
    meantje = np.nanmean(Te[np.arange(i, 36, 12)] - Tp[np.arange(i, 36, 12)])
    DTmeans.append(meantje)
    meantje = np.nanmean(T2[np.arange(i, 36, 12)] - T1[np.arange(i, 36, 12)])
fracy2 = 390
fracx1 = 180
fracx2 = 250
a[fracy1:fracy2, fracx1:fracx2] = h[0, fracy1:fracy2, fracx1:fracx2]
print(a.shape, type(a))  # Python 3 print

# Filter by elevation (band)
za = np.ma.masked_outside(altitude, 1000, 1500)
a[za.mask == True] = np.nan

T0 = 273.15

# Do statistics
print("Mean: {0}".format(np.nanmean(a)))
print("Standard deviation: {0}".format(np.nanstd(a, dtype=np.float64)))
print("Variance: {0}".format(np.nanvar(a)))
print("Average: {0}".format(np.average(a)))
print("Min: {0}".format(np.nanmin(a)))
print("Max: {0}".format(np.nanmax(a)))

'''
Can use 'altitude' to filter out alpine regions or elevation bands
'''

# View map and data
fig = plt.figure(figsize=(10, 12))
ax = fig.add_subplot(111)
xyext = [x[0], x[-1], y[0], y[-1]]
plt.imshow(bkgmap, zorder=0, origin='lower', cmap='pink', extent=xyext)
plt.imshow(a, alpha=0.8, zorder=1, origin='lower', cmap='seismic', extent=xyext)
# Create bounding box limits
xmin, xmax, ymin, ymax = (xp.min() - 50. * dx), (xp.max() + 50. * dx), \
                         (yp.min() - 50. * dy), (yp.max() + 50. * dy)

# Construct the grid
Xi, Yi = make_grid(xmin, xmax, ymin, ymax, dx, dy)

# Flatten prediction grid
xi = Xi.ravel()
yi = Yi.ravel()

# Markov-model parameter
a = 0.9132 * alpha

# Signal variance of entire field
c0 = np.nanvar(zp)

# Compute noise variance
crms = sigma * sigma

# Output vectors
zi = np.ones(len(xi)) * np.nan
ei = np.ones(len(xi)) * np.nan
ni = np.ones(len(xi)) * np.nan

# Determine nobs for tree
if mode == 'rand':
    n_quad = 16
else:
    n_quad = 8
def stats(X, weights=None, compute_variance=False):
    """
    Compute min, max, #nans, mean and variance.

    Result is a tuple (min, max, mean, variance, #nans, #non-nans) or an
    array of shape (len(X), 6).

    The mean and the number of nans and non-nans are weighted.

    Computation of variance requires an additional pass and is not enabled
    by default. Zeros are filled in instead of variance.

    Parameters
    ----------
    X : array_like, 1 or 2 dimensions
        Input array.
    weights : array_like, optional
        Weights, array of the same length as `x`.
    compute_variance : bool, optional
        If set to True, the function also computes variance.

    Returns
    -------
    out : a 6-element tuple or an array of shape (len(x), 6)
        Computed (min, max, mean, variance or 0, #nans, #non-nans)

    Raises
    ------
    ValueError
        If the length of the weight vector does not match the length of the
        array
    """
    is_numeric = np.issubdtype(X.dtype, np.number)
    is_sparse = sp.issparse(X)
    weighted = weights is not None and X.dtype != object

    def weighted_mean():
        if is_sparse:
            w_X = X.multiply(sp.csr_matrix(np.c_[weights] / sum(weights)))
            return np.asarray(w_X.sum(axis=0)).ravel()
        else:
            return np.nansum(X * np.c_[weights] / sum(weights), axis=0)

    if X.size and is_numeric and not is_sparse:
        nans = np.isnan(X).sum(axis=0)
        return np.column_stack((
            np.nanmin(X, axis=0),
            np.nanmax(X, axis=0),
            np.nanmean(X, axis=0) if not weighted else weighted_mean(),
            np.nanvar(X, axis=0) if compute_variance else np.zeros(X.shape[1]),
            nans,
            X.shape[0] - nans))
    elif is_sparse and X.size:
        if compute_variance:
            raise NotImplementedError

        non_zero = np.bincount(X.nonzero()[1], minlength=X.shape[1])
        X = X.tocsc()
        return np.column_stack((
            nanmin(X, axis=0),
            nanmax(X, axis=0),
            nanmean(X, axis=0) if not weighted else weighted_mean(),
            np.zeros(X.shape[1]),  # variance not supported
            X.shape[0] - non_zero,
            non_zero))
    else:
        nans = (~X.astype(bool)).sum(axis=0) if X.size else np.zeros(X.shape[1])
        return np.column_stack((
            np.tile(np.inf, X.shape[1]),
            np.tile(-np.inf, X.shape[1]),
            np.zeros(X.shape[1]),
            np.zeros(X.shape[1]),
            nans,
            X.shape[0] - nans))
def run(self):
    """
    Run method of the module. Fake positive companions are injected for a range of separations
    and angles. The magnitude of the contrast is changed stepwise and lowered by a factor 2 if
    needed. Once the fractional accuracy of the false positive fraction threshold is met, a
    linear interpolation is used to determine the final contrast. Note that the sigma level is
    fixed, therefore the false positive fraction changes with separation, following the
    Student's t-distribution (Mawet et al. 2014).

    :return: None
    """

    if self.m_angle[0] < 0. or self.m_angle[0] > 360. or self.m_angle[1] < 0. or \
            self.m_angle[1] > 360. or self.m_angle[2] < 0. or self.m_angle[2] > 360.:
        raise ValueError("The angular positions of the fake planets should lie between "
                         "0 deg and 360 deg.")

    images = self.m_image_in_port.get_all()
    psf = self.m_psf_in_port.get_all()

    pixscale = self.m_image_in_port.get_attribute("PIXSCALE")
    self.m_aperture /= pixscale

    if psf.ndim == 3 and psf.shape[0] != images.shape[0]:
        warnings.warn('The number of frames in psf_in_tag does not match with the number of '
                      'frames in image_in_tag. Using the mean of psf_in_tag as PSF template.')

    center = np.array([images.shape[2] / 2., images.shape[1] / 2.])

    pos_r = np.arange(self.m_separation[0] / pixscale,
                      self.m_separation[1] / pixscale,
                      self.m_separation[2] / pixscale)

    pos_t = np.arange(self.m_angle[0] + self.m_extra_rot,
                      self.m_angle[1] + self.m_extra_rot,
                      self.m_angle[2])

    if self.m_cent_size is None:
        index_del = np.argwhere(pos_r - self.m_aperture <= 0.)
    else:
        index_del = np.argwhere(pos_r - self.m_aperture <= self.m_cent_size / pixscale)

    pos_r = np.delete(pos_r, index_del)

    if self.m_edge_size is None or self.m_edge_size / pixscale > images.shape[1] / 2.:
        index_del = np.argwhere(pos_r + self.m_aperture >= images.shape[1] / 2.)
    else:
        index_del = np.argwhere(pos_r + self.m_aperture >= self.m_edge_size / pixscale)

    pos_r = np.delete(pos_r, index_del)

    fake_mag = np.zeros((len(pos_r), len(pos_t)))
    fake_fpf = np.zeros((len(pos_r)))

    count = 1

    sys.stdout.write("Running ContrastCurveModule...\n")
    sys.stdout.flush()

    for m, sep in enumerate(pos_r):
        fpf_threshold = student_fpf(self.m_sigma, sep, self.m_aperture, self.m_ignore)

        fake_fpf[m] = fpf_threshold

        for n, ang in enumerate(pos_t):
            sys.stdout.write("Processing position " + str(count) + " out of " +
                             str(np.size(fake_mag)))
            sys.stdout.flush()

            x_fake = center[0] + sep * math.cos(np.radians(ang + 90. - self.m_extra_rot))
            y_fake = center[1] + sep * math.sin(np.radians(ang + 90. - self.m_extra_rot))

            num_mag = np.size(fake_mag[m, 0:n])
            num_nan = np.size(np.where(np.isnan(fake_mag[m, 0:n])))

            if n == 0 or num_mag - num_nan == 0:
                list_mag = [self.m_magnitude[0]]
                mag_step = self.m_magnitude[1]
            else:
                list_mag = [np.nanmean(fake_mag[m, 0:n])]
                mag_step = 0.1

            list_fpf = []

            iteration = 1

            while True:
                sys.stdout.write('.')
                sys.stdout.flush()

                mag = list_mag[-1]

                fake_planet = FakePlanetModule(position=(sep * pixscale, ang),
                                               magnitude=mag,
                                               psf_scaling=self.m_psf_scaling,
                                               interpolation="spline",
                                               name_in="fake_planet",
                                               image_in_tag=self.m_image_in_tag,
                                               psf_in_tag=self.m_psf_in_tag,
                                               image_out_tag="contrast_fake",
                                               verbose=False)

                fake_planet.connect_database(self._m_data_base)
                fake_planet.run()

                prep = PSFpreparationModule(name_in="prep",
                                            image_in_tag="contrast_fake",
                                            image_out_tag="contrast_prep",
                                            image_mask_out_tag=None,
                                            mask_out_tag=None,
                                            norm=self.m_norm,
                                            resize=None,
                                            cent_size=self.m_cent_size,
                                            edge_size=self.m_edge_size,
                                            verbose=False)

                prep.connect_database(self._m_data_base)
                prep.run()

                psf_sub = PcaPsfSubtractionModule(name_in="pca_contrast",
                                                  pca_numbers=self.m_pca_number,
                                                  images_in_tag="contrast_prep",
                                                  reference_in_tag="contrast_prep",
                                                  res_mean_tag="contrast_res_mean",
                                                  res_median_tag=None,
                                                  res_arr_out_tag=None,
                                                  res_rot_mean_clip_tag=None,
                                                  extra_rot=self.m_extra_rot,
                                                  verbose=False)

                psf_sub.connect_database(self._m_data_base)
                psf_sub.run()

                res_input_port = self.add_input_port("contrast_res_mean")
                im_res = res_input_port.get_all()

                if len(im_res.shape) == 3:
                    if im_res.shape[0] == 1:
                        im_res = np.squeeze(im_res, axis=0)
                    else:
                        raise ValueError("Multiple residual images found, expecting only one.")

                if self.m_pca_out_port is not None:
                    if count == 1 and iteration == 1:
                        self.m_pca_out_port.set_all(im_res, data_dim=3)
                    else:
                        self.m_pca_out_port.append(im_res, data_dim=3)

                _, _, fpf = false_alarm(im_res, x_fake, y_fake, self.m_aperture, self.m_ignore)

                list_fpf.append(fpf)

                if abs(fpf_threshold - list_fpf[-1]) < self.m_accuracy * fpf_threshold:
                    if len(list_fpf) == 1:
                        fake_mag[m, n] = list_mag[0]

                        sys.stdout.write("\n")
                        sys.stdout.flush()

                        break

                    else:
                        if (fpf_threshold > list_fpf[-2] and fpf_threshold < list_fpf[-1]) or \
                                (fpf_threshold < list_fpf[-2] and fpf_threshold > list_fpf[-1]):
                            fpf_interp = interp1d(list_fpf[-2:], list_mag[-2:], 'linear')
                            fake_mag[m, n] = fpf_interp(fpf_threshold)

                            sys.stdout.write("\n")
                            sys.stdout.flush()

                            break

                        else:
                            pass

                if list_fpf[-1] < fpf_threshold:
                    if list_mag[-1] + mag_step in list_mag:
                        mag_step /= 2.

                    list_mag.append(list_mag[-1] + mag_step)

                else:
                    if np.size(list_fpf) > 2 and \
                            list_mag[-1] < list_mag[-2] and list_mag[-2] < list_mag[-3] and \
                            list_fpf[-1] > list_fpf[-2] and list_fpf[-2] < list_fpf[-3]:
                        warnings.warn("Magnitude decreases but false positive fraction "
                                      "increases. Adjusting magnitude to %s and step size "
                                      "to %s" % (list_mag[-3], mag_step / 2.))

                        list_fpf = []
                        list_mag = [list_mag[-3]]
                        mag_step /= 2.

                    else:
                        if list_mag[-1] - mag_step in list_mag:
                            mag_step /= 2.

                        list_mag.append(list_mag[-1] - mag_step)

                if list_mag[-1] <= 0.:
                    warnings.warn("The relative magnitude has become smaller or equal to "
                                  "zero. Adjusting magnitude to 7.5 and step size to 0.1.")

                    list_mag[-1] = 7.5
                    mag_step = 0.1

                iteration += 1

                if iteration == 50:
                    warnings.warn("ContrastModule could not converge at the position of "
                                  "%s arcsec and %s deg." % (sep * pixscale, ang))

                    fake_mag[m, n] = np.nan

                    sys.stdout.write("\n")
                    sys.stdout.flush()

                    break

            count += 1

    result = np.column_stack((pos_r * pixscale,
                              np.nanmean(fake_mag, axis=1),
                              np.nanvar(fake_mag, axis=1),
                              fake_fpf))

    self.m_contrast_out_port.set_all(result, data_dim=2)

    sys.stdout.write("Running ContrastCurveModule... [DONE]\n")
    sys.stdout.flush()

    if self.m_pca_out_port is not None:
        self.m_pca_out_port.add_history_information("Contrast limits",
                                                    str(self.m_sigma) + " sigma")
        self.m_pca_out_port.copy_attributes_from_input_port(self.m_image_in_port)

    self.m_contrast_out_port.add_history_information("Contrast limits",
                                                     str(self.m_sigma) + " sigma")
    self.m_contrast_out_port.copy_attributes_from_input_port(self.m_image_in_port)
    self.m_contrast_out_port.close_port()
def fit(X, y):
    idx = simplify_labels(y)
    mean_map = np.nanmean(X * idx, axis=0)
    var_map = np.nanvar(X * idx, axis=0)
    return np.array([mean_map, var_map])
def get_r_hat(self, parameter_array):
    """
    Based on some fancy Matlab code, it returns an array [R_stat, MR_stat].

    :param parameter_array: 3 dim array of parameter estimation sets
    :type parameter_array: list
    :return: [R_stat, MR_stat]
    :rtype: list
    """
    n, d, N = parameter_array.shape

    # Use only the last 50% of each chain (Vrugt 2009), i.e. only half of "d",
    # because "d" is the repetition count: we use the values from d/2 to d
    # which are already not NaN
    whereIsNoNAN = np.logical_not(np.isnan(parameter_array))

    alreadyToNum = np.sum(whereIsNoNAN[0, :, 0])

    if alreadyToNum > 3:
        parameter_array = parameter_array[:, int(np.floor(alreadyToNum / 2)):alreadyToNum, :]
    else:
        # the later functions need some data to work right, so in this case
        # we use 100% of the non-NaN values
        parameter_array = parameter_array[:, 0:alreadyToNum, :]

    # I made a big confusion with d, n and N; I figured it out by tests

    if n > 3:
        mean_chains = np.zeros((n, N))
        for i in range(n):
            for j in range(N):
                mean_chains[i, j] = np.nanmean(parameter_array[i, :, j])

        B_uni = np.zeros(N)
        for i in range(N):
            # make numpy Matlab-like: https://stackoverflow.com/a/27600240/5885054
            B_uni[i] = d * np.nanvar(mean_chains[:, i], ddof=1)

        var_chains = np.zeros((n, N))
        for i in range(n):
            for j in range(N):
                var_chains[i, j] = np.nanvar(parameter_array[i, :, j], ddof=1)

        W_uni = np.zeros(N)
        for i in range(N):
            W_uni[i] = np.mean(var_chains[:, i])

        sigma2 = ((d - 1) / d) * W_uni + (1 / d) * B_uni

        whichW_UNIIsNull = W_uni == 0.0
        W_uni[whichW_UNIIsNull] = np.random.uniform(0.1, 1, 1)

        R_stat = np.sqrt((n + 1) / n * (np.divide(sigma2, W_uni)) - (d - 1) / (n * d))

        # W_mult = 0
        # for ii in range(n):
        #     W_mult = W_mult + np.cov(np.nan_to_num(np.transpose(parameter_array[ii, :, :])), ddof=1)
        #
        # W_mult = W_mult / n + 2e-52 * np.eye(N)
        #
        # # Note that numpy.cov() considers its input data matrix to have observations in each column,
        # # and variables in each row, so to get numpy.cov() to return what other packages do,
        # # you have to pass the transpose of the data matrix to numpy.cov().
        # # https://stats.stackexchange.com/a/263508/168054
        #
        # B_mult = np.cov(np.nan_to_num(np.transpose(mean_chains))) + 2e-52 * np.eye(N)  # 2e-52 avoids problems with eig if var = 0
        # M = np.linalg.lstsq(W_mult, B_mult)
        # R = np.max(np.abs(np.linalg.eigvals(M[0])))
        # MR_stat = np.sqrt((n + 1) / n * R + (d - 1) / d)

        return R_stat  # [R_stat, MR_stat]
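# Standalone sanity check of the univariate Gelman-Rubin statistic computed
# above (a plain-numpy sketch, not SPOTPY's API): for chains drawn from the
# same distribution, R_stat should land close to 1.
import numpy as np

n, d, N = 4, 1000, 2                   # chains, draws per chain, parameters
chains = np.random.normal(size=(n, d, N))
mean_chains = np.nanmean(chains, axis=1)                     # (n, N)
B_uni = d * np.nanvar(mean_chains, axis=0, ddof=1)           # between-chain
W_uni = np.mean(np.nanvar(chains, axis=1, ddof=1), axis=0)   # within-chain
sigma2 = ((d - 1) / d) * W_uni + (1 / d) * B_uni
R_stat = np.sqrt((n + 1) / n * (sigma2 / W_uni) - (d - 1) / (n * d))
print(R_stat)  # ~[1.0, 1.0]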
def main():
    os.chdir(config.DIRPATH)
    masterdf = pd.read_csv("./!Data/Cleaner/RIAIMaster.csv")

    piindexlist = []
    for i in range(0, int((len(masterdf.columns) - 3) / 3)):
        piindexlist.append((i * 3) + 3)

    # Create csv for Statsum by Pi
    with open('./!Data/SumStat/RIAISumStatDay.csv', 'w') as csvFile:
        writer = csv.writer(csvFile, lineterminator='\n')
        writer.writerow([
            'Datetime',
            'DLMean', 'DLMedian', 'DLMax', 'DLStdDev', 'DLVar', 'DLSkew',
            'ULMean', 'ULMedian', 'ULMax', 'ULStdDev', 'ULVar', 'ULSkew',
            'RTTMean', 'RTTMedian', 'RTTMax', 'RTTStdDev', 'RTTVar', 'RTTSkew'
        ])

        for rowindex in range(0, len(masterdf.index)):
            datetimevar = masterdf['Datetime'].loc[rowindex]

            # rtt stats
            rttarray = []
            for i in range(0, len(piindexlist)):
                rttarray.append(masterdf[list(masterdf)[piindexlist[i]]].loc[rowindex])
            rttmean = np.nanmean(rttarray)
            rttmedian = np.nanmedian(rttarray)
            rttmax = np.nanmax(rttarray)
            rttstd = np.nanstd(rttarray)
            rttvar = np.nanvar(rttarray)
            nprttarray = array(rttarray)
            rttarraynan = nprttarray[~np.isnan(nprttarray)]
            rttskew = skew(rttarraynan)

            # dl stats
            dlarray = []
            for i in range(0, len(piindexlist)):
                dlarray.append(masterdf[list(masterdf)[piindexlist[i] + 1]].loc[rowindex])
            dlmean = np.nanmean(dlarray)
            dlmedian = np.nanmedian(dlarray)
            dlmax = np.nanmax(dlarray)
            dlstd = np.nanstd(dlarray)
            dlvar = np.nanvar(dlarray)
            npdlarray = array(dlarray)
            dlarraynan = npdlarray[~np.isnan(npdlarray)]
            dlskew = skew(dlarraynan)

            # ul stats
            ularray = []
            for i in range(0, len(piindexlist)):
                ularray.append(masterdf[list(masterdf)[piindexlist[i] + 2]].loc[rowindex])
            ulmean = np.nanmean(ularray)
            ulmedian = np.nanmedian(ularray)
            ulmax = np.nanmax(ularray)
            ulstd = np.nanstd(ularray)
            ulvar = np.nanvar(ularray)
            npularray = array(ularray)
            ularraynan = npularray[~np.isnan(npularray)]
            ulskew = skew(ularraynan)

            # write row
            newrow = [
                datetimevar,
                dlmean, dlmedian, dlmax, dlstd, dlvar, dlskew,
                ulmean, ulmedian, ulmax, ulstd, ulvar, ulskew,
                rttmean, rttmedian, rttmax, rttstd, rttvar, rttskew
            ]
            writer.writerow(newrow)
            print('Completed statsum for - ', datetimevar)

    open('./log.txt', "a").write(
        str(datetime.now()) + ' - SumStat/RIAISumStatDay.csv successfully created \n')
@author: adrien

Classification of daily PET data, Gillot Airport
"""
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tasgrid as tg
import utm

# %% Plot temporal evolution of ETP
plt.close('all')

dataRaw = pd.read_csv('GillotDaily.csv', sep=';')
dataRaw.DATE = pd.DatetimeIndex(dataRaw.DATE)
dataRawbis = dataRaw.set_index('DATE')

plt.figure()
plt.plot(dataRawbis.ETPMON, 'b*')

Avg = np.nanmean(dataRawbis.ETPMON)
Var = np.nanvar(dataRawbis.ETPMON)
Std = np.nanstd(dataRawbis.ETPMON)
Rapport = Std / Avg * 100
tp.columns = ['uid', i + '_max']
if gn.empty == True:
    gn = tp
else:
    gn = pd.merge(gn, tp, on='uid', how='left')

# minimum over the historical data
tp = pd.DataFrame(
    df.groupby('uid').apply(lambda df: np.nanmin(df[i])).reset_index())
tp.columns = ['uid', i + '_min']
if gn.empty == True:
    gn = tp
else:
    gn = pd.merge(gn, tp, on='uid', how='left')

# variance over the historical data
tp = pd.DataFrame(
    df.groupby('uid').apply(lambda df: np.nanvar(df[i])).reset_index())
tp.columns = ['uid', i + '_var']
if gn.empty == True:
    gn = tp
else:
    gn = pd.merge(gn, tp, on='uid', how='left')

# range (max - min) over the historical data
tp = pd.DataFrame(
    df.groupby('uid').apply(
        lambda df: np.nanmax(df[i]) - np.nanmin(df[i])).reset_index())
tp.columns = ['uid', i + '_ran']
if gn.empty == True:
    gn = tp
else:
    gn = pd.merge(gn, tp, on='uid', how='left')

# coefficient of variation over the historical data; smooth with 0.01 to
# avoid division by zero
    [100 - (100 - q) / 2, (100 - q) / 2], axis=0)
    for q in [68, 95, 99]
]

# compute statistics, uncalibrated predictive distribution
posterior_dist_median_orig = np.nanmedian(ensemble_sample_val_orig, axis=0)
posterior_dist_quantiles_orig = [
    np.percentile(ensemble_sample_val_orig,
                  [100 - (100 - q) / 2, (100 - q) / 2], axis=0)
    for q in [68, 95, 99]
]

# compute statistics, calibrated predictive distribution
posterior_mean_median = np.nanmedian(ensemble_mean_val.T, axis=0)
posterior_mean_cov = np.nanvar(ensemble_mean_val.T, axis=0)

posterior_mean_adj_median = np.nanmedian(ensemble_mean_corrected_val.T, axis=0)
posterior_mean_adj_cov = np.nanvar(ensemble_mean_corrected_val.T, axis=0)

posterior_dist_mu = np.nanmean(ensemble_sample_calib_val.T, axis=0)
posterior_dist_median = np.nanmedian(ensemble_sample_calib_val.T, axis=0)
posterior_dist_cov = np.nanvar(ensemble_sample_calib_val.T, axis=0)
posterior_dist_quantiles = [
    np.percentile(ensemble_sample_calib_val.T,
                  [100 - (100 - q) / 2, (100 - q) / 2], axis=0)
    for q in [68, 95, 99]
]

# compute statistics, additional variance due to G
def run(self) -> None:
    """
    Run method of the module. An artificial planet is injected (based on the noise level) at a
    given separation and position angle. The amount of self-subtraction is then determined and
    the contrast limit is calculated for a given sigma level or false positive fraction. A
    correction for small sample statistics is applied for both cases. Note that if the sigma
    level is fixed, the false positive fraction changes with separation, following the
    Student's t-distribution (see Mawet et al. 2014 for details).

    Returns
    -------
    NoneType
        None
    """

    images = self.m_image_in_port.get_all()
    psf = self.m_psf_in_port.get_all()

    if psf.shape[0] != 1 and psf.shape[0] != images.shape[0]:
        raise ValueError(f'The number of frames in psf_in_tag {psf.shape} does not match with '
                         f'the number of frames in image_in_tag {images.shape}. The '
                         f'DerotateAndStackModule can be used to average the PSF frames '
                         f'(without derotating) before applying the ContrastCurveModule.')

    cpu = self._m_config_port.get_attribute('CPU')
    working_place = self._m_config_port.get_attribute('WORKING_PLACE')

    parang = self.m_image_in_port.get_attribute('PARANG')
    pixscale = self.m_image_in_port.get_attribute('PIXSCALE')

    self.m_image_in_port.close_port()
    self.m_psf_in_port.close_port()

    if self.m_cent_size is not None:
        self.m_cent_size /= pixscale

    if self.m_edge_size is not None:
        self.m_edge_size /= pixscale

    self.m_aperture /= pixscale

    pos_r = np.arange(self.m_separation[0] / pixscale,
                      self.m_separation[1] / pixscale,
                      self.m_separation[2] / pixscale)

    pos_t = np.arange(self.m_angle[0] + self.m_extra_rot,
                      self.m_angle[1] + self.m_extra_rot,
                      self.m_angle[2])

    if self.m_cent_size is None:
        index_del = np.argwhere(pos_r - self.m_aperture <= 0.)
    else:
        index_del = np.argwhere(pos_r - self.m_aperture <= self.m_cent_size)

    pos_r = np.delete(pos_r, index_del)

    if self.m_edge_size is None or self.m_edge_size > images.shape[1] / 2.:
        index_del = np.argwhere(pos_r + self.m_aperture >= images.shape[1] / 2.)
    else:
        index_del = np.argwhere(pos_r + self.m_aperture >= self.m_edge_size)

    pos_r = np.delete(pos_r, index_del)

    positions = []
    for sep in pos_r:
        for ang in pos_t:
            positions.append((sep, ang))

    result = []
    async_results = []

    # Create temporary files
    tmp_im_str = os.path.join(working_place, 'tmp_images.npy')
    tmp_psf_str = os.path.join(working_place, 'tmp_psf.npy')

    np.save(tmp_im_str, images)
    np.save(tmp_psf_str, psf)

    mask = create_mask(images.shape[-2:], (self.m_cent_size, self.m_edge_size))

    _, im_res = pca_psf_subtraction(images=images * mask,
                                    angles=-1. * parang + self.m_extra_rot,
                                    pca_number=self.m_pca_number)

    noise = combine_residuals(method=self.m_residuals, res_rot=im_res)

    pool = mp.Pool(cpu)

    for pos in positions:
        async_results.append(pool.apply_async(contrast_limit,
                                              args=(tmp_im_str,
                                                    tmp_psf_str,
                                                    noise,
                                                    mask,
                                                    parang,
                                                    self.m_psf_scaling,
                                                    self.m_extra_rot,
                                                    self.m_pca_number,
                                                    self.m_threshold,
                                                    self.m_aperture,
                                                    self.m_residuals,
                                                    self.m_snr_inject,
                                                    pos)))

    pool.close()

    start_time = time.time()

    # wait for all processes to finish
    while mp.active_children():
        # number of finished processes
        nfinished = sum([i.ready() for i in async_results])

        progress(nfinished, len(positions), 'Calculating detection limits...', start_time)

        # check if new processes have finished every 5 seconds
        time.sleep(5)

    if nfinished != len(positions):
        sys.stdout.write('\r                                                      ')
        sys.stdout.write('\rCalculating detection limits... [DONE]\n')
        sys.stdout.flush()

    # get the results for every async_result object
    for item in async_results:
        result.append(item.get())

    pool.terminate()

    os.remove(tmp_im_str)
    os.remove(tmp_psf_str)

    result = np.asarray(result)

    # Sort the results first by separation and then by angle
    indices = np.lexsort((result[:, 1], result[:, 0]))
    result = result[indices]

    result = result.reshape((pos_r.size, pos_t.size, 4))

    mag_mean = np.nanmean(result, axis=1)[:, 2]
    mag_var = np.nanvar(result, axis=1)[:, 2]
    res_fpf = result[:, 0, 3]

    limits = np.column_stack((pos_r * pixscale, mag_mean, mag_var, res_fpf))

    self.m_image_in_port._check_status_and_activate()
    self.m_contrast_out_port._check_status_and_activate()

    self.m_contrast_out_port.set_all(limits, data_dim=2)

    history = f'{self.m_threshold[0]} = {self.m_threshold[1]}'
    self.m_contrast_out_port.add_history('ContrastCurveModule', history)
    self.m_contrast_out_port.copy_attributes(self.m_image_in_port)
    self.m_contrast_out_port.close_port()
def variance(F, B, c, f, b):
    return np.nanvar(f)
def stack_var(arrs, nodata=None):
    """see stack_stats"""
    a = check_stack(arrs)
    if nodata is not None:
        a = mask_stack(a, nodata=nodata)
    return np.nanvar(a, axis=0)
def reweight(MSh, mode):
    # get data per antenna
    var_antenna = {}
    med_antenna = {}
    for ant_id, ant_name, ms_ant in MSh.iter_antenna():

        with Timer('Get data'):
            data = ms_ant.getcol('GDATA')   # axes: time, ant, freq, pol
            flags = ms_ant.getcol('GFLAG')  # axes: time, ant, freq, pol
            # put flagged data to NaNs
            data[flags] = np.nan
            # if completely flagged set variance to 1 and continue
            if np.all(flags):
                var_antenna[ant_id] = None
                med_antenna[ant_id] = None
                continue

        with Timer('Prepare data'):
            # data column is updated subtracting adjacent channels
            if mode == 'subchan':
                data_shifted_l = np.roll(data, -1, axis=2)
                data_shifted_r = np.roll(data, +1, axis=2)
                # if only 2 freqs it's already ok, subtracting one from the other
                if data.shape[2] > 2:
                    data_shifted_l[:, :, -1, :] = data_shifted_l[:, :, -3, :]  # last chan uses the one but last
                    data_shifted_r[:, :, 0, :] = data_shifted_r[:, :, 2, :]    # first chan uses third
                # get the "best" shift, either on the right or left.
                # This is to avoid propagating bad channels (e.g. with RFI)
                ratio_l = np.nanvar(data_shifted_l, axis=(0, 1, 3)) / np.nanmean(data_shifted_l, axis=(0, 1, 3))
                ratio_l[np.isnan(ratio_l)] = np.inf
                ratio_r = np.nanvar(data_shifted_r, axis=(0, 1, 3)) / np.nanmean(data_shifted_r, axis=(0, 1, 3))
                ratio_r[np.isnan(ratio_r)] = np.inf
                data = np.where((ratio_l < ratio_r)[np.newaxis, np.newaxis, :, np.newaxis],
                                data - data_shifted_l, data - data_shifted_r)

            # data column is updated subtracting adjacent times
            if mode == 'subtime':
                data_shifted_l = np.roll(data, -1, axis=0)
                data_shifted_r = np.roll(data, +1, axis=0)
                # if only 2 timeslots it's already ok, subtracting one from the other
                if data.shape[0] > 2:
                    data_shifted_l[-1, :, :, :] = data_shifted_l[-3, :, :, :]  # last timeslot uses the one but last
                    data_shifted_r[0, :, :, :] = data_shifted_r[2, :, :, :]    # first timeslot uses third
                # get the "best" shift, either on the right or left.
                # This is to avoid propagating bad channels (e.g. with RFI)
                ratio_l = np.nanvar(data_shifted_l, axis=(1, 2, 3)) / np.nanmean(data_shifted_l, axis=(1, 2, 3))
                ratio_l[np.isnan(ratio_l)] = np.inf
                ratio_r = np.nanvar(data_shifted_r, axis=(1, 2, 3)) / np.nanmean(data_shifted_r, axis=(1, 2, 3))
                ratio_r[np.isnan(ratio_r)] = np.inf
                data = np.where((ratio_l < ratio_r)[:, np.newaxis, np.newaxis, np.newaxis],
                                data - data_shifted_l, data - data_shifted_r)

            # use residual data, nothing to do here
            elif mode == 'residual':
                pass

        with Timer('Calc variances'):
            # find mean/variance per time/freq for each antenna
            med_freqs = np.abs(np.nanmean(data, axis=(1, 2))**2)  # time x pol
            med_times = np.abs(np.nanmean(data, axis=(0, 1))**2)  # freq x pol
            # sum of the time/freq mean - axes: time, freq, pol
            med_antenna[ant_id] = med_freqs[:, np.newaxis] + med_times
            var_freqs = np.nanvar(data, axis=(1, 2))  # time x pol
            var_times = np.nanvar(data, axis=(0, 1))  # freq x pol
            # sum of the time/freq variances - axes: time, freq, pol
            var_antenna[ant_id] = var_freqs[:, np.newaxis] + var_times

    # reconstruct BL weights from antenna variance
    for ms_bl in MSh.ms.iter(["ANTENNA1", "ANTENNA2"]):
        ant_id1 = ms_bl.getcol('ANTENNA1')[0]
        ant_id2 = ms_bl.getcol('ANTENNA2')[0]
        if var_antenna[ant_id1] is None or var_antenna[ant_id2] is None:
            continue

        # print('### BL: %i - %i' % (ant_id1, ant_id2))
        # print(var_antenna[ant_id1] * med_antenna[ant_id2])
        # print(var_antenna[ant_id2] * med_antenna[ant_id1])
        # print(var_antenna[ant_id1] * var_antenna[ant_id2])
        w = 1. / (var_antenna[ant_id1] * med_antenna[ant_id2]
                  + var_antenna[ant_id2] * med_antenna[ant_id1]
                  + var_antenna[ant_id1] * var_antenna[ant_id2])
        w -= np.nanmedian(w)  # TEST: REMOVE MEDIAN?

        f = ms_bl.getcol('FLAG')
        # find how many unflagged weights are nans
        ntoflag = np.count_nonzero(np.isnan(w[~f]))
        logging.debug('BL: %i - %i: created %i new flags (%f%%)'
                      % (ant_id1, ant_id2, ntoflag, (100. * ntoflag) / np.size(w)))

        ms_bl.putcol(MSh.wcolname, w)
        ms_bl.flush()

        # flag weights that are nans
        taql('update $ms_bl set FLAG[isnan(WEIGHT_SPECTRUM)]=True, '
             'WEIGHT_SPECTRUM[isnan(WEIGHT_SPECTRUM)]=0')
        ms_bl.flush()
            index = pop + POP_NB * Replicate
            HsSelMPop.append(HsSel[index][gen])
            HobsSelMPop.append(HobsSel[index][gen])
            FisSelMPop.append(FisSel[index][gen])
            ExtMPop.append(ExtSel[index][gen])

        if np.nansum(HobsSelMPop) == 0 and np.nansum(FisSelMPop) == 0:
            HsSelBarMeanRep.append(np.nan)
            HsSelBarVarRep.append(np.nan)
            HobsSelBarMeanRep.append(np.nan)
            HobsSelBarVarRep.append(np.nan)
            FisSelBarMeanRep.append(np.nan)
            FisSelBarVarRep.append(np.nan)
            ExtSelTotRep.append(sum(ExtMPop))
        else:
            HsSelBarMeanRep.append(np.nanmean(HsSelMPop))
            HsSelBarVarRep.append(np.nanvar(HsSelMPop))
            HobsSelBarMeanRep.append(np.nanmean(HobsSelMPop))
            HobsSelBarVarRep.append(np.nanvar(HobsSelMPop))
            FisSelBarMeanRep.append(np.nanmean(FisSelMPop))
            FisSelBarVarRep.append(np.nanvar(FisSelMPop))
            ExtSelTotRep.append(sum(ExtMPop))

    HsSelBarMean.append(HsSelBarMeanRep)
    HsSelBarVar.append(HsSelBarVarRep)
    HobsSelBarMean.append(HobsSelBarMeanRep)
    HobsSelBarVar.append(HobsSelBarVarRep)
    FisSelBarMean.append(FisSelBarMeanRep)
    FisSelBarVar.append(FisSelBarVarRep)
    ExtSelTot.append(ExtSelTotRep)

# We mean HtLocSel and HtLocNSel for the markers
def calculate_posterior_mc_frac(mc_da, cov_da, var_dim=None,
                                normalize_per_cell=True, clip_norm_value=10):
    # so we can do post_frac only on a very small set of genes to prevent memory issues
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # here we expect to see a true_divide warning due to cov=0
        raw_frac = mc_da / cov_da

    if isinstance(raw_frac, np.ndarray):  # np.ndarray
        ndarray = True
    else:
        ndarray = False

    if ndarray:
        cell_rate_mean = np.nanmean(raw_frac, axis=1)
        cell_rate_var = np.nanvar(raw_frac, axis=1)
    else:
        # assume xr.DataArray
        if var_dim is None:
            cell_rate_mean = raw_frac.mean(axis=1)  # this skips na
            cell_rate_var = raw_frac.var(axis=1)    # this skips na
        else:
            cell_rate_mean = raw_frac.mean(dim=var_dim)  # this skips na
            cell_rate_var = raw_frac.var(dim=var_dim)    # this skips na

    # based on beta distribution mean, var
    #     a / (a + b) = cell_rate_mean
    #     a * b / ((a + b) ^ 2 * (a + b + 1)) = cell_rate_var
    # calculate alpha beta value for each cell
    cell_a = (1 - cell_rate_mean) * (cell_rate_mean ** 2) / cell_rate_var - cell_rate_mean
    cell_b = cell_a * (1 / cell_rate_mean - 1)

    # cell specific posterior rate
    post_frac: Union[np.ndarray, xr.DataArray]
    if ndarray:
        post_frac = (mc_da + cell_a[:, None]) / (cov_da + cell_a[:, None] + cell_b[:, None])
    else:
        post_frac = (mc_da + cell_a) / (cov_da + cell_a + cell_b)

    if normalize_per_cell:
        # there are two ways of normalizing per cell, by posterior or prior mean:
        # prior_mean = cell_a / (cell_a + cell_b)
        # posterior_mean = post_rate.mean(dim=var_dim)

        # Here I choose to use prior_mean to normalize cells,
        # therefore all cov == 0 features will have normalized rate == 1 in all cells,
        # i.e. a 0-cov feature provides no info
        prior_mean = cell_a / (cell_a + cell_b)
        if ndarray:
            post_frac = post_frac / prior_mean[:, None]
        else:
            post_frac = post_frac / prior_mean
        if clip_norm_value is not None:
            if isinstance(post_frac, np.ndarray):
                # np.ndarray
                post_frac[post_frac > clip_norm_value] = clip_norm_value
            else:
                # xarray.DataArray
                post_frac = post_frac.where(post_frac < clip_norm_value, clip_norm_value)
    return post_frac
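# Sanity check, in plain numpy, of the beta moment matching used above:
# starting from known (a, b), inverting the mean/variance relations should
# recover them.
import numpy as np

a_true, b_true = 2.0, 5.0
mean = a_true / (a_true + b_true)
var = a_true * b_true / ((a_true + b_true) ** 2 * (a_true + b_true + 1))
a = (1 - mean) * mean ** 2 / var - mean  # same inversion as cell_a above
b = a * (1 / mean - 1)                   # same inversion as cell_b above
print(a, b)  # 2.0, 5.0 (up to float error)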
def compute(self, raster_sources, sample_prob=None):
    """Compute the means and stds over all the raster_sources.

    This ignores NODATA values.

    If sample_prob is set, a subset of each scene is used to compute stats,
    which speeds up the computation. Roughly speaking, if sample_prob=0.5,
    then half the pixels in the scene will be used. More precisely, the
    number of chips is equal to sample_prob * (width * height / 300^2), or 1,
    whichever is greater. Each chip is uniformly sampled from the scene with
    replacement. Otherwise, a sliding window over the entire scene is used to
    compute stats.

    Args:
        raster_sources: list of RasterSource
        sample_prob: (float or None) between 0 and 1
    """
    stride = chip_sz
    nb_channels = raster_sources[0].num_channels

    def get_chip(raster_source, window):
        """Return chip or None if all values are NODATA."""
        chip = raster_source.get_raw_chip(window).astype(np.float32)
        # Convert shape from [h,w,c] to [c,h*w]
        chip = np.reshape(np.transpose(chip, [2, 0, 1]), (nb_channels, -1))
        # Ignore NODATA values.
        chip[chip == 0.0] = np.nan
        if np.any(~np.isnan(chip)):
            return chip
        return None

    def sliding_chip_stream():
        """Get stream of chips using a sliding window of size chip_sz."""
        for raster_source in raster_sources:
            with raster_source.activate():
                windows = raster_source.get_extent().get_windows(chip_sz, stride)
                for window in windows:
                    chip = get_chip(raster_source, window)
                    if chip is not None:
                        yield chip

    def random_chip_stream():
        """Get random stream of chips."""
        for raster_source in raster_sources:
            with raster_source.activate():
                extent = raster_source.get_extent()
                num_pixels = extent.get_width() * extent.get_height()
                num_chips = round(sample_prob * (num_pixels / (chip_sz**2)))
                num_chips = max(1, num_chips)
                for _ in range(num_chips):
                    window = raster_source.get_extent().make_random_square(chip_sz)
                    chip = get_chip(raster_source, window)
                    if chip is not None:
                        yield chip

    # For each chip, compute the mean and var of that chip and then update the
    # running mean and var.
    count = 0
    mean = np.zeros((nb_channels, ))
    var = np.zeros((nb_channels, ))
    chip_stream = (sliding_chip_stream()
                   if sample_prob is None else random_chip_stream())
    for c in chip_stream:
        chip_means = np.nanmean(c, axis=1)
        chip_vars = np.nanvar(c, axis=1)
        # count non-NaN pixels; the original `c[0] != np.nan` was always True
        chip_count = np.sum(~np.isnan(c[0]))
        var = parallel_variance(chip_means, chip_count, chip_vars, mean, count, var)
        mean = parallel_mean(chip_means, chip_count, mean, count)
        count += chip_count

    self.means = mean
    self.stds = np.sqrt(var)
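# Not from the original source: parallel_mean and parallel_variance are called
# above but not shown. This is a minimal sketch of what they are presumably doing,
# combining per-chunk statistics with the Chan et al. pairwise update; the
# signatures are assumptions modeled on the call sites, and population variances
# (ddof=0, matching np.nanvar's default) are assumed throughout.
import numpy as np

def parallel_mean(mean_b, count_b, mean_a, count_a):
    # weighted average of two partial means
    return (count_a * mean_a + count_b * mean_b) / (count_a + count_b)

def parallel_variance(mean_b, count_b, var_b, mean_a, count_a, var_a):
    # Chan et al. pairwise combination of two partial population variances;
    # the delta term corrects for the two chunks having different means
    if count_a == 0:
        return var_b
    n = count_a + count_b
    delta = mean_b - mean_a
    M2 = var_a * count_a + var_b * count_b + delta ** 2 * count_a * count_b / n
    return M2 / n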
def __init__(self, filename, world_type="TimeSeries", name="", owner="",
             engine=None, uid=None, version=1, config={}):
    World.__init__(self, filename, world_type=world_type, name=name,
                   owner=owner, uid=uid, version=version, config=config)
    self.data['assets'] = self.assets

    filename = config.get('time_series_data_file', "timeseries.npz")
    if os.path.isabs(filename):
        path = filename
    else:
        path = os.path.join(cfg['micropsi2']['data_directory'], filename)
    self.logger.info("loading timeseries from %s for world %s" % (path, uid))

    self.realtime_per_entry = int(config['realtime_per_entry'])
    self.last_realtime_step = datetime.utcnow().timestamp() * 1000

    try:
        with np.load(path) as f:
            self.timeseries = f['data']
            self.ids = f['ids']
            self.timestamps = f['timestamps']
    except IOError as error:
        self.logger.error("Could not load data file %s, error was: %s" % (path, str(error)))
        # fall back to a minimal dummy series; the original line here,
        # `self.timeseries[[0, 0, 0]]`, was a no-op typo for an assignment
        self.ids = [0]
        self.timeseries = np.zeros((1, 1))
        self.timestamps = [0]
        self.len_ts = 1
        return

    # todo: use the new configurable world options.
    dummydata = config['dummy_data'] == "True"
    z_transform = config['z_transform'] == "True"
    clip_and_scale = config['clip_and_scale'] == "True"
    sigmoid = config['sigmoid'] == "True"
    self.shuffle = config['shuffle'] == "True"

    if clip_and_scale and sigmoid:
        self.logger.warning("clip_and_scale and sigmoid cannot both be configured, choosing sigmoid")
        clip_and_scale = False

    def sigm(X):
        """ Sigmoid that avoids float overflows for very negative inputs.
        Expects a numpy float array.
        """
        cutoff = np.log(np.finfo(X.dtype).max) - 1
        X[np.nan_to_num(X) <= -cutoff] = -cutoff
        return 1. / (1. + np.exp(-X))

    if (z_transform or clip_and_scale or sigmoid) and not dummydata:
        data_z = np.empty_like(self.timeseries)
        data_z[:] = np.nan
        for i, row in enumerate(self.timeseries):
            if not np.all(np.isnan(row)):
                std = np.sqrt(np.nanvar(row))
                if std > 0:
                    if not clip_and_scale:
                        row_z = (row - np.nanmean(row)) / std
                    else:
                        row_z = row - np.nanmean(row)
                        pstd = std * 4
                        row_z[np.nan_to_num(row_z) > pstd] = pstd
                        row_z[np.nan_to_num(row_z) < -pstd] = -pstd
                        row_z = ((row_z / pstd) + 1) * 0.5
                    data_z[i, :] = row_z
        self.timeseries = data_z if not sigmoid else sigm(data_z)

    if dummydata:
        self.logger.warning("! Using dummy data")
        n_ids = self.timeseries.shape[0]
        self.timeseries = np.tile(np.random.rand(n_ids, 1), (1, 10))

    self.len_ts = self.timeseries.shape[1]
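# Not in the original source: a tiny standalone check of the overflow-safe sigmoid
# above. For float64, np.log(np.finfo(...).max) is about 709.8, so inputs below
# roughly -708.8 are clamped before np.exp(-X) can overflow.
import numpy as np

X = np.array([-1e6, -10.0, 0.0, 10.0])
cutoff = np.log(np.finfo(X.dtype).max) - 1
X[np.nan_to_num(X) <= -cutoff] = -cutoff
print(1. / (1. + np.exp(-X)))  # first entry is tiny but finite; no overflow warning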
def plot_chip_variation(plot_var=True):
    ## load data
    dataFile_list = []
    base_name = "sandwichJJ_102318"
    codename_list = ["pbj", "blt", "sub", "loaf"]
    color_list = ['red', 'purple', 'cyan', 'orange']
    quarterIndex_list = [3, 4]
    for q in quarterIndex_list:
        for codename in codename_list:
            next_file = base_name + f"_{codename}_q{q}.dat"
            dataFile_list.append(next_file)
    ##END loop through codenames

    ## load each file and convert to resistance
    for i, dataFile in enumerate(dataFile_list):
        # setup formatting
        data_color = color_list[i % 4]
        codename = codename_list[i % 4]
        mark_forQuarter = ['*', 'o'][(i // 4) % 2]  # Q3 and Q4 respectively

        # for PBJ, BLT, SUB, LOAF
        evap_angle = [30 / 180 * np.pi, 30 / 180 * np.pi,
                      45 / 180 * np.pi, 45 / 180 * np.pi][i % 4]
        thickness = [1E-9, 1.5E-9, 1.5E-9, 1E-9][i % 4]
        hatch = ['//', '\\', 'x', '|'][i % 4]

        if mark_forQuarter == '*':
            continue  # skip Q3 for now, too high variance from opens

        ## based on chip number, is the exterior the first or last column?
        # on Q3, increasing column number corresponds to moving towards the interior;
        # on Q4, towards the exterior.
        # Q1 ~ Q3 and Q2 ~ Q4, but we're not using either of those (bad liftoff)
        is_radial_ordering = [False, True][(i // 4) % 2]  # Q3 and Q4 respectively

        # load file
        with open(dataFile) as open_file:
            read = open_file.readlines()

        ## use regular expressions to extract 2 numbers
        re_template = r"((\d+\.?\d*)\suV)"  # decimal number, then space, then 'uV'
        num_blocks = 5
        voltage_list = parse_input(read, re_template, num_blocks=num_blocks, num_cols=4)
        res, current = voltage_to_resistance_critCurr(voltage_list, bias_r=1E6)

        ## get single junction devices ordered by distance from center of chip,
        # i.e. reverse Q3
        if is_radial_ordering:
            iter_res = iter(res)
        else:
            iter_res = reversed(res)

        ## plot single squids as a function of chip position
        # (loop variables renamed so they no longer shadow the outer `i` and `res`;
        # the old inner `for res in singleJJ_res` clobbered the array passed to
        # add_bands below)
        for col_idx, single_column in enumerate(iter_res):
            singleJJ_res = get_singleJunc_devices(single_column)
            singleJJ_res /= 1E3
            avg = np.nanmean(singleJJ_res)
            var = np.nanvar(singleJJ_res)
            if plot_var:
                plt.plot(col_idx, var, marker=mark_forQuarter, ms=9, color=data_color)
            else:
                for r in singleJJ_res:
                    plt.plot(col_idx, r, marker=mark_forQuarter, ms=5, color=data_color)

        plt.xlabel("Radial position [from center]")
        if plot_var:
            plt.ylabel("Variance in Res. of Single SQuID [kOhms^2]")
        else:
            plt.ylabel("Res. of Single SQuID [kOhms]")
        plt.xticks(np.arange(col_idx + 1, dtype='int'))
        add_bands(np.mean(res), evap_angle, thickness, data_color, hatch=hatch)
    ##END loop through data files

    ## make a custom legend
    label_list = [
        "PBJ Q4\n($30^\circ$ evap, Std $O_2$)",
        "BLT Q4\n($30^\circ$ evap, 0.5 nm Al$O_x$)",
        "SUB Q4\n($45^\circ$ evap, 0.5 nm Al$O_x$)",
        "LOAF Q4\n($45^\circ$ evap, Std $O_2$)"
    ]
    box_outline = dict(facecolor='white', alpha=0.8, boxstyle='round')
    x_pos, y_pos = 0.5, 0.9  ## axis coordinates
    gap = 0.105
    ax = plt.gca()
    for i, label in enumerate(label_list):
        c = color_list[i % 4]
        plt.text(x_pos, y_pos - i * gap, label, color=c, bbox=box_outline,
                 transform=ax.transAxes)
    plt.show()
def _process_eeg(self, samples, timestamp):
    """Process EEG.

    Includes buffering, filtering, windowing and the processing pipeline.

    Args:
        samples (numpy.ndarray): new EEG samples to process
        timestamp (float): timestamp

    Returns:
        output (scalar): output of the pipeline
    """
    # Re-map channels if configured
    if self.eeg_ch_remap:
        samples = samples[:, self.eeg_ch_remap]

    self.eeg_buffer.update(samples)
    self._send_outputs(samples, timestamp, 'raw_eeg')

    # Apply filters
    filt_samples = samples
    if config['filter']:
        filt_samples, self.bandpass_filt['zi'] = signal.lfilter(
            self.bandpass_filt['b'], self.bandpass_filt['a'], samples,
            axis=0, zi=self.bandpass_filt['zi'])
        self.filt_eeg_buffer.update(filt_samples)

    if config['hpfilter']:
        filt_samples, self.hp_filt['zi'] = signal.lfilter(
            self.hp_filt['b'], self.hp_filt['a'], filt_samples, axis=0,
            zi=self.hp_filt['zi'])
        self.hpfilt_eeg_buffer.update(filt_samples)

    if config['lpfilter']:
        smooth_eeg_samples, self.lp_filt['zi'] = signal.lfilter(
            self.lp_filt['b'], self.lp_filt['a'], filt_samples, axis=0,
            zi=self.lp_filt['zi'])
        if self.debug_outputs:
            self._send_output_vec(smooth_eeg_samples, timestamp, 'smooth_eeg')
    else:
        smooth_eeg_samples = filt_samples
    self.smooth_eeg_buffer.update(smooth_eeg_samples)

    # low_freq_chs is only defined when the filter bank is enabled; initialize it
    # so the sends further down cannot raise a NameError
    low_freq_chs = None
    if config['filter_bank']:
        filter_bank_samples = {}
        for name, filt_dict in self.filter_bank.items():
            filter_bank_samples[name], self.filter_bank[name]['zi'] = \
                signal.lfilter(filt_dict['b'], filt_dict['a'], filt_samples,
                               axis=0, zi=self.filter_bank[name]['zi'])
        low_freq_chs = filter_bank_samples['delta'][0, [0, 2]]  # + filter_bank_samples['theta'][0, [0, 1]]

    window = self.smooth_eeg_buffer.extract(self.window_len)
    eegEarWindow = window[:, 3]  # data from the right-ear channel
    # eye movement computed from the difference between the two frontal channels
    eyewindow = self.smooth_eeg_buffer.extract(200)
    eegFLWindow = eyewindow[:, 1]
    eegFRWindow = eyewindow[:, 2]

    # find blinks in the ear-channel window
    blinkVal = ut.blink_template_match(eegEarWindow)
    if blinkVal > 100000 and self.blink == 0:
        self.blink = 50
        self.blinkwait = 350
    else:
        if self.blinkwait > 0:
            self.blinkwait -= 1
        if self.blink > 0:
            self.blink -= 1

    # LONGER-TERM CALM SCORE based on saccadic eye movement
    eye_mov_percent = np.reshape(
        np.percentile(eegFLWindow - eegFRWindow, 90), (1, 1))
    self.eye_mov_percent_buffer.update(eye_mov_percent)
    remap_eye_mov_percent = ut.sigmoid(
        self.eye_mov_percent_buffer.extract().mean(), 0.5, -10, 0)

    max_value = 1
    incr_decr = remap_eye_mov_percent < 0.2
    inc = self.increments_buffer.extract().mean()
    dpoints_per_second = 0.0005
    if incr_decr:
        self.slow_calm_score += dpoints_per_second * inc
    else:
        self.slow_calm_score -= dpoints_per_second * inc * 4

    self.increments_buffer.update(np.reshape(incr_decr, (1, 1)))
    if self.slow_calm_score > max_value:
        self.slow_calm_score = max_value
    elif self.slow_calm_score < 0:
        self.slow_calm_score = 0
    self.slow_calm_score_buffer.update(
        np.reshape(self.slow_calm_score, (1, 1)))

    # Send outputs at a reduced sampling rate
    if self.smooth_eeg_buffer.pts % 3 == 0:
        self._send_output_vec(smooth_eeg_samples, timestamp, 'muse/eeg')
        if self.blink > 0:
            self._send_output(np.array([[1]]), timestamp, 'blink')
        else:
            self._send_output(np.array([[0]]), timestamp, 'blink')
        self._send_output(blinkVal /
                          300000, timestamp, 'blinkVal')
        self._send_output(remap_eye_mov_percent, timestamp, 'saccad')
        self._send_output(
            np.reshape(self.slow_calm_score_buffer.extract().mean(), (1, 1)),
            timestamp, 'calm')  # slow_calm_score
        if low_freq_chs is not None:  # only available when the filter bank is enabled
            self._send_output(low_freq_chs / self.low_freq_chs_std + 0.5,
                              timestamp, 'low_freq_chs')

    # process and send output at every step, usually about every 1/10 s
    if self.eeg_buffer.pts > self.step:
        self.eeg_buffer.pts = 0

        # Get the filtered EEG window
        if config['lpfilter']:
            window = self.smooth_eeg_buffer.extract(self.window_len)
        else:
            window = self.eeg_buffer.extract(self.window_len)
        psd_raw_buffer = self.eeg_buffer.extract(self.window_len)

        # Get the average PSD
        psd, f = ut.fft_continuous(psd_raw_buffer, n=int(self.fs), psd=True,
                                   log='psd', fs=self.fs, window='hamming')
        self.psd_buffer.update(np.expand_dims(psd, axis=0))
        mean_psd = np.nanmean(self.psd_buffer.extract(), axis=0)

        # variance of the EEG window, used for bad-signal detection
        eegVar = np.nanvar(window, axis=0)
        self._send_output_vec(eegVar.reshape(1, self.n_channels), timestamp, 'hsi')
        if self.sparseOutput is not None:
            # send the channel variances as a signal-quality indicator to the
            # source Raspberry Pi
            self._send_sparseOutput_vec(eegVar.reshape(1, self.n_channels),
                                        timestamp, 'hsi')

        # Get band powers and ratios
        bandPowers, bandNames = ut.compute_band_powers(mean_psd, f, relative=False)
        ratioPowers, ratioNames = ut.compute_band_ratios(bandPowers)

        if self.firstWindowProc:
            self.band_powers = bandPowers
            self.band_names = bandNames
            self.ratio_powers = ratioPowers
            self.ratio_names = ratioNames
            self.scores = np.zeros((len(self.band_names), self.n_channels))
            self.firstWindowProc = False

        if eegVar.mean() < 300 and self.blinkwait == 0:  # threshold for good data
            for i, (name, hist) in enumerate(self.hists.items()):
                self.band_powers = bandPowers
                self.ratio_powers = ratioPowers
            # good-data indicator based on the mean window variance and blinkwait
            self._send_output(np.array([[1]]), timestamp, 'goodData')  # good data
        else:
            self._send_output(np.array([[0]]), timestamp, 'goodData')  # bad data

        self._send_outputs(self.band_powers, timestamp, 'bands')
        self._send_outputs(self.ratio_powers, timestamp, 'ratios')

        mask = ((f >= 30) & (f < 50))
        if low_freq_chs is not None:
            self.low_freq_chs_buffer.update(np.reshape(low_freq_chs, (1, -1)))
            self.low_freq_chs_std = self.low_freq_chs_buffer.extract().std(axis=0)
        emg_power = np.mean(mean_psd[mask, 0], axis=0)  # HF power of the right ear
        self._send_output(np.array([np.sqrt(emg_power) / 2]), timestamp, 'emg')
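# Not from the original source: ut.compute_band_powers is called above but not
# shown. This is a minimal sketch of one plausible implementation, averaging PSD
# bins inside fixed EEG bands; the band edges and the (freqs x channels) PSD
# layout are assumptions inferred from the call sites.
import numpy as np

def compute_band_powers(psd, f, relative=False):
    bands = {'delta': (1, 4), 'theta': (4, 8), 'alpha': (8, 13), 'beta': (13, 30)}
    # average the PSD rows falling inside each band; one value per channel
    powers = np.array([psd[(f >= lo) & (f < hi)].mean(axis=0)
                       for lo, hi in bands.values()])
    if relative:
        powers = powers / powers.sum(axis=0)
    return powers, list(bands.keys())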
def delta(x, y, x_denominators=1, y_denominators=1, assume_normal=True,
          alpha=0.05, min_observations=20, nruns=10000, relative=False):
    """ Calculates the difference of means between the samples in a statistical sense.

    Computation is done in the form treatment minus control, i.e. x - y.
    Note that NaNs are treated as if they do not exist in the data.

    :param x: sample of the treatment group
    :type  x: pd.Series or array-like
    :param y: sample of the control group
    :type  y: pd.Series or array-like
    :param x_denominators: denominators for the treatment group
    :type  x_denominators: pd.Series or array-like
    :param y_denominators: denominators for the control group
    :type  y_denominators: pd.Series or array-like
    :param assume_normal: specifies whether normal distribution assumptions can be made
    :type  assume_normal: boolean
    :param alpha: significance level (alpha)
    :type  alpha: float
    :param min_observations: minimum number of observations needed
    :type  min_observations: int
    :param nruns: number of bootstrap runs; only used if assume_normal is false
    :type  nruns: int
    :param relative: if relative == True, the values will be returned as distances
        below and above the mean, rather than absolute values. In this case the
        interval is mean - ret_val[0] to mean + ret_val[1]. This is more useful in
        many situations because it corresponds with the sem() and std() functions.
    :type  relative: boolean

    :return: results of type SimpleTestStatistics
    :rtype:  SimpleTestStatistics
    """
    # Check that data was provided and has the correct format
    if x is None or y is None:
        raise ValueError('Please provide two non-None samples.')
    if not isinstance(x, (pd.Series, np.ndarray, list)):
        raise TypeError('Please provide samples of type Series or list.')
    if type(x) != type(y):
        raise TypeError('Please provide samples of the same type.')

    # check x and y are 'array-like'
    assert hasattr(x, '__len__')
    assert hasattr(y, '__len__')

    # If either denominator is a scalar, convert it to a list of identical entries:
    if not hasattr(x_denominators, '__len__'):
        x_denominators = [x_denominators] * len(x)
    if not hasattr(y_denominators, '__len__'):
        y_denominators = [y_denominators] * len(y)

    # lengths should match
    assert len(x) == len(x_denominators)
    assert len(y) == len(y_denominators)

    # Must be numpy arrays of floats (otherwise .isnan won't work)
    x = np.array(x, dtype=float)
    y = np.array(y, dtype=float)
    x_denominators = np.array(x_denominators, dtype=float)
    y_denominators = np.array(y_denominators, dtype=float)

    # Dividing and re-multiplying by the denominator puts a NaN in the
    # numerator wherever the denominator is zero or NaN:
    x = x / x_denominators * x_denominators
    y = y / y_denominators * y_denominators

    # Next, any NaNs in the numerator must be 'copied' to the denominator.
    x_denominators = x_denominators + (x * 0.0)
    y_denominators = y_denominators + (y * 0.0)

    # confirm the numerators have the same 'nan-ness' as their denominators
    assert (np.isnan(x) == np.isnan(x_denominators)).all()
    assert (np.isnan(y) == np.isnan(y_denominators)).all()

    percentiles = [alpha * 100 / 2, 100 - alpha * 100 / 2]

    _x = x
    _y = y
    _x_denominators = x_denominators
    _y_denominators = y_denominators
    _x_ratio = _x / _x_denominators
    _y_ratio = _y / _y_denominators
    _x_strange = _x / np.nanmean(_x_denominators)
    _y_strange = _y / np.nanmean(_y_denominators)

    # The original four arrays are no longer used below; delete them to avoid
    # accidental use.
    del x
    del y
    del x_denominators
    del y_denominators

    x_nan = np.isnan(_x_ratio).sum()
    y_nan = np.isnan(_y_ratio).sum()
    if x_nan > 0:
        warnings.warn('Discarding ' + str(x_nan) + ' NaN(s) in the x array!')
        logger.warning('Discarding ' + str(x_nan) + ' NaN(s) in the x array!')
    if y_nan > 0:
        warnings.warn('Discarding ' + str(y_nan) + ' NaN(s) in the y array!')
        # fixed copy-paste bug: this previously re-logged the x-array message
        logger.warning('Discarding ' + str(y_nan) + ' NaN(s) in the y array!')

    ss_x = sample_size(_x_ratio)
    ss_y = sample_size(_y_ratio)

    # Check if enough observations are left after dropping NaNs
    partial_simple_test_stats = None
    if min(ss_x, ss_y) < min_observations:
        # Set the mean to NaN
        mu = np.nan
        # Create a NaN confidence-interval dictionary
        c_i = dict(list(zip(percentiles, np.empty(len(percentiles)) * np.nan)))
    else:
        # Compute the mean
        mu = _delta_mean(_x, _y)
        # Compute the confidence intervals
        if assume_normal:
            logger.info("The distribution of the two samples is assumed normal. "
                        "Performing the sample difference distribution calculation.")
            partial_simple_test_stats = normal_sample_weighted_difference(
                x_numerators=_x, y_numerators=_y,
                x_denominators=_x_denominators, y_denominators=_y_denominators,
                percentiles=percentiles, relative=relative)
            c_i = partial_simple_test_stats['c_i']
            mu = partial_simple_test_stats['mean1'] - partial_simple_test_stats['mean2']
        else:
            logger.info("The distribution of the two samples is not normal. "
                        "Performing the bootstrap.")
            c_i, _ = bootstrap(x=_x_strange, y=_y_strange, percentiles=percentiles,
                               nruns=nruns, relative=relative)

    # correct the last few lines!!
    if partial_simple_test_stats is not None:
        treatment_statistics = SampleStatistics(ss_x,
                                                partial_simple_test_stats['mean1'],
                                                partial_simple_test_stats['var1'])
        control_statistics = SampleStatistics(ss_y,
                                              partial_simple_test_stats['mean2'],
                                              partial_simple_test_stats['var2'])
    else:
        # NOTE: this fallback only applies to the bootstrap and min_observations paths
        treatment_statistics = SampleStatistics(ss_x, float(np.nanmean(_x_strange)),
                                                float(np.nanvar(_x_strange)))
        control_statistics = SampleStatistics(ss_y, float(np.nanmean(_y_strange)),
                                              float(np.nanvar(_y_strange)))

    variant_statistics = BaseTestStatistics(control_statistics, treatment_statistics)
    if partial_simple_test_stats is not None:
        p_value = partial_simple_test_stats['p_value']
    else:
        p_value = compute_p_value_from_samples(_x_strange, _y_strange)
    statistical_power = compute_statistical_power_from_samples(
        _x_strange, _y_strange, alpha)  # TODO: wrong

    logger.info("Delta calculation finished!")
    return SimpleTestStatistics(variant_statistics.control_statistics,
                                variant_statistics.treatment_statistics,
                                float(mu), c_i, p_value, statistical_power)
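# Not part of the original source: a hedged usage sketch of delta() on toy data,
# assuming the surrounding module's helpers (sample_size, SampleStatistics, etc.)
# are available in scope. The returned field names follow the docstring above.
import numpy as np

rng = np.random.default_rng(42)
treatment = rng.normal(10.2, 2.0, 500)  # simulated treatment sample
control = rng.normal(10.0, 2.0, 500)    # simulated control sample
result = delta(treatment, control, assume_normal=True, alpha=0.05)
# result is a SimpleTestStatistics carrying the mean difference, the
# confidence interval, the p-value and the statistical power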
def get_statistics_per_category(databaseFolder, processedDataFolder=None):
    if processedDataFolder is None:
        processedDataFolder = "datasets/acousticic"
    classes = sorted([f for f in os.listdir(databaseFolder)
                      if os.path.isdir(os.path.join(databaseFolder, f)) and not f.startswith('.')],
                     key=lambda f: f.lower())
    stats_names = ['max', 'min', 'mean', 'median', 'std', 'var', 'kurt', 'skew',
                   'percentile25', 'percentile50', 'percentile75']
    categoryDictionary = {"voice": ["f0", "vuv"],
                          "glottal_flow": ["naq", "qoq", "h1h2", "psp", "mdq", "peakslope", "rd", "creak"],
                          "mcep": ["mcep_"],
                          "hmpdm": ["hmpdm_"],
                          "hmpdd": ["hmpdd_"],
                          }
    for category in categoryDictionary.keys():
        startFlag = True
        analyzedFiles = []
        for className in classes:
            files = sorted([f for f in os.listdir(os.path.join(databaseFolder, className))
                            if os.path.isfile(os.path.join(databaseFolder, className, f))
                            and not f.startswith('.') and f[-4:].lower() == ".csv"],
                           key=lambda f: f.lower())
            analyzedFiles += ["%s,%s" % (file, className) for file in files]
            for feat_file in files:
                mm_feats = []
                mm_names = []
                df = pandas.read_csv(os.path.join(databaseFolder, className, feat_file), header='infer')
                feature_names = df.columns.values
                for feat in feature_names:
                    reference = categoryDictionary.get(category)
                    for string in reference:
                        if feat.strip().lower().startswith(string) \
                                or feat.strip().lower().endswith(string):
                            # Feature vector
                            vals = df[feat].values
                            # Run statistics (nan_policy='omit' keeps kurtosis and
                            # skewness consistent with the NaN-aware np.nan* calls)
                            maximum = np.nanmax(vals)
                            minimum = np.nanmin(vals)
                            mean = np.nanmean(vals)
                            median = np.nanmedian(vals)
                            std = np.nanstd(vals)
                            var = np.nanvar(vals)
                            kurt = scipy.stats.kurtosis(vals, nan_policy='omit')
                            skew = scipy.stats.skew(vals, nan_policy='omit')
                            percentile25 = np.nanpercentile(vals, 25)
                            percentile50 = np.nanpercentile(vals, 50)
                            percentile75 = np.nanpercentile(vals, 75)
                            names = [feat.strip() + "_" + stat for stat in stats_names]
                            feats = [maximum, minimum, mean, median, std, var, kurt, skew,
                                     percentile25, percentile50, percentile75]
                            if startFlag:
                                for n in names:
                                    mm_names.append(n)
                            for f in feats:
                                if np.isinf(f):
                                    mm_feats.append(np.sign(f))
                                elif np.isnan(f):
                                    mm_feats.append(0)
                                else:
                                    mm_feats.append(f)
                            break
                if startFlag:
                    matrix = [mm_names + ["Class"]]
                    startFlag = False
                matrix.append(mm_feats + [className])
        am.create_arff(matrix, classes, processedDataFolder, category, category)
        print("Analysis of %s acquired." % (category))
        with open(os.path.join(processedDataFolder, "%s.txt" % (category)), "w+") as out_file:
            out_file.write("\n".join(analyzedFiles))
Rs_GT = la.norm(x_GT - N, axis=1)
RD_GT = Rs_GT[mp[:, 0]] - Rs_GT[mp[:, 1]]
Rs_corr = np.array([NR_corr[i - 1][3] for i in Nids])
Rs = np.array(row.iat[9]) * 1e-9 * C0 + Rs_corr
RD = Rs[mp[:, 0]] - Rs[mp[:, 1]]
diff = RD_GT - RD
while np.count_nonzero(~np.isnan(diff)) > 1:
    score = np.zeros(M)
    med = np.zeros(M)
    var = np.zeros(M)
    for i in range(M):
        x = np.abs(diff[(mp == i).any(axis=1)])
        var[i] = np.nanvar(x) ** 0.5  # note: this is the standard deviation
        med[i] = np.nanmedian(x)
    # stop once the smallest relative spread (std/median) is no longer small
    if np.nanmin(var / med) > 1e-1:
        break
    rem = np.nanargmin(var / med)
    diffidx = np.where((mp == rem).any(axis=1) & (~np.isnan(diff)))[0][-1]
    cond = mp[diffidx, 0] == rem
    flip = 1 if cond else -1
    d = np.sign(diff[diffidx]) * flip * med[rem]
    if abs(d) < 5e4:
def get_statistics_covarep(databaseFolder, processedDataFolder=None, outputFileName=None, relationName=None):
    if processedDataFolder is None:
        processedDataFolder = "datasets/acousticic"
    if outputFileName is None:
        outputFileName = "all"
    if relationName is None:
        relationName = "all_acousticical"
    classes = sorted([f for f in os.listdir(databaseFolder)
                      if os.path.isdir(os.path.join(databaseFolder, f)) and not f.startswith('.')],
                     key=lambda f: f.lower())
    stats_names = ['max', 'min', 'mean', 'median', 'std', 'var', 'kurt', 'skew',
                   'percentile25', 'percentile50', 'percentile75']
    startFlag = True
    analyzedFiles = []
    for className in classes:
        files = sorted([f for f in os.listdir(os.path.join(databaseFolder, className))
                        if os.path.isfile(os.path.join(databaseFolder, className, f))
                        and not f.startswith('.') and f[-4:].lower() == ".csv"],
                       key=lambda f: f.lower())
        analyzedFiles += ["%s,%s" % (file, className) for file in files]
        for feat_file in files:
            mm_feats = []
            mm_names = []
            df = pandas.read_csv(os.path.join(databaseFolder, className, feat_file), header='infer')
            feature_names = df.columns.values
            for feat in feature_names:
                # Feature vector
                vals = df[feat].values
                # Run statistics (nan_policy='omit' keeps kurtosis and skewness
                # consistent with the NaN-aware np.nan* calls)
                maximum = np.nanmax(vals)
                minimum = np.nanmin(vals)
                mean = np.nanmean(vals)
                median = np.nanmedian(vals)
                std = np.nanstd(vals)
                var = np.nanvar(vals)
                kurt = scipy.stats.kurtosis(vals, nan_policy='omit')
                skew = scipy.stats.skew(vals, nan_policy='omit')
                percentile25 = np.nanpercentile(vals, 25)
                percentile50 = np.nanpercentile(vals, 50)
                percentile75 = np.nanpercentile(vals, 75)
                names = [feat.strip() + "_" + stat for stat in stats_names]
                feats = [maximum, minimum, mean, median, std, var, kurt, skew,
                         percentile25, percentile50, percentile75]
                if startFlag:
                    for n in names:
                        mm_names.append(n)
                for f in feats:
                    if np.isinf(f):
                        mm_feats.append(np.sign(f))
                    elif np.isnan(f):
                        mm_feats.append(0)
                    else:
                        mm_feats.append(f)
            if startFlag:
                matrix = [mm_names + ["Class"]]
                startFlag = False
            matrix.append(mm_feats + [className])
    am.create_arff(matrix, classes, processedDataFolder, outputFileName, relationName)
    print("Analysis of all COVAREP features acquired.")
    with open(os.path.join(processedDataFolder, outputFileName + ".txt"), "w+") as out_file:
        out_file.write("\n".join(analyzedFiles))
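# Not from the original source: the two statistics helpers above share the same
# per-feature summary; a compact NaN-robust version could look like this. It is a
# sketch, not a drop-in replacement (it returns a dict rather than parallel lists).
import numpy as np
import scipy.stats

def nan_summary(vals):
    # scipy's nan_policy='omit' mirrors the behaviour of the np.nan* calls
    return {
        'max': np.nanmax(vals), 'min': np.nanmin(vals),
        'mean': np.nanmean(vals), 'median': np.nanmedian(vals),
        'std': np.nanstd(vals), 'var': np.nanvar(vals),
        'kurt': scipy.stats.kurtosis(vals, nan_policy='omit'),
        'skew': scipy.stats.skew(vals, nan_policy='omit'),
        'percentile25': np.nanpercentile(vals, 25),
        'percentile50': np.nanpercentile(vals, 50),
        'percentile75': np.nanpercentile(vals, 75),
    }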
def de_mean(u, v):
    up = u - np.nanmean(u)
    vp = v - np.nanmean(v)
    return up, vp

# find the angle of the principal axes
up, vp = de_mean(ubar, vbar)
# mask out high speeds because they skew the principal-axis direction
spd_lim = 0.5
spd = np.sqrt(up**2 + vp**2)
up[spd > spd_lim] = np.nan
vp[spd > spd_lim] = np.nan
up, vp = de_mean(up, vp)
theta = 0.5 * np.arctan2(2 * np.nanmean(up * vp),
                         (np.nanvar(up) - np.nanvar(vp)))
# and rotate
ubar_r, vbar_r = rot_vec(ubar, vbar, theta)
u_r, v_r = rot_vec(u, v, theta)

# plotting
plt.close('all')
fig = plt.figure(figsize=(18, 10))

# map
ax = fig.add_subplot(3, 4, 4)
ax.plot(lon, lat, '*r')
pad = .1
ax.axis([lon - pad, lon + pad, lat - pad, lat + pad])
pfun.add_coast(ax)
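# Not part of the original source: rot_vec is used above but not shown. A minimal
# sketch under one common convention (rotate the velocity components by -theta so
# the new x-axis lies along the principal axis); the actual helper may differ.
import numpy as np

def rot_vec(u, v, theta):
    # project (u, v) onto axes rotated by theta
    u_r = u * np.cos(theta) + v * np.sin(theta)
    v_r = -u * np.sin(theta) + v * np.cos(theta)
    return u_r, v_r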
def __QS_move__(self, idt, **kwargs):
    if self._iDT == idt:
        return 0
    CurInd = self._AllDTs.index(idt)
    if CurInd <= self.EventPreWindow + self.EstWindow:
        return 0
    # output keys (kept verbatim): 事件记录 = event log [ID, event date, days since
    # event], 预期收益率 = expected returns, 异常收益率 = abnormal returns,
    # 异常方差 = abnormal-return variance
    self._Output["事件记录"][:, 2] += 1
    IDs = self._FactorTable.getFilteredID(idt=idt, id_filter_str=self.EventFilter)
    nID = len(IDs)
    if nID > 0:
        self._Output["事件记录"] = np.r_[self._Output["事件记录"],
                                     np.c_[IDs, [idt] * nID, np.zeros(shape=(nID, 1))]]
        Temp = np.full(shape=(nID, self.EventPreWindow + 1 + self.EventPostWindow),
                       fill_value=np.nan)
        self._Output["预期收益率"] = np.r_[self._Output["预期收益率"], Temp]
        self._Output["异常收益率"] = np.r_[self._Output["异常收益率"], Temp]
        self._Output["异常方差"] = np.r_[self._Output["异常方差"], Temp]
        EstStartInd = CurInd - self.EventPreWindow - self.EstWindow - 1
        Price = self._FactorTable.readData(
            dts=self._AllDTs[EstStartInd:CurInd + 1], ids=IDs,
            factor_names=[self.PriceFactor]).iloc[0, :, :]
        Return = _calcReturn(Price.values, return_type=self.ReturnType)
        BPrice = self._BenchmarkFT.readData(
            factor_names=[self.BenchmarkPrice], ids=[self.BenchmarkID],
            dts=self._AllDTs[EstStartInd:CurInd + 1]).iloc[0, :, :]
        ExpectedReturn = _calcReturn(BPrice.values,
                                     return_type=self.ReturnType).repeat(nID, axis=1)
        self._Output["预期收益率"][-nID:, :self.EventPreWindow + 1] = \
            ExpectedReturn[self.EstWindow:].T
        self._Output["异常收益率"][-nID:, :self.EventPreWindow + 1] = \
            (Return[self.EstWindow:] - ExpectedReturn[self.EstWindow:]).T
        self._Output["异常方差"][-nID:, :] = np.nanvar(
            Return[:self.EstWindow] - ExpectedReturn[:self.EstWindow],
            axis=0, ddof=1).reshape((nID, 1)).repeat(
            self.EventPreWindow + 1 + self.EventPostWindow, axis=1)
    Mask = (self._Output["事件记录"][:, 2] <= self.EventPostWindow)
    IDs = self._Output["事件记录"][:, 0][Mask]
    RowPos = np.arange(self._Output["异常收益率"].shape[0])[Mask].tolist()
    ColPos = (self._Output["事件记录"][Mask, 2] + self.EventPreWindow).astype(int)  # np.int is deprecated
    BPrice = self._BenchmarkFT.readData(
        factor_names=[self.BenchmarkPrice], ids=[self.BenchmarkID],
        dts=[self._AllDTs[CurInd - 1], idt]).iloc[0, :, 0]
    ExpectedReturn = _calcReturn(BPrice.values,
                                 return_type=self.ReturnType).repeat(len(IDs), axis=0)
    self._Output["预期收益率"][RowPos, ColPos] = ExpectedReturn
    Price = self._FactorTable.readData(
        dts=[self._AllDTs[CurInd - 1], idt], ids=sorted(set(IDs)),
        factor_names=[self.PriceFactor]).iloc[0, :, :].loc[:, IDs]
    self._Output["异常收益率"][RowPos, ColPos] = (
        _calcReturn(Price.values, return_type=self.ReturnType)[0] - ExpectedReturn)
    return 0
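# Not from the original source: _calcReturn is referenced above but not shown.
# A minimal sketch assuming return_type selects simple vs. log returns over the
# time axis (axis 0), matching how the event-study code consumes its output
# (one fewer row than the input price array).
import numpy as np

def _calcReturn(price, return_type="simple"):
    if return_type == "log":
        return np.diff(np.log(price), axis=0)  # log returns
    return price[1:] / price[:-1] - 1.0  # simple returns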