def test_nanstd_issue60():
    "nanstd regression test (issue #60)"
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        f = bn.nanstd([1.0], ddof=1)
        with np.errstate(invalid='ignore'):
            s = bn.slow.nanstd([1.0], ddof=1)
        assert_equal(f, s, err_msg="bn.nanstd([1.0], ddof=1) wrong")

        f = bn.nanstd([1], ddof=1)
        with np.errstate(invalid='ignore'):
            s = bn.slow.nanstd([1], ddof=1)
        assert_equal(f, s, err_msg="bn.nanstd([1], ddof=1) wrong")

        f = bn.nanstd([1, np.nan], ddof=1)
        with np.errstate(invalid='ignore'):
            s = bn.slow.nanstd([1, np.nan], ddof=1)
        assert_equal(f, s, err_msg="bn.nanstd([1, nan], ddof=1) wrong")

        f = bn.nanstd([[1, np.nan], [np.nan, 1]], axis=0, ddof=1)
        with np.errstate(invalid='ignore'):
            s = bn.slow.nanstd([[1, np.nan], [np.nan, 1]], axis=0, ddof=1)
        assert_equal(f, s, err_msg="issue #60 regression")
def pairwise_covariance(x_mat, y=None, correlation=False):
    x_mat = x_mat.copy()
    x_nan = np.isnan(x_mat)
    if y is not None:
        if y.shape[0] != 1:
            assert y.shape == x_mat.shape, \
                'y and x_mat must have the same shape if y has more than one row'
            y_mat = y
        else:
            y_mat = np.tile(y, (x_mat.shape[0], 1))
        y_nan = np.isnan(y_mat)
        # Mask each row so both series share the same valid entries.
        x_mat[y_nan] = np.nan
        y_mat[x_nan] = np.nan
        pw_multiply = np.multiply(
            x_mat - bn.nanmean(x_mat, axis=1).reshape(-1, 1),
            y_mat - bn.nanmean(y_mat, axis=1).reshape(-1, 1))
        cov = bn.nansum(pw_multiply, axis=1) / (
            pw_multiply.shape[1] - np.isnan(pw_multiply).sum(axis=1) - 1)
        if correlation:
            return cov / np.multiply(bn.nanstd(x_mat, axis=1, ddof=1),
                                     bn.nanstd(y_mat, axis=1, ddof=1))
        return cov
    else:
        if correlation:
            return pd.DataFrame(x_mat).T.corr().values
        return pd.DataFrame(x_mat).T.cov().values
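# A minimal usage sketch for pairwise_covariance above (not from the original
# source); assumes numpy, pandas and bottleneck are imported as np, pd and bn,
# and the array values are purely illustrative.
import numpy as np

x = np.array([[1.0, 2.0, np.nan, 4.0],
              [2.0, 1.0, 0.0, np.nan]])
y = np.array([[1.5, 2.5, 3.0, 4.5]])   # single row, tiled against every row of x

print(pairwise_covariance(x, y))                    # row-wise covariance, NaN-aware
print(pairwise_covariance(x, y, correlation=True))  # row-wise Pearson correlation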
def proportionality(x, y):
    num = bottleneck.nanvar(np.log1p(y) - np.log1p(x))
    denom = (bottleneck.nanstd(np.log1p(x)) + bottleneck.nanstd(np.log1p(y)))**2
    try:
        return num / denom
    except ZeroDivisionError:
        return np.nan
def _nanstd(array, axis=None, ddof=0):
    """Bottleneck nanstd function that handles a tuple axis."""
    if isinstance(axis, tuple):
        array = _move_tuple_axes_first(array, axis=axis)
        axis = 0

    if isinstance(array, Quantity):
        return array.__array_wrap__(bottleneck.nanstd(array, axis=axis, ddof=ddof))
    else:
        return bottleneck.nanstd(array, axis=axis, ddof=ddof)
def xcorr(x, y):
    n = len(x)
    m = len(y)
    meany = np.nanmean(y)
    stdy = np.nanstd(np.asarray(y))
    tmp = rolling_window(x, m)
    with np.errstate(divide="ignore"):
        c = bn.nansum(
            (y - meany) * (tmp - np.reshape(bn.nanmean(tmp, -1), (n - m + 1, 1))),
            -1) / (m * bn.nanstd(tmp, -1) * stdy)
        c[m * bn.nanstd(tmp, -1) * stdy == 0] = 0
    return c
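# Hedged usage sketch for xcorr above (not part of the original source);
# assumes numpy and bottleneck are imported as np and bn. rolling_window is
# assumed to behave like numpy's sliding_window_view; the helper below is one
# possible stand-in for it.
import numpy as np

def rolling_window(a, window):
    return np.lib.stride_tricks.sliding_window_view(a, window)

x = np.sin(np.linspace(0, 10 * np.pi, 500))
y = x[100:150]            # template cut from inside the signal
c = xcorr(x, y)
print(int(np.argmax(c)))  # expected to peak near an offset of 100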
def calc_table_np(self, array):
    if len(array) == 0:
        return array
    if self.out_choiced == 0:  # snr
        return self.make_table(
            (bottleneck.nanmean(array, axis=0) /
             bottleneck.nanstd(array, axis=0)).reshape(1, -1), self.data)
    elif self.out_choiced == 1:  # avg
        return self.make_table(
            bottleneck.nanmean(array, axis=0).reshape(1, -1), self.data)
    else:  # std
        return self.make_table(
            bottleneck.nanstd(array, axis=0).reshape(1, -1), self.data)
def nanstd(array, axis=None, ddof=0):
    """
    A nanstd function that uses bottleneck if available.
    """
    if HAS_BOTTLENECK:
        if isinstance(axis, tuple):
            array = move_tuple_axes_first(array, axis=axis)
            axis = 0

        if isinstance(array, u.Quantity):
            return array.__array_wrap__(bn.nanstd(array, axis=axis, ddof=ddof))
        else:
            return bn.nanstd(array, axis=axis, ddof=ddof)
    else:
        return np.nanstd(array, axis=axis, ddof=ddof)
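# The wrappers above work around the fact that bottleneck's nanstd does not
# accept a tuple of axes: the requested axes are moved to the front, collapsed
# into one, and reduced along axis 0. A hypothetical sketch of such a helper
# (the real move_tuple_axes_first lives in the surrounding project), checked
# against numpy's tuple-axis reduction:
import numpy as np

def move_tuple_axes_first(array, axis):
    # Collapse the requested axes into a single leading axis.
    other = tuple(i for i in range(array.ndim) if i not in axis)
    shape = [array.shape[i] for i in other]
    return array.transpose(axis + other).reshape(-1, *shape)

a = np.random.rand(3, 4, 5)
a[0, 0, 0] = np.nan
flat = move_tuple_axes_first(a, (0, 2))
print(np.allclose(np.nanstd(flat, axis=0), np.nanstd(a, axis=(0, 2))))  # True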
def weighted_mean(_line):
    max_weight = 50
    # print(_line.shape)

    median_2d = bottleneck.nanmedian(_line, axis=1).reshape(_line.shape[0], 1).repeat(_line.shape[1], axis=1)
    std = bottleneck.nanstd(_line, axis=1)
    std_2d = std.reshape(_line.shape[0], 1).repeat(_line.shape[1], axis=1)

    weight_2d = numpy.fabs(std_2d / (_line - median_2d))
    # weight_2d[weight_2d > max_weight] = max_weight
    weight_2d[numpy.isinf(weight_2d)] = max_weight

    for i in range(3):
        avg = bottleneck.nansum(_line * weight_2d, axis=1) / bottleneck.nansum(weight_2d, axis=1)
        avg_2d = avg.reshape(_line.shape[0], 1).repeat(_line.shape[1], axis=1)

        std = numpy.sqrt(
            bottleneck.nansum(((_line - avg_2d)**2 * weight_2d), axis=1) /
            bottleneck.nansum(weight_2d, axis=1))
        std_2d = std.reshape(_line.shape[0], 1).repeat(_line.shape[1], axis=1)

        weight_2d = numpy.fabs(std_2d / (_line - avg_2d))
        # weight_2d[weight_2d > max_weight] = max_weight
        weight_2d[numpy.isinf(weight_2d)] = max_weight

    return bottleneck.nansum(_line * weight_2d, axis=1) / bottleneck.nansum(weight_2d, axis=1)
def _nanstd(array, axis=None, ddof=0):
    """Bottleneck nanstd function that handles a tuple axis."""
    if isinstance(axis, tuple):
        array = _move_tuple_axes_first(array, axis=axis)
        axis = 0
    return bottleneck.nanstd(array, axis=axis, ddof=ddof)
def fast_helper(data, distances, rlimit, numan):
    size = data.shape
    rstep = rlimit / numan
    r1 = 0.0
    r2 = float(rstep)  # cast as a float so that numba doesn't complain

    r_vec = np.zeros(numan, dtype=np.float32)
    mean_vec = np.zeros(numan, dtype=np.float32)
    error_vec = np.zeros(numan, dtype=np.float32)

    for k in range(numan):
        anlist = []
        for i in range(size[0]):
            for j in range(size[1]):
                if distances[i, j] > r1:
                    if distances[i, j] <= r2:
                        anlist.append(data[i, j])

        anarray = np.array(anlist, dtype=np.float32)
        mean_vec[k] = bn.nansum(anarray)
        error_vec[k] = bn.nanstd(anarray)
        r_vec[k] = (r1 + r2) * 0.5
        r1 = r2
        r2 += rstep

    return np.array([r_vec, mean_vec, error_vec])
def bootstrap(func, arglist, N, kwargs={}):
    '''Computes error via bootstrapping on an arbitrary function. The
    major restriction is that func is assumed to return a single, 1D,
    Numpy array. Bootstrap will also resample ALL of the elements of
    arglist. If you want to keep some inputs unchanged pass them as
    keywords. The func can have an arbitrary number of arguments and
    keyword arguments. If the output of func is a Ndarray of length N
    then bootstrap returns two arrays of length N. The first is the
    mean value over all bootstraps and the second is the stddev of the
    same.
    '''
    if not isinstance(arglist, list):
        arglist = [arglist]

    size = len(arglist[0])
    resultarr = None
    for i in range(N):
        idx = np.random.randint(0, size, size)
        bootargs = [a[idx] for a in arglist]
        result = func(*bootargs, **kwargs)
        try:
            resultarr = np.vstack((resultarr, result))
        except ValueError:
            resultarr = result

    print(np.isnan(resultarr).sum())
    return bn.nanmean(resultarr, axis=0), bn.nanstd(resultarr, axis=0)
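# Quick usage sketch for bootstrap above (illustrative, not from the source):
# bootstrapping the mean of a normal sample should recover a spread close to
# the analytic standard error sigma / sqrt(n) ~= 0.063 here.
import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(loc=5.0, scale=2.0, size=1000)
boot_mean, boot_err = bootstrap(lambda a: np.array([a.mean()]), [x], 500)
print(boot_mean, boot_err)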
def zscore(arr, axis=None):
    """
    Z-score along the specified axis.

    Parameters
    ----------
    arr : ndarray
        Input array.
    axis : {int, None}, optional
        The axis along which to take the z-score. The default (None) is
        to find the z-score of the flattened array.

    Returns
    -------
    y : ndarray
        A copy normalized with the Z-score along the specified axis.

    Examples
    --------
    >>> arr = np.array([1, np.nan, 2, 3])
    >>> zscore(arr)
    array([-1.22474487,         NaN,  0.        ,  1.22474487])

    """
    arr = demean(arr, axis)
    norm = bn.nanstd(arr, axis)
    if (axis != 0) and (axis is not None) and (not np.isscalar(norm)):
        ind = [slice(None)] * arr.ndim
        ind[axis] = np.newaxis
        norm = norm[tuple(ind)]
    arr /= norm
    return arr
def numba_cent(data, distances, maxd, numan):
    size = data.shape
    rstep = maxd / numan
    r1 = 0.0
    r2 = rstep

    stdarr = np.zeros(numan, dtype=np.float32)
    rarr = np.zeros(numan, dtype=np.float32)
    outarr = np.zeros(numan, dtype=np.float32)

    for k in range(numan):
        anlist = []
        for i in range(size[0]):
            for j in range(size[1]):
                if distances[i, j] > r1:
                    if distances[i, j] <= r2:
                        anlist.append(data[i, j])
                        # outarr[k] += data[i, j]

        anarray = np.array(anlist, dtype=np.float32)
        outarr[k] = bn.nansum(anarray)
        stdarr[k] = bn.nanstd(anarray)
        rarr[k] = (r1 + r2) * 0.5
        r1 = r2
        r2 += rstep

    return np.array([rarr, outarr, stdarr])
def time_step(self, xt):
    xt = np.reshape(xt, newshape=self.dimensions)

    ret_val = 0.
    self.buffer.append(xt)
    self.present.time_step(xt)
    if self.t >= self.buffer_len:
        pst_xt = self.buffer[0]
        self.past.time_step(pst_xt)
        if self.t >= self.present.theta + self.past.theta:
            ret_val = self.comparison_function(self.present, self.past,
                                               self.present.alpha)
    self.ma_window.append(ret_val)
    if self.t % self.ma_recalc_delay == 0:
        self.anomaly_mean = bn.nanmean(self.ma_window)
        self.anomaly_std = bn.nanstd(self.ma_window, ddof=self.ddof)
    if self.anomaly_std is None or self.t < len(self.ma_window):
        anomaly_density = 0
    else:
        normalized_score = (ret_val - self.anomaly_mean) / self.anomaly_std
        if -4 <= normalized_score <= 4:
            anomaly_density = CDF_TABLE[round(normalized_score, 3)]
        elif normalized_score > 4:
            anomaly_density = 1.
        else:
            anomaly_density = 0.
    self.t += 1
    return ret_val, anomaly_density
def factor_normalize(factor):
    x_m = factor.values
    mean = bn.nanmean(x_m, axis=1).reshape(-1, 1)
    std = bn.nanstd(x_m, axis=1, ddof=1).reshape(-1, 1)
    with np.errstate(invalid='ignore'):
        res = (x_m - mean) / std
    return pd.DataFrame(res, factor.index, factor.columns)
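# Illustrative check for factor_normalize above (not from the source): every
# row of the returned frame should have mean ~0 and sample std ~1.
import numpy as np
import pandas as pd

factor = pd.DataFrame(np.random.rand(3, 5),
                      index=pd.date_range("2024-01-01", periods=3),
                      columns=list("ABCDE"))
norm = factor_normalize(factor)
print(norm.mean(axis=1).round(12).tolist())          # ~[0.0, 0.0, 0.0]
print(norm.std(axis=1, ddof=1).round(12).tolist())   # ~[1.0, 1.0, 1.0]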
def simple_sky(sky):
    skymed = bt.median(sky)
    skymean = bt.nanmean(sky)
    skymod = 3. * skymed - 2. * skymean  # mode estimate for a skewed sky distribution
    skystd = bt.nanstd(sky)
    return skymod, skystd, len(sky)
def std_bootstrap(argument):
    # argument = (sample, weights) or (sample, weights, std)
    sample = argument[0]
    weights = argument[1]
    if len(argument) == 3:
        std1 = argument[2]
        sample = np.random.normal(loc=sample, scale=std1)
    X_resample = bootstrap_resample(X=sample, weights=weights)
    std_boot = bn.nanstd(X_resample)
    return std_boot
def find_modes(self, prominence_factor=2):
    T_shrinked = np.nanmean(
        abs(self.transmission - np.nanmean(self.transmission, axis=0)), axis=1)
    mode_indexes, _ = scipy.signal.find_peaks(
        T_shrinked, prominence=prominence_factor * bn.nanstd(T_shrinked))
    mode_wavelengths = np.sort(self.wavelengths[mode_indexes])
    mode_wavelengths = np.array(
        [x for x in mode_wavelengths if x > self.lambda_0])
    self.mode_wavelengths = mode_wavelengths
    return mode_wavelengths
def _compute_average(array, type="mean"):
    # Compute the required type of average
    if type == "mean":
        intensity = bn.nanmean(array)
    elif type == "median":
        intensity = bn.nanmedian(array)
    elif type == "std":
        intensity = bn.nanstd(array)
    else:
        raise ValueError("unknown average type: {}".format(type))

    return {"intensity": intensity}
def __calc_censtd(_arr):
    # most are defined in upper _iter_rej function
    cen = cenfunc(_arr, axis=0)
    if ccdclip:  # use abs(pix value) to avoid NaN from negative pixels.
        _evalstr = f"{NPSTR}abs(cen + zero_ref)*scale_ref"
        # restore zeroing & scaling; then add rdnoise
        _evalstr = f"(1 + snoise_ref)*{_evalstr} + rdnoise_ref**2"
        std = NEVAL(f"{NPSTR}sqrt({_evalstr})")
    else:
        std = bn.nanstd(_arr, axis=0, ddof=ddof)

    return cen, std
def test_nanstd_issue60():
    """nanstd regression test (issue #60)"""
    f = bn.nanstd([1.0], ddof=1)
    with np.errstate(invalid="ignore"):
        s = bn.slow.nanstd([1.0], ddof=1)
    assert_equal(f, s, err_msg="bn.nanstd([1.0], ddof=1) wrong")

    f = bn.nanstd([1], ddof=1)
    with np.errstate(invalid="ignore"):
        s = bn.slow.nanstd([1], ddof=1)
    assert_equal(f, s, err_msg="bn.nanstd([1], ddof=1) wrong")

    f = bn.nanstd([1, np.nan], ddof=1)
    with np.errstate(invalid="ignore"):
        s = bn.slow.nanstd([1, np.nan], ddof=1)
    assert_equal(f, s, err_msg="bn.nanstd([1, nan], ddof=1) wrong")

    f = bn.nanstd([[1, np.nan], [np.nan, 1]], axis=0, ddof=1)
    with np.errstate(invalid="ignore"):
        s = bn.slow.nanstd([[1, np.nan], [np.nan, 1]], axis=0, ddof=1)
    assert_equal(f, s, err_msg="issue #60 regression")
def calculate_std(self, scalar=False):
    """
    Calculates the naive values for the scale and norms under the
    assumption that the standard deviation is a rigorous method.

    Parameters
    ----------
    scalar : boolean (default False)
        Fit only a single number. Otherwise fit spectral and spatial
        variations.
    """
    # POSSIBLE IMPROVEMENT - add iterative outlier rejection here.

    # Extract the data from the spectral cube object
    data = self.cube.get_filled_data().astype('=f')

    # Calculate the overall scale
    self.scale = nanstd(data)

    # Return if fed an image and not a cube
    if self.data.ndim == 2 or scalar == True:
        return

    # Calculate the spatial variations after removing the overall scaling
    self.spatial_norm = nanstd(data, axis=0) / self.scale

    # Calculate the spectral variations after removing both the overall
    # and spatial variations. Do this by flattening into a two-d array
    # with the two image dimensions stacked together.
    self.spectral_norm = nanstd(
        (data / self.spatial_norm / self.scale).reshape(
            (data.shape[0], data.shape[1] * data.shape[2])), axis=1)

    return
def __calc_censtd(_arr):
    # most are defined in upper _iter_rej function
    cen = cenfunc(_arr, axis=0)
    if ccdclip:  # use abs(pix value) to avoid NaN from negative pixels.
        # restore zeroing & scaling; then add rdnoise
        std = np.sqrt(
            ((1 + snoise_ref) * np.abs(cen + zero_ref) * scale_ref)
            + rdnoise_ref**2
        )
    else:
        std = bn.nanstd(_arr, axis=0, ddof=ddof)

    return cen, std
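# The ccdclip branch above is an IRAF-style CCD noise model: the expected
# variance of a pixel is roughly the (zero- and scale-restored) signal plus the
# read noise squared, inflated by a fractional sensitivity-noise term. A small
# numeric sketch with made-up detector values:
import numpy as np

rdnoise_ref, snoise_ref = 5.0, 0.01      # illustrative read noise / sensitivity noise
zero_ref, scale_ref = 0.0, 1.0
cen = np.array([10.0, 100.0, 1000.0])    # central pixel estimates

std = np.sqrt((1 + snoise_ref) * np.abs(cen + zero_ref) * scale_ref
              + rdnoise_ref**2)
print(std)  # noise grows roughly like sqrt(signal) once above the read-noise floor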
def monte_lines(numtrys):
    bigarr = np.zeros((1, 3))

    for i in range(numtrys):
        v, I = ADE.ADE_gauss(1000, 500, 50)
        I *= 55 / I.max()
        I += 3. * np.random.randn(I.size)
        # ADE.eplot(v, I)
        moments = ADE.ADE_moments(v, I, threshold=np.inf, err=np.abs(I)**0.5)
        bigarr = np.vstack((bigarr, moments))

    bigarr = bigarr[1:]
    # print(bigarr)
    return bn.nanmedian(bigarr, axis=0), bn.nanstd(bigarr, axis=0)
def calculate_std(self, niter=1, spatial_smooth=None, spectral_smooth=None):
    """
    Calculates the naive values for the scale and norms under the
    assumption that the standard deviation is a rigorous method.
    """
    data = self.cube.get_filled_data().astype('=f')
    self.scale = nanstd(data)
    if self.spatial_norm is None:
        self.spatial_norm = np.ones((data.shape[1], data.shape[2]))
        self.spectral_norm = np.ones((data.shape[0]))
    for count in range(niter):
        scale = self.get_scale_cube()
        snr = data / scale
        self.spatial_norm = nanstd(snr, axis=0) * self.spatial_norm
        if beam is not None:
            self.spatial_norm = convolve_fft(
                self.spatial_norm,
                self.beam.as_kernel(get_pixel_scales(self.cube.wcs)),
                interpolate_nan=True, normalize_kernel=True)
        if spatial_smooth is not None:
            self.spatial_norm = ssig.medfilt2d(self.spatial_norm,
                                               kernel_size=spatial_smooth)
        snr = data / self.get_scale_cube()
        self.spectral_norm = nanstd(
            snr.reshape((snr.shape[0], snr.shape[1] * snr.shape[2])),
            axis=1) * self.spectral_norm
        if spectral_smooth is not None:
            self.spectral_norm = ssig.medfilt(self.spectral_norm,
                                              kernel_size=spectral_smooth)
    self.spectral_norm[np.isnan(self.spectral_norm) |
                       (self.spectral_norm == 0)] = 1.
    self.spatial_norm[np.isnan(self.spatial_norm) |
                      (self.spatial_norm == 0)] = 1.
    self.spatial_norm[~self.spatial_footprint] = np.nan
    self.distribution_shape = (0, self.scale)
    return
def Corr(A, B, n):
    '''
    Rolling correlation of factors A and B over the previous n days.
    n must be >= 2.
    '''
    if n < 2:
        # n must be at least 2 to compute a correlation; return the input unchanged.
        return A
    stacked_A = np.empty((n, A.shape[0], A.shape[1]))
    stacked_B = np.empty((n, B.shape[0], B.shape[1]))
    for i in range(n):
        stacked_A[i] = shift(A, i)
        stacked_B[i] = shift(B, i)

    mean = bk.nanmean(stacked_A, axis=0)
    A_submean = stacked_A - mean
    mean = bk.nanmean(stacked_B, axis=0)
    B_submean = stacked_B - mean

    deno = bk.nanstd(stacked_A, axis=0) * bk.nanstd(stacked_B, axis=0)
    cov = bk.nanmean(A_submean * B_submean, axis=0)
    result = vdiv(cov, deno, 0)
    result[np.isnan(A * B)] = np.nan
    return result
def std_from_model_fuzzing(U, Y, params, du=None, nsamp=100, debug_prefix=False, full_covar=False): """Estimate std of nebular pv image due to std of fit parameters Uses a Monte Carlo simulation of nsamp realizations of the model, with parameters drawn from Gaussian distributions around the best-fit values, with widths equal to the reported stderror. Currently does not attempt to make use of the correlations between model parameters, which means we may overestimate the uncertainties.... """ ny, nu = U.shape model_stack = np.empty((nsamp, ny, nu)) scaled_means = [p.value/find_param_scale(params, n) for n, p in params.items()] scaled_covar = calculate_covar_array(params) fuzzy_params_stack = [] # Fill in a stack of nebular models, all fuzzed around the best fit for i in range(nsamp): fuzzy_params = lmfit.Parameters() fuzzy_scaled_values = np.random.multivariate_normal(scaled_means, scaled_covar) for (name, param), fuzzy_scaled_value in zip(params.items(), fuzzy_scaled_values): if param.vary and param.stderr > 0.0: if full_covar: fuzzy_value = fuzzy_scaled_value*find_param_scale(params, name) else: fuzzy_value = np.random.normal(param.value, param.stderr) else: # pegged parameter does not vary fuzzy_value = param.value # Ensure we do not stray outside of the established bounds if param.max: fuzzy_value = min(fuzzy_value, param.max) if param.min: fuzzy_value = max(fuzzy_value, param.min) fuzzy_params.add(name, value=fuzzy_value) model_stack[i, :, :] = model(U, Y, fuzzy_params, du) fuzzy_params_stack.append({k: v.value for k, v in fuzzy_params.items()}) if debug_prefix: pyfits.PrimaryHDU(model_stack).writeto( debug_prefix + "_model_stack.fits", clobber=True) with open(debug_prefix + "_model_stack.tab", "w") as f: f.write("\n".join( Table(fuzzy_params_stack).pformat(max_lines=-1, max_width=-1))) # Table(fuzzy_params_stack).write("debug_model_stack.tab", format="ascii") return bn.nanstd(model_stack, axis=0)
def faster_sigma_clip_stats(data, sigma=5, iters=5, axis=None):
    """
    Calculate sigma clipped stats quickly using NaNs instead of masking
    and using bottleneck where possible.

    Parameters
    ----------
    data : numpy array
        The data to be clipped. *The data should have had masked values
        replaced with NaN prior to calling this function.*
    sigma : float, optional
        Number of standard deviations (estimated with the MAD) a point must
        be from the central value (median) to be rejected.
    iters : int, optional
        Number of sigma clipping iterations to perform. Fewer iterations than
        this may be performed because iterations stop when no new data is
        being clipped.
    axis : int, optional
        axis along which to perform the median.

    Returns
    -------
    mean, median, std : float or numpy array
        Clipped statistics; shape depends on the shape of the input.
    """
    data = data.copy()
    for _ in range(iters):
        central = bn.nanmedian(data, axis=axis)
        try:
            central = central[:, np.newaxis]
        except (ValueError, IndexError, TypeError):
            pass

        std_dif = 1.4826 * bn.nanmedian(np.abs(data - central))

        clips = np.abs(data - central) / std_dif > sigma

        if np.nansum(clips) == 0:
            break
        data[clips] = np.nan

    return (bn.nanmean(data, axis=axis),
            bn.nanmedian(data, axis=axis),
            bn.nanstd(data, axis=axis))
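# Hedged usage sketch for faster_sigma_clip_stats above (values illustrative):
# a few gross outliers should be rejected and the clipped statistics should
# come back close to the underlying normal distribution.
import numpy as np

rng = np.random.default_rng(1)
data = rng.normal(0.0, 1.0, size=10_000)
data[:20] = 500.0                        # inject outliers
mean, median, std = faster_sigma_clip_stats(data, sigma=5, iters=5)
print(mean, median, std)                 # all near (0, 0, 1)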
def do_a_line(moment_list,N,line_output,monte_output): x, l = make_line(moment_list) SNRs = np.linspace(5,100,50) results = np.empty((SNRs.size,4,2)) lp = PDF(line_output) for i, SNR in enumerate(SNRs): sn_res = np.empty((N,4)) for j in range(N): noise = get_noise(x, l,SNR) ln = l + noise cdf = np.cumsum(ln/np.sum(ln)) low, high = np.interp([0.01,0.99],cdf,x) idx = np.where((x > low) & (x <= high)) sn_res[j] = ADE.ADE_moments(x[idx],ln[idx]) measured_vals = bn.nanmean(sn_res,axis=0) measured_stds = bn.nanstd(sn_res,axis=0) # print sn_res # print measured_stds # raw_input('') results[i,:,0] = measured_vals results[i,:,1] = measured_stds ax = plt.figure().add_subplot(111) ax.set_xlabel('Velocity [km/s]') ax.set_ylabel('Flux') ax.set_title('SNR = {:5.2f}'.format(SNR)) ax.plot(x,ln) lp.savefig(ax.figure) lp.close() mp = PDF(monte_output) plots = plot_results(SNRs, results, moment_list) for i, plot in enumerate(plots): if i == 2: plot.set_ylim(-2,2) if i == 3: plot.set_ylim(-2,5) mp.savefig(plot.figure) mp.close() plt.close('all') return SNRs, results
def transformed(self, data): if data.X.shape[0] == 0: return data.X data = data.copy() with data.unlocked(): if self.method == Normalize.Vector: nans = np.isnan(data.X) nan_num = nans.sum(axis=1, keepdims=True) ys = data.X if np.any(nan_num > 0): # interpolate nan elements for normalization x = getx(data) ys = interp1d_with_unknowns_numpy(x, ys, x) ys = np.nan_to_num(ys) # edge elements can still be zero data.X = sknormalize(ys, norm='l2', axis=1, copy=False) if np.any(nan_num > 0): # keep nans where they were data.X[nans] = float("nan") elif self.method == Normalize.Area: norm_data = Integrate(methods=self.int_method, limits=[[self.lower, self.upper]])(data) data.X /= norm_data.X replace_infs(data.X) elif self.method == Normalize.SNV: data.X = (data.X - bottleneck.nanmean(data.X, axis=1).reshape(-1, 1)) / \ bottleneck.nanstd(data.X, axis=1).reshape(-1, 1) replace_infs(data.X) elif self.method == Normalize.Attribute: if self.attr in data.domain and isinstance( data.domain[self.attr], Orange.data.ContinuousVariable): ndom = Orange.data.Domain([data.domain[self.attr]]) factors = data.transform(ndom) data.X /= factors.X replace_infs(data.X) nd = data.domain[self.attr] else: # invalid attribute for normalization data.X *= float("nan") elif self.method == Normalize.MinMax: min = bottleneck.nanmin(data.X, axis=1).reshape(-1, 1) max = bottleneck.nanmax(data.X, axis=1).reshape(-1, 1) data.X = data.X / (max - min) replace_infs(data.X) return data.X
def std_filter(data, box_size):
    """Filter a 2D array using a standard deviation kernel.

    Args:
        data (np.ndarray): 2D array to be filtered
        box_size (int): Specifies the boxsize. Must be odd.

    Returns:
        np.ndarray: The filtered array
    """
    size = (box_size, box_size)
    border = box_size // 2
    # need to surround the data with NaNs to calculate values at the boundary
    padded_data = np.pad(data, (border, border),
                         mode='constant',
                         constant_values=np.nan)
    windows = _rolling_window(padded_data, size)
    n3, n2, n1, n0 = windows.shape
    # flatten the windows for bottleneck function call
    windows = windows.reshape((n3, n2, n1 * n0))
    return bn.nanstd(windows, axis=2)
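# Usage sketch for std_filter above (not from the source). _rolling_window is
# assumed to behave like numpy's sliding_window_view over both axes; the scipy
# generic_filter call is only a slow reference implementation for comparison.
import numpy as np
from scipy import ndimage

def _rolling_window(a, size):
    return np.lib.stride_tricks.sliding_window_view(a, size)

data = np.arange(36, dtype=float).reshape(6, 6)
fast = std_filter(data, box_size=3)
slow = ndimage.generic_filter(data, np.nanstd, size=3,
                              mode='constant', cval=np.nan)
print(np.allclose(fast, slow, equal_nan=True))  # True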
def flat_stats(image, plot=False): print('Dimensions (y,x): ' + str(image.shape)) print('Median: ' + str(bn.nanmedian(image.data))) print('Mean: ' + str(bn.nanmean(image.data))) print('Standard Deviation: ' + str(bn.nanstd(image.data))) print() Nans = np.where(np.isnan(image.data)) print('Number of NaN: ' + str(Nans[0].size)) print('Calculating number of dust spots...') temp_im = np.array(image.data)[20:1000, 20:1000] num_spots = 0 dust_spots = [] for y in range(0, temp_im.shape[0]): for x in range(0, temp_im.shape[1]): if count_spots(temp_im, temp_im[y][x], y, x): num_spots += 1 dust_spots.append((y + 20, x + 20)) if num_spots == 0: print('This image has no dust spots.') else: print('Number of dust spots: ' + str(num_spots)) print('Dust spot lower right corner coordinates (y,x):') print(dust_spots) #plots a histogram of the pixel counts in the image if plot: im_list = list(image.data.flatten()) not_nans = np.where(~np.isnan(im_list))[0].tolist() clean_im_list = [im_list[i] for i in not_nans] plt.hist(clean_im_list, bins=1000, range=(0.98, 1.02), color='blue', histtype='stepfilled') plt.show() return dust_spots
def _update_online_orbits(self): """.""" posx, posy = self._get_orbit_from_processes() posx /= 1000 posy /= 1000 nanx = _np.isnan(posx) nany = _np.isnan(posy) posx[nanx] = self.ref_orbs['X'][nanx] posy[nany] = self.ref_orbs['Y'][nany] if self._ring_extension > 1: posx = _np.tile(posx, (self._ring_extension, )) posy = _np.tile(posy, (self._ring_extension, )) orbs = {'X': posx, 'Y': posy} for plane in ('X', 'Y'): with self._lock_raw_orbs: raws = self.raw_orbs raws[plane].append(orbs[plane]) raws[plane] = raws[plane][-self._smooth_npts:] if not raws[plane]: return if self._smooth_meth == self._csorb.SmoothMeth.Average: orb = _np.mean(raws[plane], axis=0) else: orb = _np.median(raws[plane], axis=0) self.smooth_orb[plane] = orb self.new_orbit.set() for plane in ('X', 'Y'): orb = self.smooth_orb[plane] dorb = orb - self.ref_orbs[plane] self.run_callbacks(f'SlowOrb{plane:s}-Mon', _np.array(orb)) self.run_callbacks(f'DeltaOrb{plane:s}Avg-Mon', _bn.nanmean(dorb)) self.run_callbacks(f'DeltaOrb{plane:s}Std-Mon', _bn.nanstd(dorb)) self.run_callbacks(f'DeltaOrb{plane:s}Min-Mon', _bn.nanmin(dorb)) self.run_callbacks(f'DeltaOrb{plane:s}Max-Mon', _bn.nanmax(dorb))
def batch_mask(names, min=0.96): flat_file = open('flat_stats_' + str(min) + '.txt', 'w') pixVals = [ 1.06, 1.04, 1.02, 0.98, 0.96, 0.94, 0.92, 0.9, 0.85, 0.8, 0.75, 0.7 ] files = glob.glob('visao_flat_2*') print(files) for i, f in enumerate(files): pixList = dust_vals(f) flat_file.write(f + '\n') flat_file.write(str(pixVals) + '\n') flat_file.write(str(pixList) + '\n') mask = dust_mask(f, min, 2) fits.writeto(str(min) + '_level/visao_flat_mask_' + names[i] + '_min' + str(min) + '.fits', mask, clobber=True) flat_file.write('Median: ' + str(bn.nanmedian(mask)) + '\t') flat_file.write('Mean: ' + str(bn.nanmean(mask)) + '\t') flat_file.write('Standard deviation: ' + str(bn.nanstd(mask)) + '\n') Nans = np.where(np.isnan(mask)) flat_file.write('Num NaN: ' + str(Nans[0].size) + '\n') print('Processing flat ' + str(i + 1) + ' of ' + str(len(files))) temp_im = np.array(mask)[20:1000, 20:1000] num_spots = 0 dust_spots = [] for y in range(0, temp_im.shape[0]): for x in range(0, temp_im.shape[1]): if count_spots(temp_im, temp_im[y][x], y, x): num_spots += 1 dust_spots.append((y + 20, x + 20)) flat_file.write('Number of dust spots: ' + str(num_spots) + '\n\n') flat_file.close()
def test_window_sim(simfile, radius, N, SNR, output, observe=True): x, l = get_line(simfile, radius, observe=observe) # windows = np.linspace(0.8,0.999,50) windows = np.linspace(20,400,50.) digi_windows = np.empty(windows.shape) results = np.empty((windows.size,4,2)) if output: pp = PDF(output) ax0 = plt.figure().add_subplot(111) ax0.set_title('{}\nSNR={}'.format(time.asctime(),SNR)) ax0.set_xlabel('Velocity [km/s]') ax0.set_ylabel('Flux') true_moments = ADE.ADE_moments(x,l) print '{:>4}{:>10}{:>10}{:>10}{:>10}{:>10}{:>10}{:>10}{:>10}'.\ format('idx','window','low','high','idxsize','centerstd','mean','std','SN') for i, window in enumerate(windows): win_res = np.empty((N,4)) window_N = np.empty((N,)) # peak, _, _, _ = ADE.ADE_moments(x,l) # # cdf = np.cumsum(ln/np.sum(ln)) # # low, high = np.interp([1-window,window],cdf,x) # low = peak - window/2. # high = peak + window/2. # idx = np.where((x > low) & (x <= high))[0] centers = np.array([]) for j in range(N): noise = get_noise(x, l,SNR) ln = l + noise peak, _, _, _ = ADE.ADE_moments(x,ln) centers = np.append(centers,peak) cdf = np.cumsum(ln/np.sum(ln)) ##low, high = np.interp([1-window,window],cdf,x) low = peak - window/2. high = peak + window/2. idx = np.where((x > low) & (x <= high))[0] window_N[j] = x[idx[-1]] - x[idx[0]] if j == 0: print '{:4}{:10.3f}{:10.3f}{:10.3f}{:10n}'.\ format(i,window_N[j]/np.sqrt(true_moments[1]),low,high,idx.size), win_res[j] = ADE.ADE_moments(x[idx],ln[idx]) if i == 7: line = ax0.plot(x,l)[0] ax0.axvline(x=x[idx[0]],color=line.get_color()) ax0.axvline(x=x[idx[-1]],color=line.get_color()) ax0.axvline(x=low,ls=':',lw=0.4,color=line.get_color()) ax0.axvline(x=high,ls=':',lw=0.2,color=line.get_color()) del ax0.lines[-5] if i == 0: ax0.plot(x,ln) #if i % 10 == 0 or i == windows.size - 1: if i in [88,109]: line = ax0.plot(x,l)[0] ax0.axvline(x=x[idx[0]],label='{:5.3f}'.format(window/np.sqrt(true_moments[1])),color=line.get_color()) ax0.axvline(x=x[idx[-1]],color=line.get_color()) ax0.axvline(x=low,ls=':',color=line.get_color()) ax0.axvline(x=high,ls=':',color=line.get_color()) del ax0.lines[-5] print '{:9.5f}'.format(np.std(centers)), measured_vals = bn.nanmean(win_res,axis=0) measured_stds = bn.nanstd(win_res,axis=0) print '{:9.3f}{:10.3f}{:10.3f}'.format(measured_vals[0],measured_stds[0],measured_vals[0]/measured_stds[0]) digi_windows[i] = bn.nanmean(window_N) results[i,:,0] = measured_vals results[i,:,1] = measured_stds ax0.legend(loc=0,frameon=False,fontsize=10,title='Window/$\sqrt{\mu_2}$') fig = plt.figure(figsize=(8,10)) fig.suptitle('{}\nSNR={}'.format(time.asctime(),SNR)) ax = fig.add_subplot(211) ax.set_xlabel('Window width/$\sqrt{\mu_{2,true}}$') ax.set_ylabel('SNR') ax2 = fig.add_subplot(212) ax2.set_xlabel('Window width/$\sqrt{\mu_{2,true}}$') ax2.set_ylabel('$\mu_{i,meas}/\mu_{i,true}$') for i in range(4): sn = np.sqrt((results[:,i,0]/results[:,i,1])**2) # ax.plot(digi_windows/np.sqrt(true_moments[1]),sn,label='$\mu_{{{}}}$'.format(i+1)) ax.plot(windows/np.sqrt(true_moments[1]),sn,':') if i == 7: print 'tat' ax2.plot(windows/np.sqrt(true_moments[1]),np.sqrt(results[:,i,0]/true_moments[i]),label='$\sqrt{{\mu_{{{}}}}}$'.format(i+1)) else: ax2.plot(windows/np.sqrt(true_moments[1]),results[:,i,0]/true_moments[i],label='$\mu_{{{}}}$'.format(i+1)) ax2.legend(loc=0) ax2.axhline(y=1,ls=':') ax2.set_ylim(-2,1.5) if output: pp.savefig(fig) pp.savefig(ax0.figure) pp.close() plt.close('all') return windows, results, [fig, ax0.figure]
plt.xlabel("theta") plt.ylabel("brightness") plt.grid(axis='y') plt.grid(which='minor', axis='x', alpha=0.3, linestyle='-', linewidth=0.1) plt.grid(which='major', axis='x', alpha=0.6, linestyle='-', linewidth=0.1) plt.legend() plt.axis("tight") plt.xlim(0.0, 360.0) plt.title("Normalized average brightness profiles versus angle") # plt.ylim(0.0, 3.0) sbright = gauss_highpass_filter(sbright, smooth_scale, fs) kbright = gauss_highpass_filter(kbright, smooth_scale, fs) print(bn.nanmin(kbright), bn.nanmean(kbright), bn.nanmax(kbright), bn.nanstd(kbright)) print(bn.nanmin(sbright), bn.nanmean(sbright), bn.nanmax(sbright), bn.nanstd(sbright)) # normalize by std sbright /= bn.nanstd(sbright) kbright /= bn.nanstd(kbright) # positive and negative correlations corr = kbright*sbright pmask = corr > 0.0 nmask = ~pmask print() print(*["------"]*6, sep="\t") print("Octant", "Th_1", "Th_2", "Pos", "Neg", "Diff", sep="\t")
def ndcombine( arr, mask=None, copy=True, blank=np.nan, offsets=None, thresholds=[-np.inf, np.inf], zero=None, scale=None, weight=None, statsec=None, zero_kw={ 'cenfunc': 'median', 'stdfunc': 'std', 'std_ddof': 1 }, scale_kw={ 'cenfunc': 'median', 'stdfunc': 'std', 'std_ddof': 1 }, zero_to_0th=True, scale_to_0th=True, scale_sample=None, zero_sample=None, reject=None, cenfunc='median', sigma=[3., 3.], maxiters=3, ddof=1, nkeep=1, maxrej=None, n_minmax=[1, 1], rdnoise=0., gain=1., snoise=0., pclip=-0.5, combine='average', dtype='float32', memlimit=2.5e+9, irafmode=True, verbose=False, full=False, ): if copy: arr = arr.copy() if np.array(arr).ndim == 1: raise ValueError("1-D array combination is not supported!") _mask = _set_mask(arr, mask) # _mask = propagated through this function. sigma_lower, sigma_upper = _set_sigma(sigma) nkeep, maxrej = _set_keeprej(arr, nkeep, maxrej, axis=0) cenfunc = _set_cenfunc(cenfunc) reject = _set_reject_name(reject) maxiters = int(maxiters) ddof = int(ddof) ndim = arr.ndim ncombine = arr.shape[0] combfunc = _set_combfunc(combine, nameonly=False, nan=True) # == 01 - Thresholding + Initial masking ================================ # # Updating mask: _mask = _mask | mask_thresh mask_thresh = _set_thresh_mask(arr=arr, mask=_mask, thresholds=thresholds, update_mask=True) # if safemode: # # Backup the pixels which are rejected by thresholding and # # initial mask for future restoration (see below) for debugging # # purpose. # backup_thresh = arr[mask_thresh] # backup_thresh_inmask = arr[_mask] arr[_mask] = np.nan # ----------------------------------------------------------------------- # # == 02 - Calculate zero, scale, weights ================================ # # This should be done before rejection but after threshold masking.. zeros, scales, weights = get_zsw(arr=arr, zero=zero, scale=scale, weight=weight, zero_kw=zero_kw, scale_kw=scale_kw, zero_to_0th=zero_to_0th, scale_to_0th=scale_to_0th) arr = do_zs(arr, zeros=zeros, scales=scales) # ----------------------------------------------------------------------- # # == 02 - Rejection ===================================================== # if isinstance(reject, str): if reject == 'sigclip': _mask_rej, low, upp, nit, rejcode = sigclip_mask( arr, mask=_mask, sigma_lower=sigma_lower, sigma_upper=sigma_upper, maxiters=maxiters, ddof=ddof, nkeep=nkeep, maxrej=maxrej, cenfunc=cenfunc, axis=0, irafmode=irafmode, full=True) # _mask is a subset of _mask_rej, so to extract pixels which # are masked PURELY due to the rejection is: mask_rej = _mask_rej ^ _mask elif reject == 'minmax': pass elif reject == 'ccdclip': _mask_rej, low, upp, nit, rejcode = ccdclip_mask( arr, mask=_mask, sigma_lower=sigma_lower, sigma_upper=sigma_upper, scale_ref=np.mean(scales), zero_ref=np.mean(zeros), maxiters=maxiters, ddof=ddof, nkeep=nkeep, maxrej=maxrej, cenfunc=cenfunc, axis=0, gain=gain, rdnoise=rdnoise, snoise=snoise, irafmode=irafmode, full=True) # _mask is a subset of _mask_rej, so to extract pixels which # are masked PURELY due to the rejection is: mask_rej = _mask_rej ^ _mask elif reject == 'pclip': pass else: raise ValueError("reject not understood.") elif reject is None: mask_rej = _set_mask(arr, None) low = bn.nanmin(arr, axis=0) upp = bn.nanmax(arr, axis=0) nit = None rejcode = None else: raise ValueError("reject not understood.") _mask |= mask_rej # ----------------------------------------------------------------------- # # TODO: add "grow" rejection here? 
# == 03 - combine ======================================================= # # Replace rejected / masked pixel to NaN and backup for debugging purpose. # This is done to reduce memory (instead of doing _arr = arr.copy()) # backup_nan = arr[_mask] arr[_mask] = np.nan # Combine and calc sigma comb = combfunc(arr, axis=0) if full: sigma = bn.nanstd(arr, axis=0) # Restore NaN-replaced pixels of arr for debugging purpose. # arr[_mask] = backup_nan # arr[mask_thresh] = backup_thresh_inmask if full: return comb, sigma, mask_rej, mask_thresh, low, upp, nit, rejcode else: return comb
def stats(self, lmean=False, lmed=False, lskew=False, lvar=False, lstd=False, lcoefvar=False, lperc=False, p=0.95): """Calculate some statistics among every realisation. Each statistic is calculated node-wise along the complete number of realisations. Parameters ---------- lmean : boolean, default False Calculate the mean. lmed : boolean, default False Calculate the median. lskew : boolean, default False Calculate skewness. lvar : boolean, default False Calculate the variance. lstd : boolean, default False Calculate the standard deviation. lcoefvar : boolean, default False Calculate the coefficient of variation. lperc : boolean, default False Calculate the percentile `100 * (1 - p)`. p : number, default 0.95 Probability value. Returns ------- retdict : dict of GridArr Dictionary containing one GridArr for each calculated statistic. See Also -------- stats_area : same but considering a circular (and horizontal) area of a specified radius around a given point. """ # check if the map files are already opened or not if isinstance(self.files[0], file): opened_files = True else: opened_files = False if lmean: meanmap = np.zeros(self.cells) if lmed: medmap = np.zeros(self.cells) if lskew: skewmap = np.zeros(self.cells) if lvar: varmap = np.zeros(self.cells) if lstd: stdmap = np.zeros(self.cells) if lcoefvar: coefvarmap = np.zeros(self.cells) if lperc: percmap = np.zeros((self.cells, 2)) arr = np.zeros(self.nfiles) skip = True offset = os.SEEK_SET for cell in xrange(self.cells - self.header): for i, gridfile in enumerate(self.files): # deal with map files not open yet if opened_files: grid = gridfile else: grid = open(gridfile, 'rb') grid.seek(offset) if skip: skip_lines(grid, self.header) arr[i] = grid.readline() if not opened_files: offset = grid.tell() grid.close() skip = False # replace no data's with NaN bn.replace(arr, self.nodata, np.nan) if lmean: meanmap[cell] = bn.nanmean(arr) if lmed: medmap[cell] = bn.nanmedian(arr) if lskew: skewmap[cell] = pd.Series(arr).skew() if lvar: varmap[cell] = bn.nanvar(arr, ddof=1) if lstd: stdmap[cell] = bn.nanstd(arr, ddof=1) if lcoefvar: if lstd and lmean: coefvarmap[cell] = stdmap[cell] / meanmap[cell] * 100 else: std = bn.nanstd(arr, ddof=1) mean = bn.nanmean(arr) coefvarmap[cell] = std / mean * 100 if lperc: percmap[cell] = pd.Series(arr).quantile([(1 - p) / 2, 1 - (1 - p) / 2]) retdict = dict() if lmean: meangrid = GridArr(name='meanmap', dx=self.dx, dy=self.dy, dz=self.dz, nodata=self.nodata, val=meanmap) retdict['meanmap'] = meangrid if lmed: medgrid = GridArr(name='medianmap', dx=self.dx, dy=self.dy, dz=self.dz, nodata=self.nodata, val=medmap) retdict['medianmap'] = medgrid if lskew: skewgrid = GridArr(name='skewmap', dx=self.dx, dy=self.dy, dz=self.dz, nodata=self.nodata, val=skewmap) retdict['skewmap'] = skewgrid if lvar: vargrid = GridArr(name='varmap', dx=self.dx, dy=self.dy, dz=self.dz, nodata=self.nodata, val=varmap) retdict['varmap'] = vargrid if lstd: stdgrid = GridArr(name='stdmap', dx=self.dx, dy=self.dy, dz=self.dz, nodata=self.nodata, val=stdmap) retdict['stdmap'] = stdgrid if lcoefvar: coefvargrid = GridArr(name='coefvarmap', dx=self.dx, dy=self.dy, dz=self.dz, nodata=self.nodata, val=coefvarmap) retdict['coefvarmap'] = coefvargrid if lperc: percgrid = GridArr(name='percmap', dx=self.dx, dy=self.dy, dz=self.dz, nodata=self.nodata, val=percmap) retdict['percmap'] = percgrid return retdict
# noise_std[n-1] = bn.nanstd(input_images[0:n+1]) # How does standard deviation of a frame vary with number of frames group averaged together? # And how does the difference in pixel intensity between frames vary with number of frames group averaged together? # curtailed length of output array by a factor of 4 to speed this up. noise_std_groupmean = np.empty(npts) noise_dif_groupmean = np.empty(npts) # print bn.nanmean(input_images[0:1], axis=0) # print bn.nanmean(input_images[2:3], axis=0) # print bn.nanmean(np.subtract(bn.nanmean(input_images[0:1], axis=0),bn.nanmean(input_images[2:3], axis=0))) # for n in tqdm(range(1,len(input_images)/4)) : for n in tqdm(range(0, npts)): frame = nFrames / npts * (n+1) - 1 #print frame mean = bn.nanstd(bn.nanmean(input_images[0:frame], axis=0)) noise_std_groupmean[n] = mean noise_dif_groupmean[n] = bn.nanstd(np.subtract(bn.nanmean(input_images[0:(frame+1)/2-1], axis=0), bn.nanmean(input_images[(frame+1)/2:frame], axis=0))) #noise_dif_groupmean[n] #Plotting ypts = nFrames/ npts * (np.arange(npts) +1) with open(filename +'_shuffled_out.csv', 'wb') as csvfile: wr = csv.writer(csvfile, quoting=csv.QUOTE_ALL) wr.writerows([ypts, noise_std_groupmean, ypts/2, noise_dif_groupmean])
def stats_area(self, loc, tol=0, lmean=False, lmed=False, lskew=False, lvar=False, lstd=False, lcoefvar=False, lperc=False, p=0.95, save=False): """Calculate some statistics among every realisation, considering a circular (only horizontaly) area of radius `tol` around the point located at `loc`. Parameters ---------- loc : array_like Location of the vertical line [x, y]. tol : number, default 0 Tolerance radius used to search for neighbour nodes. lmean : boolean, default False Calculate the mean. lmed : boolean, default False Calculate the median. lskew : boolean, default False Calculate skewness. lvar : boolean, default False Calculate the variance. lstd : boolean, default False Calculate the standard deviation. lcoefvar : boolean, default False Calculate the coefficient of variation. lperc : boolean, default False Calculate the percentile `100 * (1 - p)`. p : number, default 0.95 Probability value. save : boolean, default False Write the points used to calculate the chosen statistics in PointSet format to a file named 'sim values at (x, y, line).prn'. Returns ------- statspset : PointSet PointSet instance containing the calculated statistics. .. TODO: checkar stats variance com geoms """ if lmean: meanline = np.zeros(self.dz) if lmed: medline = np.zeros(self.dz) if lskew: skewline = np.zeros(self.dz) if lvar: varline = np.zeros(self.dz) if lstd: stdline = np.zeros(self.dz) if lcoefvar: coefvarline = np.zeros(self.dz) if lperc: percline = np.zeros((self.dz, 2)) # convert the coordinates of the first point to grid nodes loc = coord_to_grid(loc, [self.cellx, self.celly, self.cellz], [self.xi, self.yi, self.zi])[:2] # find the nodes coordinates within a circle centred in the first point neighbours_nodes = circle(loc[0], loc[1], tol) # compute the lines numbers for each point in the neighbourhood, across # each grid layer. this yields a N*M matrix, with N equal to the number # of neighbour nodes, and M equal to the number of layers in the grid. 
neighbours_lines = [line_zmirror(node, [self.dx, self.dy, self.dz]) for node in neighbours_nodes] # sort the lines in ascending order neighbours_lines = np.sort(neighbours_lines, axis=0) # create an array to store the neighbour nodes in each grid file nnodes = neighbours_lines.shape[0] arr = np.zeros(self.nfiles * nnodes) skip = True curr_line = np.zeros(self.nfiles) for layer in xrange(neighbours_lines.shape[1]): for i, line in enumerate(neighbours_lines[:, layer]): for j, grid in enumerate(self.files): # skip header lines only once per grid file if skip and self.header: skip_lines(grid, self.header) # advance to the next line with a neighbour node skip_lines(grid, int(line - curr_line[j] - 1)) # read the line and store its value a = grid.readline() arr[i + j * nnodes] = float(a) curr_line[j] = line skip = False # replace no data's with NaN bn.replace(arr, self.nodata, np.nan) # compute the required statistics if lmean: meanline[layer] = bn.nanmean(arr) if lmed: medline[layer] = bn.nanmedian(arr) if lskew: skewline[layer] = pd.Series(arr).skew() if lvar: varline[layer] = bn.nanvar(arr, ddof=1) if lstd: stdline[layer] = bn.nanstd(arr, ddof=1) if lcoefvar: if lstd and lmean: coefvarline[layer] = stdline[layer] / meanline[layer] * 100 else: std = bn.nanstd(arr, ddof=1) mean = bn.nanmean(arr) coefvarline[layer] = std / mean * 100 if lperc: percline[layer] = pd.Series(arr).quantile([(1 - p) / 2, 1 - (1 - p) / 2]) if save and tol == 0: # FIXME: not working with the tolerance feature # need to adjust the arrpset or cherry-pick arr arrpset = PointSet('realisations at location ({0}, {1}, {2})'. format(loc[0], loc[1], layer * self.cellz + self.zi), self.nodata, 3, ['x', 'y', 'value'], values=np.zeros((self.nfiles, 3))) arrout = os.path.join(os.path.dirname(self.files[0].name), 'sim values at ({0}, {1}, {2}).prn'.format( loc[0], loc[1], layer * self.cellz + self.zi)) arrpset.values.iloc[:, 2] = arr arrpset.values.iloc[:, :2] = np.repeat(np.array(loc) [np.newaxis, :], self.nfiles, axis=0) arrpset.save(arrout, header=True) ncols = sum((lmean, lmed, lvar, lstd, lcoefvar, lskew)) if lperc: ncols += 2 statspset = PointSet(name='vertical line stats at (x,y) = ({0},{1})'. format(loc[0], loc[1]), nodata=self.nodata, nvars=3 + ncols, varnames=['x', 'y', 'z'], values=np.zeros((self.dz, 3 + ncols))) statspset.values.iloc[:, :3] = (np.column_stack (((np.repeat(np.array(loc) [np.newaxis, :], self.dz, axis=0)), np.arange(self.zi, self.zi + self.cellz * self.dz)))) j = 3 if lmean: statspset.varnames.append('mean') statspset.values.iloc[:, j] = meanline j += 1 if lmed: statspset.varnames.append('median') statspset.values.iloc[:, j] = medline j += 1 if lskew: statspset.varnames.append('skewness') statspset.values.iloc[:, j] = skewline j += 1 if lvar: statspset.varnames.append('variance') statspset.values.iloc[:, j] = varline j += 1 if lstd: statspset.varnames.append('std') statspset.values.iloc[:, j] = stdline j += 1 if lcoefvar: statspset.varnames.append('coefvar') statspset.values.iloc[:, j] = coefvarline j += 1 if lperc: statspset.varnames.append('lperc') statspset.varnames.append('rperc') statspset.values.iloc[:, -2:] = percline # reset the reading pointer in each grid file self.reset_read() # update varnames statspset.flush_varnames() return statspset
def height_plot_across_folders(folder_list, inputsuffix='allz2.dat', label='Mean Light Weighted Age [Gyr]', col=6, errcol=None, lowhigh=False, order=5, ylims=None, bigpoints=False, binz=True, combine_all=False, plot_std=False, exclude=[[],[],[],[],[],[]]): axlist = [] plist = [6,3,4,2,1,5] #color_list = ['blue','turquoise','chartreuse','yellow','tomato','red'] color_list = ['blue','seagreen','darkorange','crimson','dimgray','mediumorchid','lightblue'] style_list = ['-','-','-','-','-','-','-'] if not isinstance(col,list): col = [col] * len(folder_list) for i in range(6): pointing = plist[i] ax = plt.figure().add_subplot(111) ax.set_xlabel('|Height [kpc]|') ax.set_ylabel(label) ax.set_title('{}\nP{}'.format(time.asctime(),pointing)) for f, folder in enumerate(folder_list): color = color_list[f] style = style_list[f] dat = glob('{}/*P{}*{}'.format(folder, pointing, inputsuffix))[0] print dat loc = glob('{}/*P{}*locations.dat'.format(folder, pointing))[0] print loc print 'Excluding: ', exclude[pointing-1] if errcol == None: td = np.loadtxt(dat, usecols=(col[f],), unpack=True) else: if lowhigh: td, low, high = np.loadtxt(dat, usecols=(col[f],errcol,errcol+1), unpack=True) te = np.vstack((low,high)) else: td, te = np.loadtxt(dat, usecols=(col[f],errcol), unpack=True) r, tz = np.loadtxt(loc, usecols=(4,5), unpack=True) exarr = np.array(exclude[pointing-1])-1 #becuase aps are 1-indexed td = np.delete(td,exarr) r = np.delete(r,exarr) tz = np.delete(tz,exarr) if errcol != None: if lowhigh: te = np.delete(te,exarr,axis=1) else: te = np.delete(te,exarr) alpha=1.0 if combine_all and f == 0: bigD = np.zeros(td.size) alpha=0.3 if binz: z = np.array([]) d = np.array([]) e = np.array([]) while tz.size > 0: zi = tz[0] idx = np.where(np.abs(tz - zi) < 0.05) d = np.r_[d,np.mean(td[idx])] e = np.r_[e,np.std(td[idx])] z = np.r_[z,np.abs(zi)] tz = np.delete(tz, idx) td = np.delete(td, idx) else: z = tz d = td if errcol == None: e = np.zeros(tz.size) else: e = te if combine_all: bigD = np.vstack((bigD,d)) bigz = z gidx = d == d d = d[gidx] z = z[gidx] if lowhigh: e = e[:,gidx] else: e = e[gidx] sidx = np.argsort(z) dp = np.r_[d[sidx][order::-1],d[sidx]] zp = np.r_[z[sidx][order::-1],z[sidx]] mean = bn.move_mean(dp,order)[order+1:] std = bn.move_std(dp,order)[order+1:] spl = spi.UnivariateSpline(z[sidx],d[sidx]) mean = spl(z[sidx]) # mean = np.convolve(d[sidx],np.ones(order)/order,'same') # std = np.sqrt(np.convolve((d - mean)**2,np.ones(order)/order,'same')) # ax.plot(z[sidx],mean,color=color, ls=style, label=folder, alpha=alpha) # ax.fill_between(z[sidx],mean-std,mean+std, alpha=0.1, color=color) # print d.shape, np.sum(e,axis=0).shape # d = d/np.sum(e,axis=0) # e = np.diff(e,axis=0)[0] # print e.shape ax.errorbar(z, d, yerr=e, fmt='.', color=color,alpha=alpha,capsize=0, label=folder) ax.set_xlim(-0.1,2.6) if ylims is not None: ax.set_ylim(*ylims) ax.legend(loc=0,numpoints=1) if combine_all: sidx = np.argsort(bigz) bigD = bigD[1:] bigMean = bn.nanmean(bigD,axis=0) bigStd = bn.nanstd(bigD,axis=0) bigspl = spi.UnivariateSpline(bigz[sidx],bigMean[sidx]) bigFit = bigspl(bigz[sidx]) ax.plot(bigz[sidx], bigFit, 'k-', lw=2) ax.errorbar(bigz, bigMean, yerr=bigStd, fmt='.', color='k',capsize=0) axlist.append(ax) if combine_all and plot_std: ax2 = plt.figure().add_subplot(111) ax2.set_xlabel('|Height [kpc]|') ax2.set_ylabel('$\delta$'+label) ax2.set_title(ax.get_title()) ax2.plot(bigz, bigStd, 'k') axlist.append(ax2) return axlist
def test_window(moment_list, N, SNR, output): x, l = make_line(moment_list) windows = np.linspace(0.8,0.99,50) results = np.empty((windows.size,4,2)) if output: pp = PDF(output) ax0 = plt.figure().add_subplot(111) ax0.set_title('{}\nSNR = {}'.format(time.asctime(),SNR)) ax0.set_xlabel('Velocity [km/s]') ax0.set_ylabel('Flux') for i, window in enumerate(windows): win_res = np.empty((N,4)) for j in range(N): noise = get_noise(x, l,SNR) ln = l + noise cdf = np.cumsum(ln/np.sum(ln)) low, high = np.interp([1-window,window],cdf,x) idx = np.where((x > low) & (x <= high)) win_res[j] = ADE.ADE_moments(x[idx],ln[idx]) if i == 0: ax0.plot(x,ln) if i % 5 == 0 or i == windows.size - 1: line = ax0.plot(x,l)[0] ax0.axvline(x=low,label='{:5.3f}'.format(window),color=line.get_color()) ax0.axvline(x=high,color=line.get_color()) del ax0.lines[-3] measured_vals = bn.nanmean(win_res,axis=0) measured_stds = bn.nanstd(win_res,axis=0) # print win_res # print measured_vals # print measured_stds # raw_input('') results[i,:,0] = measured_vals results[i,:,1] = measured_stds ax0.legend(loc=0,frameon=False,fontsize=10,title='Window') fig = plt.figure(figsize=(8,10)) fig.suptitle('{}\nSNR = {}'.format(time.asctime(),SNR)) ax = fig.add_subplot(211) ax.set_xlabel('Window (1-X/X)') ax.set_ylabel('SNR') ax2 = fig.add_subplot(212) ax2.set_xlabel('Window (1-X/X)') ax2.set_ylabel('$\mu_{i,meas}/\mu_{i,true}$') for i in range(4): sn = np.sqrt((results[:,i,0]/results[:,i,1])**2) ax.plot(windows,sn,label='$\mu_{{{}}}$'.format(i+1)) print i if i == 1: print 'Yaya!' ax2.plot(windows,np.sqrt(results[:,i,0]/moment_list[i+1]),label='$\sqrt{\mu_{{{}}}}$'.format(i+1)) else: ax2.plot(windows,results[:,i,0]/moment_list[i+1],label='$\mu_{{{}}}$'.format(i+1)) ax.legend(loc=0) ax2.legend(loc=0) if output: pp.savefig(fig) pp.savefig(ax0.figure) pp.close() plt.close('all') return windows, results, [fig, ax0.figure()]
def test_window_line(x, l, N, SNR, output, observe=True, smooth=1, resamp=False): # if smooth: # l = ndimage.gaussian_filter1d(l,smooth) if resamp: oldx = x.copy() resamp_x = np.linspace(x.min(),x.max(),1000.) widths = np.arange(3,resamp_x.size*0.666,dtype=np.int) else: widths = np.arange(3,x.size*0.666,dtype=np.int) results = np.empty((widths.size,4,2)) if output: pp = PDF(output) fig = plt.figure(figsize = (10,8)) ax0 = fig.add_subplot(221) ax0.set_xlabel('Velocity [km/s]') ax0.set_ylabel('Flux') true_moments = ADE.ADE_moments(x,l) for i in range(4): true_moments[i] = np.sign(true_moments[i])*((np.sign(true_moments[i])*true_moments[i])**(1./(i+1))) ax0.text(0.7,0.8,'$\zeta_1=$ {:4.2e}\n$\zeta_2=$ {:4.2e}\n$\zeta_3=$ {:4.2e}\n$\zeta_4=$ {:4.2e}'.format(*true_moments),ha='left',va='top',fontsize=8,transform=ax0.transAxes) print 'True moments: {}'.format(true_moments) print "True \\sqrt{{\\mu_2}} = {:5.2f}".format(true_moments[1]) print "Orig px resolution = {:5.2f} km/s +/- {:5.2f}".\ format(np.mean(np.diff(x)),np.std(np.diff(x))) print "Interpolated px resolution = {:5.2f} km/s +/- {:5.2f}\n".\ format(np.mean(np.diff(resamp_x)),np.std(np.diff(resamp_x))) print '{:>4}{:>10}{:>10}{:>10}{:>10}{:>10}{:>10}{:>10}'.\ format('idx','window','centidx','idxsize','centerstd','mean','std','SN') for i, width in enumerate(widths): win_res = np.empty((N,4)) # peak, _, _, _ = ADE.ADE_moments(x,l) # cent_idx = np.argmin(np.abs(x - peak)) # idx = np.arange(cent_idx - width/2., cent_idx + width/2., dtype=np.int) centers = np.array([]) for j in range(N): noise = get_noise(x, l,SNR) ln = l + noise if observe and resamp: ln = np.interp(resamp_x,x,ln) x = resamp_x peak, _, _, _ = ADE.ADE_moments(x,ln) cent_idx = np.argmin(np.abs(x - peak)) centers = np.append(centers,cent_idx) idx = np.arange(cent_idx - width/2., cent_idx + width/2., dtype=np.int) if np.any(idx > x.size): idx = np.arange(idx[0],x.size - 1) if j == 0: print '{:4}{:10.3f}{:10n}{:10n}'.\ format(i,width,cent_idx,idx.size), moments = ADE.ADE_moments(x[idx],ln[idx]) for k in range(4): moments[k] = np.sign(moments[k])*((np.sign(moments[k])*moments[k])**(1./(1+k))) win_res[j] = moments if resamp: x = oldx # if i == 7: # line = ax0.plot(x,l)[0] # ax0.axvline(x=x[idx[0]],color=line.get_color()) # ax0.axvline(x=x[idx[-1]],color=line.get_color()) # ax0.axvline(x=low,ls=':',lw=0.4,color=line.get_color()) # ax0.axvline(x=high,ls=':',lw=0.2,color=line.get_color()) # del ax0.lines[-5] if i == 0: if resamp: ax0.plot(resamp_x,ln) else: ax0.plot(x,ln) #if i % 10 == 0 or i == windows.size - 1: # if i in [-8,-10]: # line = ax0.plot(x,l)[0] # ax0.axvline(x=x[idx[0]],label='{:5.3f}'.format(window/np.sqrt(true_moments[1])),color=line.get_color()) # ax0.axvline(x=x[idx[-1]],color=line.get_color()) # ax0.axvline(x=low,ls=':',color=line.get_color()) # ax0.axvline(x=high,ls=':',color=line.get_color()) # del ax0.lines[-5] print '{:9.5f}'.format(np.std(centers)), measured_vals = bn.nanmean(win_res,axis=0) measured_stds = bn.nanstd(win_res,axis=0) print '{:9.3f}{:10.3f}{:10.3f}'.format(measured_vals[2],measured_stds[2],measured_vals[2]/measured_stds[2]) results[i,:,0] = measured_vals results[i,:,1] = measured_stds ax0.legend(loc=0,frameon=False,fontsize=10,title='Window/$\sqrt{\mu_2}$') # fig.subplots_adjust(hspace=0.0001) fig.suptitle('{}\nSNR={}, $\zeta_{{2,true}}$={:4.2f} km/s'.format( time.asctime(),SNR,true_moments[1])) ax = fig.add_subplot(222) ax.set_ylabel('1/Noise') ax.set_yscale('log') ax.set_xlabel('Window width [px]') # 
plt.setp(ax.get_xticklabels(),visible=False) axkm = ax.twiny() axkm.set_xlabel('Window width [km/s]') ax2 = fig.add_subplot(223) ax2.set_ylabel('$\zeta_{i,meas}/\zeta_{i,true}$') ax2.set_xlabel('Window width [km/s]') # plt.setp(ax2.get_xticklabels(),visible=False) ax3 = fig.add_subplot(224) ax3km = ax3.twiny() ax3km.set_xlabel('Window width/$\zeta_{{2,true}}$') # ax3.set_yscale('log') ax3.set_ylim(0,1.1) ax3.set_xlabel('Window width [px]') ax3.set_ylabel('1/Noise $\\times\, \zeta_{i,meas}/\zeta_{i,true}$ (normalized)') for i in range(4): sn = 1./results[:,i,1] ax.plot(widths,sn/sn[100:].max(),label='$\zeta_{{{}}}$'.format(i+1)) if resamp: axkm.plot(widths*np.mean(np.diff(resamp_x)),sn) else: axkm.plot(widths*np.mean(np.diff(x)),sn) del axkm.lines[-1] # if i == 7: # print 'tat' # ax2.plot(windows/np.sqrt(true_moments[1]),np.sqrt(results[:,i,0]/true_moments[i]),label='$\sqrt{{\mu_{{{}}}}}$'.format(i+1)) # ax2.plot(widths,results[:,i,0]/true_moments[i],label='$\mu_{{{}}}$'.format(i+1)) ax2.plot(widths,results[:,i,0]/true_moments[i],label='$\zeta_{{{}}}$'.format(i+1)) zeta = results[:,i,0]/true_moments[i]*sn zeta /= zeta[100:].max() ax3.plot(widths,zeta) if resamp: ax3km.plot(widths*np.mean(np.diff(resamp_x))/true_moments[1],zeta) else: ax3km.plot(widths*np.mean(np.diff(x))/np.sqrt(true_moments[1]),zeta) del ax3km.lines[-1] ax.legend(loc=0) # ax.set_ylim(0,1000) ax2.axhline(y=1,ls=':') ax2.set_ylim(-0.1,1.1) if output: plt.tight_layout() pp.savefig(fig) pp.close() plt.close('all') return widths, results, [fig, ax0.figure], true_moments
def deltaconvert(series, visualize=False, max_adj_outliers=10): """Perform delta-conversion to given pd.Series. Delta-conversion returns 3 series as a tuple and possibly error message. First series (D) contains daily returns where all the data has been removed that could make comparison difficult with other assets. Second series (W) contains weekly returns where all the data has been removed that could make comparison difficult with other assets. Third series (DS) contains daily returns where all the outliers and erroneus data points have been removed but holes in data are not taken into account. This is more suitable for calculating performance scores etc. Arguments: series -- series to use visualize -- visualize results max_adj_outliers -- maximum number of adjancent outliers, if there are actually more adjancent outliers than this then they will not be considered outliers anymore. default value: 10 """ # MEDIAN_LEN = 50 ZSCORE_CUT_RATIO = 2 series = series.dropna() if len(series) < 50: raise DeltaConversionException("Not enough data") lines_taken = 0 if series.index[0] > series.index[-1]: raise DeltaConversionException("Wrong cronological order") # closes = [] # dates = [] # datesord = [] # for line in lines: # splitted = line.split(",") # closes.append(float(splitted[column])) # dt = datetime.strptime(splitted[0], "%Y-%m-%d").date() # dates.append(dt) # datesord.append(dt.toordinal()) # if datesord[-1] < datesord[0]: # closes.reverse() # dates.reverse() # datesord.reverse() # lines.reverse() closes = series dates = series.index num_invalid_prices = 0 deltapct = [np.nan] changescores = [np.nan] invalid_price_indices = [] for i in range(1, len(series)): if closes[i - 1] > 0 and closes[i] > 0: change = closes[i] / closes[i - 1] deltapct.append(change - 1) changescore = change_to_score(change) changescores.append(changescore) else: deltapct.append(np.nan) changescores.append(np.nan) num_invalid_prices += 1 invalid_price_indices.append(i) logging.debug("Cannot determine changescore at {} ({} / {})".format(dates[i], closes[i], closes[i - 1])) # # remove zeroes (data may only end with price zero if stock goes bankrupt...) 
    # first_nonzero_idx = [i for i, val in enumerate(closes[:-1]) if val == 0]
    # del closes[:first_nonzero_idx]
    # del dates[:first_nonzero_idx]
    # lines_taken += first_nonzero_idx
    # if first_nonzero_idx > 0:
    #     logging.debug("{}: removed {} zero-lines from the beginning."
    #                   .format(filename, first_nonzero_idx))

    num_gaps = 0
    num_invalid_chrono_orders = 0
    gap_indices = []
    for i in range(len(dates) - 1, 0, -1):
        d = (dates[i] - dates[i - 1]).days
        # standard weekends are the only allowed gaps
        if d == 3:
            if dates[i].weekday() != 0:  # not Monday
                num_gaps += 1
                gap_indices.append(i)
                logging.log(5, "Non-weekend gap of 2 day(s) at {}".format(dates[i]))
        elif d > 1:
            num_gaps += 1
            gap_indices.append(i)
            logging.log(5, "Non-weekend gap of {} day(s) at {}".format(d, dates[i]))
        elif d <= 0:
            logging.log(5, "Invalid chronological order ({} day(s)) at {}"
                        .format(d - 1, dates[i]))
            del deltapct[i], changescores[i]
            closes = closes.drop(closes.index[i])
            dates = dates.delete(i)
            num_invalid_chrono_orders += 1

    deltapct = np.asarray(deltapct)
    changescores = np.asarray(changescores)
    std_score = bn.nanstd(changescores)
    zscores = np.abs(changescores) / std_score
    mean_z = bn.nanmean(zscores)

    zscores_set = list(set(zscores[(~np.isnan(zscores)) & (zscores > 0)]))
    zscores_set.sort()
    outlier_z = None
    maxpctdiff = 0
    for i in range(int(len(zscores_set) * .95), len(zscores_set)):
        pctdiff = zscores_set[i] / zscores_set[i - 1]
        maxpctdiff = pctdiff
        if pctdiff >= 2:
            outlier_z = zscores_set[i]
            second_highest_z = zscores_set[i - 1]
            break

    possible_outliers = []
    confirmed_outliers = []
    localmean_factors = []
    if outlier_z:
        logging.log(5, "Outlier z-score: {:.2f}, earlier z-score: {:.2f}, mean z-score: {:.5f}"
                    .format(outlier_z, second_highest_z, mean_z))
        for i in range(len(zscores)):
            if zscores[i] >= outlier_z:
                localmean = bn.nanmean(zscores[max(0, i - 50):min(len(zscores) + 1, i + 50)])
                localmean_factor = np.sqrt(mean_z / localmean)
                score = (zscores[i] / second_highest_z) * localmean_factor
                logging.log(5, "Possible outlier at {}: localmean_factor: {:.2f}, "
                            "zscore: {:.2f}, score: {:.2f}"
                            .format(dates[i], localmean_factor, zscores[i], score))
                if score >= ZSCORE_CUT_RATIO:
                    logging.debug("Possible outlier at {} (z-score={:.2f}, deltapct={:.2%})"
                                  .format(dates[i], zscores[i], deltapct[i]))
                    possible_outliers.append(i)
                    localmean_factors.append(localmean_factor)

        if len(possible_outliers) == 1:
            confirmed_outliers = possible_outliers

        for i in range(1, len(possible_outliers)):
            firstidx = possible_outliers[i - 1]
            secondidx = possible_outliers[i]
            # opposite signs and not too far from each other
            if deltapct[firstidx] * deltapct[secondidx] < 0 \
                    and secondidx - firstidx + 1 <= max_adj_outliers:
                firstnonan = None
                for i2 in range(firstidx, -1, -1):
                    if not np.isnan(deltapct[i2]):
                        firstnonan = i2
                        break

                confirmed = False
                if firstnonan is None:
                    confirmed = True
                else:
                    if i == 1:
                        left_mean = bn.nanmedian(
                            closes[max(0, firstnonan - (max_adj_outliers - 1)):firstnonan + 1])
                    else:
                        left_mean = bn.nanmedian(
                            closes[max(0, possible_outliers[i - 2],
                                       firstnonan - (max_adj_outliers - 1)):firstnonan + 1])
                    right_mean = bn.nanmedian(closes[firstidx:secondidx])
                    changescore = change_to_score(right_mean / left_mean)
                    zscore = abs(changescore) / std_score
                    score_left_vs_mid = (zscore / second_highest_z) * localmean_factors[i - 1]

                    left_mean = right_mean
                    right_mean = bn.nanmedian(
                        closes[secondidx:min(secondidx + max_adj_outliers, len(closes))])
                    changescore = change_to_score(right_mean / left_mean)
                    zscore = abs(changescore) / std_score
                    score_mid_vs_right = (zscore / second_highest_z) * localmean_factors[i]
                    if score_left_vs_mid > ZSCORE_CUT_RATIO * .75 \
                            and score_mid_vs_right > ZSCORE_CUT_RATIO * .75:
                        confirmed = True

                if confirmed:
                    indices = [i2 for i2 in range(firstidx, secondidx + 1)]
                    deltapct[indices] = np.nan
                    confirmed_outliers += indices
    else:
        logging.debug("No possible outliers found based on initial z-score analysis (maxpctdiff: {})"
                      .format(maxpctdiff))

    if visualize:
        # TODO: make this work with DataFrame
        pass

    logging.debug("Conversion result: lines = {}, invalid closes = {}, gaps = {}, "
                  "invalid dates = {}, outliers = {}"
                  .format(len(series) - lines_taken, num_invalid_prices, num_gaps,
                          num_invalid_chrono_orders, len(confirmed_outliers)))

    indices_to_rem = list(set(gap_indices + confirmed_outliers + invalid_price_indices))
    datesmod = dates.copy()
    datesmod = datesmod.delete(indices_to_rem)
    deltapctmod = np.delete(deltapct, indices_to_rem)
    closesmod = closes.drop(closes.index[indices_to_rem])
    assert not np.any(np.isnan(deltapctmod[1:]))

    weeklydeltapct = []
    weeklydatesmod = []
    lastidx = -1
    # resample to W-FRI (could be done with pandas)
    for i in range(len(closesmod)):
        if datesmod[i].weekday() == 4:  # Friday
            dd = (datesmod[i] - datesmod[lastidx]).days
            if lastidx >= 0 and dd == 7:
                if closesmod[lastidx] >= 0:
                    weeklydeltapct.append(closesmod[i] / closesmod[lastidx] - 1)
                    weeklydatesmod.append(datesmod[i])
            else:
                logging.log(5, "Weekly bar at {} (index {}) skipped (delta: {} days)"
                            .format(datesmod[i], i, dd))
            lastidx = i

    res_daily = pd.Series(deltapctmod, datesmod)
    res_weekly = pd.Series(weeklydeltapct, weeklydatesmod)

    indices_to_rem = list(set(confirmed_outliers + invalid_price_indices))
    datesmod = dates.copy()
    datesmod = datesmod.delete(indices_to_rem)
    deltapctmod = np.delete(deltapct, indices_to_rem)
    assert not np.any(np.isnan(deltapctmod[1:]))
    res_dailyscore = pd.Series(deltapctmod, datesmod)

    return res_daily, res_weekly, res_dailyscore
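# A minimal usage sketch for deltaconvert (not part of the original module):
# it assumes change_to_score and DeltaConversionException are defined alongside
# the function above, and feeds a synthetic business-day price series through
# the conversion to obtain the daily (D), weekly (W), and scoring (DS) series.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
idx = pd.bdate_range("2020-01-03", periods=300)
prices = pd.Series(100 * np.cumprod(1 + rng.normal(0, 0.01, len(idx))), index=idx)

res_daily, res_weekly, res_dailyscore = deltaconvert(prices)
print(len(res_daily), len(res_weekly), len(res_dailyscore))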
def create_wlmap_from_skylines(hdulist):
    logger = logging.getLogger("SkyTrace")

    # imgdata = hdulist['SCI.RAW'].data
    try:
        imgdata = hdulist['SCI.NOCRJ'].data
    except:
        imgdata = hdulist['SCI'].data

    logger.info("Isolating sky lines and continuum")
    skylines, continuum = prep_science.filter_isolate_skylines(data=imgdata)
    fits.PrimaryHDU(data=skylines).writeto("skytrace_sky.fits", overwrite=True)
    fits.PrimaryHDU(data=continuum).writeto("skytrace_continuum.fits", overwrite=True)

    # pick a region close to the center, extract block of image rows, and get
    # line list
    sky1d = bottleneck.nanmean(imgdata[550:575, :].astype(numpy.float32), axis=0)
    print(sky1d.shape)
    sky_linelist = wlcal.find_list_of_lines(sky1d, avg_width=25, pre_smooth=None)
    numpy.savetxt("sky1d", sky1d)
    numpy.savetxt("skylines.all", sky_linelist)

    # select lines with good spacing
    good_lines = traceline.pick_line_every_separation(
        arc_linelist=sky_linelist,
        trace_every=0.02,
        min_line_separation=0.01,
        n_pixels=imgdata.shape[1],
        min_signal_to_noise=7,
    )
    numpy.savetxt("skylines.good", sky_linelist[good_lines])

    print("X", skylines.shape, sky_linelist.shape, good_lines.shape)

    selected_lines = sky_linelist[good_lines]
    print("selected:", selected_lines.shape)

    all_traces = []
    logger.info("Tracing %d lines" % (selected_lines.shape[0]))
    linetraces = open("skylines.traces", "w")
    for idx, pick_line in enumerate(selected_lines):
        wp = trace_full_line(skylines, x_start=pick_line[0], y_start=562, window=5)
        numpy.savetxt(linetraces, wp)
        print("\n" * 5, file=linetraces)
        all_traces.append(wp)
    linetraces.close()

    numpy.savetxt("skylines.picked", selected_lines)
    for idx in range(selected_lines.shape[0]):
        pick_line = selected_lines[idx, :]

    all_traces = numpy.array(all_traces)
    print(all_traces.shape)
    fits.PrimaryHDU(data=all_traces).writeto("alltraces.fits", overwrite=True)

    ##########################################################################
    #
    # Now do some outlier rejection
    #
    ##########################################################################

    #
    # Compute average profile shape and mean intensity profile
    #
    logger.info("Rejecting outliers along the spatial profile")
    _cl, _cr = int(0.4 * all_traces.shape[1]), int(0.6 * all_traces.shape[1])
    central_position = numpy.median(all_traces[:, _cl:_cr, :], axis=1)
    numpy.savetxt("skytrace_median", central_position)
    print(central_position)

    # subtract central position
    all_traces[:, :, 1] -= central_position[:, 1:2]
    all_traces[:, :, 2] -= central_position[:, 2:3]
    # scale intensity by median flux
    all_traces[:, :, 3] /= central_position[:, 3:]

    with open("skylines.traces.norm", "w") as lt2:
        for line in range(all_traces.shape[0]):
            numpy.savetxt(lt2, all_traces[line, :, :])
            print("\n" * 5, file=lt2)

    #
    # Now eliminate all lines that have negative median fluxes
    #
    logger.info("eliminating all lines with median intensity < 0")
    negative_intensity = central_position[:, 3] < 0
    all_traces[negative_intensity, :, :] = numpy.nan

    #
    # Do the spatial outlier correction first
    #
    profiles = all_traces[:, :, 1]
    print(profiles.shape)
    for iteration in range(3):
        print("\nIteration:", iteration)
        print(profiles.shape)
        try:
            quantiles = numpy.array(numpy.nanpercentile(
                a=profiles, q=[16, 50, 84], axis=0,
            ))
            print("new:", quantiles.shape)
        except:
            break

        median = quantiles[1, :]
        sigma = 0.5 * (quantiles[2, :] - quantiles[0, :])
        outlier = (profiles > median + 3 * sigma) | (profiles < median - 3 * sigma)
        profiles[outlier] = numpy.nan
        all_traces[:, :, 3][outlier] = numpy.nan

    with open("skylines.traces.clean", "w") as lt2:
        for line in range(all_traces.shape[0]):
            numpy.savetxt(lt2, all_traces[line, :, :])
            print("\n" * 5, file=lt2)

    medians = bottleneck.nanmedian(all_traces, axis=0)
    numpy.savetxt("skylines.traces.median", medians)
    print(medians.shape)

    stds = bottleneck.nanstd(all_traces, axis=0)
    stds[:, 0] = medians[:, 0]
    numpy.savetxt("skylines.traces.std", stds)

    #
    # Now reconstruct the final line traces, filling in gaps with values
    # predicted by the median profile
    #
    logger.info("Reconstructing individual line profiles")
    if (False):
        all_median = numpy.repeat(medians.reshape((-1, 1)), all_traces.shape[0], axis=1)
        print(all_median.shape, all_traces[:, :, 1].shape)
        outlier = numpy.isnan(all_traces[:, :, 1])
        print(outlier.shape)
        print(outlier)
        try:
            all_traces[:, :, 1][outlier] = all_median[:, :][outlier]
        except:
            pass
        all_traces[:, :, 1] += central_position[:, 1:2]

        with open("skylines.traces.corrected", "w") as lt2:
            for line in range(all_traces.shape[0]):
                numpy.savetxt(lt2, all_traces[line, :, :])
                print("\n" * 5, file=lt2)
        with open("skylines.traces.corrected2", "w") as lt2:
            for line in range(all_traces.shape[0]):
                numpy.savetxt(lt2, all_median[:, :])
                print("\n" * 5, file=lt2)

    # compute average intensity profile, weighting each line profile by its
    # median intensity
    logger.info("Computing intensity profile")
    print(central_position[:, 3])
    sort_intensities = numpy.argsort(central_position[:, 3])
    strong_lines = sort_intensities[-10:]
    print(strong_lines)

    strong_line_fluxes = central_position[:, 3][strong_lines]
    strong_line_traces = all_traces[strong_lines, :, :]
    print(strong_line_traces.shape)

    i_sum = bottleneck.nansum(
        strong_line_traces[:, :, 3] * strong_line_fluxes.reshape((-1, 1)), axis=0)
    i_count = bottleneck.nansum(
        strong_line_traces[:, :, 3] / strong_line_traces[:, :, 3]
        * strong_line_fluxes.reshape((-1, 1)), axis=0)
    i_avg = i_sum / i_count
    print(i_sum.shape)
    numpy.savetxt("skylines.traces.meanflux", i_avg)

    fm = filter_with_padding(i_avg, w=50, fct=bottleneck.nanmedian)
    print(fm.shape)
    numpy.savetxt("skylines.traces.meanflux2", fm)

    #
    # Now fit each individual profile by scaling the median profile
    #
    scalings = []

    def arc_model(p, medianarc):
        return p[0] * medianarc + p[1] * (
            numpy.arange(medianarc.shape[0]) - medianarc.shape[0] // 2)

    def arc_error(p, arc, medianarc):
        model = arc_model(p, medianarc)
        diff = (arc - model)
        valid = numpy.isfinite(diff)
        return diff[valid] if numpy.sum(valid) > 0 else medianarc[numpy.isfinite(medianarc)]

    good_flux = fm > 0.5 * numpy.max(fm)
    for i_arc in range(all_traces.shape[0]):
        if (numpy.isnan(central_position[i_arc, 1])):
            continue

        comb = numpy.empty((all_traces.shape[1], 6))
        comb[:, :4] = all_traces[i_arc, :, :]
        comb[:, 4] = medians[:, 1]

        ypos = int(central_position[i_arc, 1])

        p_init = [1.0, 0.0]
        fit_args = (all_traces[i_arc, :, 1][good_flux], medians[:, 1][good_flux])
        fit_result = scipy.optimize.leastsq(
            arc_error,
            p_init,
            args=fit_args,
            maxfev=500,
            full_output=1,
        )
        p_bestfit = fit_result[0]
        print(central_position[i_arc, 1], p_bestfit)

        scaling = comb[:, 4] / comb[:, 1]
        scalings.append([
            ypos,
            bottleneck.nanmedian(scaling),
            bottleneck.nanmean(scaling),
            p_bestfit[0],
            p_bestfit[1],
        ])
        med_scaling = bottleneck.nanmedian(scaling)

        comb[:, 5] = arc_model(p_bestfit, medians[:, 1])
        numpy.savetxt("ARC_%04d.delete" % (ypos), comb)

    numpy.savetxt("all_scalings", numpy.array(scalings))

    def model_linear(p, x):
        model = p[0] * x + p[1]
        return model

    def fit_linear(p, x, y):
        model = model_linear(p, x)
        diff = y - model
        valid = numpy.isfinite(diff)
        return diff[valid] if valid.any() else y

    fit_scalings = numpy.array(scalings)
    p_scale = fit_with_rejection(
        fit_scalings[:, 0], fit_scalings[:, 3],
        fit_linear,
        [0., 1.],
    )
    fit_scalings[:, 1] = model_linear(p_scale, fit_scalings[:, 0])
    numpy.savetxt("all_scalings_scale", numpy.array(fit_scalings))

    fit_skew = numpy.array(scalings)
    p_skew = fit_with_rejection(
        fit_scalings[:, 0], fit_scalings[:, 4],
        fit_linear,
        [0., 0.],
    )
    fit_skew[:, 2] = model_linear(p_skew, fit_scalings[:, 0])
    numpy.savetxt("all_scalings_skew", numpy.array(fit_skew))

    #
    # Now compute spline function for the median curvature profile
    #
    logger.info("Computing spline function for median curvature profile")
    mc_spline = scipy.interpolate.interp1d(
        x=numpy.arange(medians.shape[0]),
        y=medians[:, 1],
        kind='linear',
        bounds_error=False,
        fill_value=0,
    )

    #
    # Compute full 2-d map of effective X positions
    #
    logger.debug("Computing full 2-D effective-X map")
    y, x = numpy.indices(imgdata.shape)
    # x_eff = x + x*(p_scale[0]*mc_spline(y) + p_skew[0]*y) + p_scale[1] + p_skew[1]*y
    print("best-fit curvature scaling:", p_scale)
    print("best-fit curvature skew:", p_skew)

    x_eff = x
    for iteration in range(3):
        x_eff = x - ((p_scale[0] * x_eff + p_scale[1]) * mc_spline(y)
                     + (p_skew[0] * x_eff + p_skew[1]) * (y - imgdata.shape[0] // 2))
        fits.PrimaryHDU(data=x_eff).writeto("x_eff_%d.fits" % (iteration + 1), overwrite=True)

    #
    # Convert x-eff map to wavelength
    #
    wl_map = 0.
    for order in range(hdulist[0].header['WLSFIT_N']):
        a = hdulist[0].header['WLSFIT_%d' % (order)]
        wl_map += a * numpy.power(x_eff, order)
    fits.PrimaryHDU(data=wl_map).writeto("wl_map.fits", overwrite=True)

    return x_eff, wl_map, medians, p_scale, p_skew, fm
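# fit_with_rejection is called above but not defined in this excerpt. The
# sketch below is an assumption about its behavior, not the original
# implementation: an iterative scipy.optimize.leastsq fit whose large
# residuals are sigma-clipped between rounds.
import numpy
import scipy.optimize
import bottleneck

def fit_with_rejection(x, y, fct, p_init, n_iter=3, clip=3.0):
    x = numpy.asarray(x, dtype=float)
    y = numpy.asarray(y, dtype=float)
    good = numpy.isfinite(x) & numpy.isfinite(y)
    p_best = numpy.asarray(p_init, dtype=float)
    for _ in range(n_iter):
        fit = scipy.optimize.leastsq(fct, p_best, args=(x[good], y[good]),
                                      full_output=1)
        p_best = fit[0]
        # residuals of the surviving points under the current best fit
        res = fct(p_best, x[good], y[good])
        sigma = bottleneck.nanstd(res)
        keep = numpy.fabs(res) < clip * sigma
        # translate the per-survivor mask back to a mask over all points
        idx = numpy.where(good)[0]
        good = numpy.zeros_like(good)
        good[idx[keep]] = True
    return p_best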
def time_nanstd(self, dtype, shape):
    bn.nanstd(self.arr)
def time_nanstd(self, dtype, shape, order, axis):
    bn.nanstd(self.arr, axis=axis)
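# The two time_nanstd methods above reference self.arr without showing how it
# is built. A minimal asv-style benchmark class that could host the second
# variant is sketched below; the parameter lists and setup are assumptions,
# not the project's actual benchmark configuration.
import numpy as np
import bottleneck as bn

class NanStdSuite:
    params = [["float64", "float32"], [(1000, 1000)], ["C", "F"], [0, 1]]
    param_names = ["dtype", "shape", "order", "axis"]

    def setup(self, dtype, shape, order, axis):
        # random data with a sprinkling of NaNs, laid out in the requested order
        arr = np.random.rand(*shape).astype(dtype)
        arr[arr > 0.9] = np.nan
        self.arr = np.asarray(arr, order=order)

    def time_nanstd(self, dtype, shape, order, axis):
        bn.nanstd(self.arr, axis=axis)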
def std(data, axis=None):
    # move the reduction axis to the front so the underlying nanstd always
    # reduces along axis 0; axis=None (or 0) needs no swap
    if axis is not None and axis > 0:
        return nanstd(data.swapaxes(0, axis), axis=0)
    else:
        return nanstd(data, axis=axis)
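# Quick sanity check for the std wrapper above (a sketch, assuming `nanstd`
# here refers to bottleneck's nanstd imported under that name): for 2-D input
# the swapaxes trick reproduces numpy's nanstd while always reducing along
# axis 0.
import numpy as np
from bottleneck import nanstd

a = np.random.rand(100, 5)
a[0, 0] = np.nan
print(np.allclose(std(a, axis=1), np.nanstd(a, axis=1)))
print(np.isclose(std(a), np.nanstd(a)))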