def biweight(x, cst):
    """
    Computes the biweight average and midvariance for a given 1D array.
    Returns a tuple (biweight mean, biweight variance).

    Parameters
    ----------
    x : {ndarray}
        Input array.
    cst : {float}
        Parameter controlling how outliers are censored.

    Notes
    -----
    The function is restricted to 1D data only.
    """
    assert x.ndim == 1, "1D array only !"
    xmed = ma.median(x, 0)
    manom = x - xmed
    mad = ma.median(ma.absolute(manom))
    u_i = (manom / float(cst * mad))
    u_i *= ma.less_equal(ma.absolute(u_i), 1.).astype(float)
    w_i = (1 - u_i**2)
    if ma.count(w_i) > 0:
        biw_m = xmed + ma.sum(manom * w_i**2) / ma.sum(w_i**2)
    else:
        biw_m = xmed
    biw_sd = ma.sqrt(ma.count(x) * ma.sum(manom**2 * w_i**4))
    biw_sd *= 1. / ma.absolute(ma.sum(w_i * (1 - 5 * u_i**2)))
    return (biw_m, biw_sd.item())
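# Minimal usage sketch (an assumption, not part of the original code): it calls
# the biweight() function defined above on a small sample containing an outlier
# and an invalid value. cst sets the censoring scale in units of the median
# absolute deviation; cst=6 is a common choice for biweight estimators.
import numpy as np
import numpy.ma as ma

x = ma.masked_invalid(np.array([9.8, 10.1, 10.0, 9.9, 10.2, 55.0, np.nan]))
robust_mean, robust_spread = biweight(x, cst=6.0)
print(robust_mean, robust_spread)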
def ant_beam(gsm_array, gsm_var, gaindb, sim_var, label, freq, plotf):
    """
    Combines the gsm and sim datasets for a given place/time.
    Note I've limited the frequency range that is loaded to avoid memory errors.
    Re-wrote to limit to a single frequency.
    Expects sim_var, gsm_var to be in degrees.
    """
    gain_beam = sim_beam_interp(gsm_var, gaindb, sim_var)
    full_beam = gain_beam * gsm_array
    # Zero out any NaN pixels before summing.
    nandata = np.where(np.isnan(full_beam))
    full_beam[nandata] = 0.0
    summed_beam = ma.sum(ma.sum(full_beam, axis=0), axis=0)
    summed_sim = ma.sum(ma.sum(gain_beam, axis=0), axis=0)
    # Allows you to make plots to check results at a single frequency only
    # if you set plotf to be within the frequency range of the data.
    if freq == plotf:
        plt.rc('font', size=8)
        plt.subplot(411)
        plt.scatter(sim_var[:, 0] * 180. / np.pi, sim_var[:, 1] * 180. / np.pi,
                    s=1, linewidth=0, c=np.power(10., 0.05 * gaindb),
                    vmin=0, vmax=3, cmap=cm.jet)
        plt.colorbar()
        plt.xlim(0, 360)
        plt.ylim(-90, 90)
        plt.ylabel('DEC (degrees)')
        plt.title('Simulated HIbiscus Beam (linear power)')
        plt.subplot(412)
        plt.scatter(gsm_var[:, 0], gsm_var[:, 1], s=1, linewidth=0, c=gain_beam,
                    vmin=0, vmax=3, cmap=cm.jet)
        plt.colorbar()
        plt.xlim(0, 360)
        plt.ylim(-90, 90)
        plt.ylabel('DEC (degrees)')
        plt.title('Interpolated HIbiscus Beam (linear power)')
        plt.subplot(413)
        plt.scatter(gsm_var[:, 0], gsm_var[:, 1], s=1, linewidth=0, c=gsm_array,
                    vmin=0, vmax=2e4, cmap=cm.jet)
        plt.colorbar()
        plt.xlim(0, 360)
        plt.ylim(-90, 90)
        plt.ylabel('DEC (degrees)')
        plt.title('GSM Data (Kelvin)')
        plt.subplot(414)
        plt.scatter(gsm_var[:, 0], gsm_var[:, 1], s=1, linewidth=0, c=full_beam,
                    vmin=0, vmax=5e4, cmap=cm.jet)
        plt.colorbar()
        plt.xlim(0, 360)
        plt.ylim(-90, 90)
        plt.xlabel('RA (degrees)')
        plt.ylabel('DEC (degrees)')
        plt.title('Expected Signal (Kelvin)')
        plt.subplots_adjust(hspace=0.4)
        plt.savefig(label, dpi=300)
        plt.clf()
    final_result = summed_beam / summed_sim
    return final_result
def computeAveragesUsingNumpy():
    global sizeX, sizeY, sizeZ
    flattenedArrays = []
    for fileName in fileNames:
        fpath = os.path.join(basepath, fileName)
        print('processing %s' % fpath)
        year = fileName.split('_')[-1][:-4]
        dataset = gdal.Open(fpath)
        sumArray = ma.zeros((dataset.RasterYSize, dataset.RasterXSize))
        total = 0
        count = 0
        numBands = dataset.RasterCount
        for bandId in range(numBands):
            band = ma.masked_outside(dataset.GetRasterBand(bandId + 1).ReadAsArray(),
                                     VALUE_RANGE[0], VALUE_RANGE[1])
            sumArray += band
        sumArray /= numBands
        total = ma.sum(ma.sum(sumArray))
        count = sumArray.count()
        minCell = ma.min(sumArray)
        maxCell = ma.max(sumArray)
        sizeX = dataset.RasterXSize
        sizeY = dataset.RasterYSize
        flattenedArrays.append(np.ndarray.flatten(sumArray[::-1, :], 0).astype(np.dtype(np.int32)))
    sizeZ = len(flattenedArrays)
    return np.ma.concatenate(flattenedArrays)
def entropy(array, dim=None):
    if dim is None:
        array = array.ravel()
        dim = 0
    n = ma.sum(array, dim)
    array = ma.log(array) * array
    sum = ma.sum(array, dim)
    return (ma.log(n) - sum / n) / ma.log(2.0)
def add_chunk(self, chunk):
    if self.masked:
        ma.sum(chunk, axis=self.axis, out=self.temp)
        self.running_total += self.temp.filled(0)
        self.running_count += ma.count(chunk, axis=self.axis)
    else:
        np.sum(chunk, axis=self.axis, out=self.temp)
        self.running_total += self.temp
def average_in_flux(mag, dmag, axis=None):
    flux = 10**(mag / -2.5)
    dflux = np.log(10) / 2.5 * flux * dmag
    avg_dflux = np.power(np.sum(np.power(dflux, -2), axis), -0.5)
    avg_flux = np.sum(flux * np.power(dflux, -2), axis) * avg_dflux**2
    avg_mag = -2.5 * np.log10(avg_flux)
    avg_dmag = 2.5 / np.log(10) * np.divide(avg_dflux, avg_flux)
    return avg_mag, avg_dmag
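# Minimal usage sketch (an assumption, not part of the original code): combine
# three magnitude measurements via the inverse-variance weighted mean in flux
# space implemented by average_in_flux() above. Sample values are illustrative.
import numpy as np

mags = np.array([18.20, 18.35, 18.28])
dmags = np.array([0.05, 0.10, 0.07])
avg_mag, avg_dmag = average_in_flux(mags, dmags)
print(avg_mag, avg_dmag)  # weighted mean magnitude and its uncertainty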
def _compute_variable_stats(variable, axis, weights, calc_avg, calc_min,
                            calc_max, calc_stddev, calc_count):
    '''
    Calculate statistics for a single variable.
    '''
    # Get the data out. Note: scale_factor and add_offset are automatically
    # applied.
    data = variable[:]
    # Make sure data is a masked array
    data = ma.masked_array(data)
    # Broadcast the weights before we try to combine the masks for data and
    # weights
    weights = ma.masked_array(
        data=np.broadcast_to(weights.data, data.shape),
        mask=np.broadcast_to(ma.getmaskarray(weights), data.shape))
    # We want all our calculations to happen over areas that are unmasked in
    # both the weights and data
    combined_mask = np.logical_or(ma.getmaskarray(data), ma.getmaskarray(weights))
    data = ma.masked_array(data.data, mask=combined_mask)
    weights = ma.masked_array(weights.data, mask=combined_mask)
    out = {}
    if calc_count:
        # Irritatingly, the ma.count function can only take one value at a time
        # for the axis. So, instead, construct an array of ones
        ones = np.ones(data.shape)
        # Set the masked areas to 0
        ones[combined_mask] = 0
        out["count"] = ma.sum(ones, axis=axis)
    if calc_min:
        out["min"] = ma.min(data, axis=axis)
    if calc_max:
        out["max"] = ma.max(data, axis=axis)
    # Note: standard deviation needs the weighted average and the weights sum
    if calc_avg or calc_stddev:
        sum_weights = _add_axes_back(ma.sum(weights, axis=axis), axis)
        weighted_avg_numerator = _add_axes_back(
            ma.sum(weights * data, axis=axis), axis)
        weighted_avg = weighted_avg_numerator / sum_weights
        if calc_avg:
            out["avg"] = ma.squeeze(weighted_avg, axis=axis)
        if calc_stddev:
            # calculate the anomaly
            anomaly = data - weighted_avg
            # calculate the standard deviation
            variance = ma.sum(weights * (anomaly**2) / sum_weights, axis=axis)
            out["stddev"] = np.sqrt(variance)
    return out
def printstuff(vals, vals_sum, file_count):
    # create running sum of values
    print(file_count)
    print('----------------')
    print('sum', ma.sum(vals))
    print('total sum', ma.sum(vals_sum))
    print('----------------')
    print('max', ma.amax(vals))
    print('min', ma.amin(vals))
    print('\n----------------')
def muti_bin2(css, Nc, bs, nc):
    ncs_1 = ma.array(
        np.apply_along_axis(lambda x: np.bincount(x, minlength=Nc),
                            1, css[:, bs == 1]))
    ncs_0 = nc - ncs_1
    N = len(bs)
    nb1 = bs.sum()
    nb0 = N - nb1
    return (1. / N * (ma.sum(ncs_1 * ma.log(1. * N * ncs_1 / nc / nb1), 1).filled(0) +
                      ma.sum(ncs_0 * ma.log(1. * N * ncs_0 / nc / nb0), 1).filled(0)))
def _var(A, axis=0, keepdims=True, weights=None):
    if weights is None:
        return npm.var(A, axis=axis, keepdims=keepdims)
    else:
        mu = npm.average(A, axis=axis, keepdims=keepdims, weights=weights)
        w = npm.sum(weights, axis=axis, keepdims=keepdims)
        # Note: ma.sum takes no `weights` keyword; the weighting is applied
        # explicitly through the product below.
        var = npm.sum(weights * (A - mu)**2, axis=axis,
                      keepdims=keepdims) / w**2
        return var
def printstuff(vals, vals_sum, file_count):
    # create running sum of values
    print(file_count)
    print('----------------')
    print('sum', ma.sum(vals))
    print('total sum', ma.sum(vals_sum))
    print('perc masked:', ma.count_masked(vals_sum) / vals_sum.size * 100., '%')
    print('----------------')
    print('max', ma.amax(vals))
    print('min', ma.amin(vals))
    print('\n----------------')
def simple_extraction(x, y, binNum, cube, verbose=False, type='sum'):
    """Extract a spectrum by simply summing all the pixels in a bin
    at each wavelength value.

    Ignore nans by making a global 'nan' mask and combining it with the
    bin mask.

    Can either add spectra by summing or median combining (very simply!
    No continuum addition or anything)
    """
    binNum = binNum.astype(int)
    number_of_bins = len(np.unique(binNum)[np.unique(binNum) != -1])
    d1, d2, d3 = cube.shape
    spectra = np.empty((number_of_bins, d1))
    # Mask all nans in the cube
    nan_mask = np.zeros_like(cube).astype(bool)
    nan_values = np.where(~np.isfinite(cube))
    nan_mask[nan_values] = True
    for i in range(number_of_bins):
        if verbose:
            print("Extracting spectrum {} of {}".format(i, number_of_bins))
        # Create a mask with True where the bin indices are, false everywhere else
        inds = np.where(binNum == (i))
        x_inds = x[inds].astype(int)
        y_inds = y[inds].astype(int)
        aperture_indices = [y_inds, x_inds]
        # True corresponds to masked
        mask = np.ones_like(cube[0, :, :]).astype(bool)
        mask[y_inds, x_inds] = False
        aperture_mask = np.repeat(mask[np.newaxis, :, :], d1, axis=0)
        final_mask = np.bitwise_or(aperture_mask, nan_mask)
        masked_cube = ma.array(cube, mask=final_mask)
        if type == 'sum':
            spectra[i, :] = ma.sum(ma.sum(masked_cube, axis=2), axis=1)
        elif type == 'median':
            spectra[i, :] = ma.median(ma.median(masked_cube, axis=2), axis=1)
        else:
            raise NameError('Type of combination not understood')
    return spectra, nan_mask
def __call__(self, target):
    array = equi_n_discretization(self.array.copy(), intervals=5, dim=0)
    ind1, ind2 = self.test_indices(target)
    a1, a2 = array[ind1, :], array[ind2, :]
    dist1, dist2 = [], []
    dist = ma.zeros((array.shape[1], 2, 5))
    for i in range(5):
        dist1.append(ma.sum(ma.ones(a1.shape) * (a1 == i), 0))
        dist2.append(ma.sum(ma.ones(a2.shape) * (a2 == i), 0))
        dist[:, 0, i] = dist1[-1]
        dist[:, 1, i] = dist2[-1]
    return list(zip(self.keys, achisquare_indtest(np.array(dist), dim=1)))
def getExtentAreaFromConc(iceConcMon):
    iceConcMon = ma.masked_where(iceConcMon <= 0.15, iceConcMon)
    iceConcMon = ma.masked_where(ice_flag >= 1.5, iceConcMon)
    concHole = ma.mean(iceConcMon[(lats > pmask - 0.5) & (lats < pmask)])
    iceConcMonP = ma.where((lats >= pmask), 1., iceConcMon)
    iceConcMonA = ma.where((lats >= pmask), concHole, iceConcMon)
    iceExtent = ma.sum(ma.where((iceConcMonP > 0.15), 1, 0) * areaF)
    iceArea = ma.sum(iceConcMonA * areaF)
    return iceExtent, iceArea
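# Standalone sketch (an assumption, not part of the original code) of the
# extent/area bookkeeping used in getExtentAreaFromConc() above: extent counts
# the area of every cell whose concentration exceeds 0.15, while area weights
# each cell by its concentration. Toy 2x2 grid, illustrative only.
import numpy as np
import numpy.ma as ma

conc = ma.masked_invalid(np.array([[0.9, 0.1], [0.5, np.nan]]))
cell_area = np.full(conc.shape, 625.0)  # km^2 per grid cell (hypothetical)
extent = ma.sum(ma.where(conc > 0.15, 1, 0) * cell_area)
area = ma.sum(ma.masked_where(conc <= 0.15, conc) * cell_area)
print(extent, area)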
def computeAveragesAndBuildImgData():
    imgDims = [1440, 720, 10]
    imgData = vtk.vtkImageData()
    imgData.SetDimensions(imgDims[0] + 1, imgDims[1] + 1, imgDims[2] + 1)
    imgData.SetSpacing(1, 1, 1)
    imgData.SetOrigin(0, 0, 0)
    imgData.SetExtent(0, imgDims[0], 0, imgDims[1], 0, imgDims[2])
    imgData.AllocateScalars(vtk.VTK_FLOAT, 1)
    flattenedArrays = []
    for fileName in fileNames:
        fpath = os.path.join(basepath, fileName)
        print('processing %s' % fpath)
        year = fileName.split('_')[-1][:-4]
        dataset = gdal.Open(fpath)
        sumArray = ma.zeros((dataset.RasterYSize, dataset.RasterXSize))
        total = 0
        count = 0
        numBands = dataset.RasterCount
        for bandId in range(numBands):
            band = ma.masked_outside(dataset.GetRasterBand(bandId + 1).ReadAsArray(),
                                     VALUE_RANGE[0], VALUE_RANGE[1])
            sumArray += band
        sumArray /= numBands
        total = ma.sum(ma.sum(sumArray))
        count = sumArray.count()
        minCell = ma.min(sumArray)
        maxCell = ma.max(sumArray)
        imgDims = [dataset.RasterXSize, dataset.RasterYSize]
        # flattenedArrays.append(np.ndarray.flatten(sumArray[::-1,:], 0).astype(np.dtype(np.float32)))
        flattenedArrays.append(np.ndarray.flatten(sumArray[::-1, :], 0))
    allYearsAvgs = np.ma.concatenate(flattenedArrays)
    cellData = imgData.GetCellData()
    dataArray = numpy_support.numpy_to_vtk(np.ndarray.flatten(allYearsAvgs, 0), deep=1)
    dataArray.SetName('Annual Avg Temp')
    cellData.SetScalars(dataArray)
    imgWriter = vtkIOXML.vtkXMLImageDataWriter()
    imgWriter.SetFileName('/data/scott/Documents/tasmax.vti')
    imgWriter.SetInputData(imgData)
    imgWriter.Write()
    print('\nFinished writing image data\n')
def _find_lidar_only_clouds(detection: np.ndarray) -> np.ndarray:
    """Finds top clouds that contain only lidar-detected pixels.

    Args:
        detection: Array of integers where 1=lidar, 2=radar, 3=both.

    Returns:
        Boolean array containing top-clouds that are detected only by lidar.
    """
    sum_of_cloud_pixels = ma.sum(detection > 0, axis=1)
    sum_of_detection_type = ma.sum(detection, axis=1)
    return sum_of_cloud_pixels / sum_of_detection_type == 1
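# Minimal usage sketch (an assumption, not part of the original code) for
# _find_lidar_only_clouds() above. Each row is a profile; values follow the
# 1=lidar, 2=radar, 3=both coding. A profile whose detected pixels are all
# lidar-only sums to exactly its pixel count, so the ratio test returns True.
import numpy as np
import numpy.ma as ma

detection = ma.array([[1, 1, 0, 0],    # lidar-only cloud -> True
                      [1, 2, 3, 0]])   # mixed detections -> False
print(_find_lidar_only_clouds(detection))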
def calculateELBO(self):
    # Compute Lower Bound using the Gaussian likelihood with pseudodata
    # TO-DO: MASK MISSING VALUES???
    Z = self.markov_blanket["Z"].getExpectation()
    SW = self.markov_blanket["SW"].getExpectation()
    tau = self.markov_blanket["Tau"].getExpectation()
    N = Z.shape[0]
    # (1) tau is of shape (D,) (2) missing a constant term
    lb = 0.5 * (N * ma.sum(s.log(tau)) - ma.sum(tau * (self.E - s.dot(Z, SW.T))**2))
    # tau_expanded = s.repeat(tau[None,:],N,0)
    # tau_expanded = ma.masked_where(ma.getmask(self.obs), tau_expanded)
    # lb = 0.5*( ma.sum(s.log(tau_expanded)) - ma.sum(tau_expanded*(self.E-s.dot(Z,SW.T))**2 ) )
    return lb
def maskImageStats(mimage):
    n = ma.count(mimage)
    mimagesq = mimage * mimage
    sum1 = ma.sum(mimage)
    sum2 = ma.sum(sum1)
    sumsq1 = ma.sum(mimagesq)
    sumsq2 = ma.sum(sumsq1)
    avg = sum2 / n
    if (n > 1):
        stdev = math.sqrt((sumsq2 - sum2 * sum2 / n) / (n - 1))
    else:
        stdev = 2e20
    return n, avg, stdev
def computeAveragesUsingNumpy():
    for fileName in fileNames:
        fpath = os.path.join(basepath, fileName)
        print('processing %s' % fpath)
        year = fileName.split('_')[-1][:-4]
        dataset = gdal.Open(fpath)
        sumArray = ma.zeros((dataset.RasterYSize, dataset.RasterXSize))
        total = 0
        count = 0
        numBands = dataset.RasterCount
        for bandId in range(numBands):
            band = ma.masked_outside(
                dataset.GetRasterBand(bandId + 1).ReadAsArray(),
                VALUE_RANGE[0], VALUE_RANGE[1])
            sumArray += band
        sumArray /= numBands
        total = ma.sum(ma.sum(sumArray))
        count = sumArray.count()
        minCell = ma.min(sumArray)
        maxCell = ma.max(sumArray)
        imgDims = [dataset.RasterXSize, dataset.RasterYSize]
        print(' finished computing average for %s, writing image data' % year)
        imgData = vtk.vtkImageData()
        imgData.SetDimensions(imgDims[0], imgDims[1], 0)
        imgData.SetSpacing(1, 1, 1)
        imgData.SetOrigin(0, 0, 0)
        imgData.SetExtent(0, imgDims[0] - 1, 0, imgDims[1] - 1, 0, 0)
        imgData.AllocateScalars(vtk.VTK_FLOAT, 1)
        pointData = imgData.GetPointData()
        dataArray = numpy_support.numpy_to_vtk(np.ndarray.flatten(
            sumArray[::-1, :], 0).astype(np.dtype(np.int32)), deep=1)
        dataArray.SetName('Annual Avg Temp')
        pointData.SetScalars(dataArray)
        imgWriter = vtkIOXML.vtkXMLImageDataWriter()
        imgWriter.SetFileName('tasmax_%s.vti' % (year))
        imgWriter.SetInputData(imgData)
        imgWriter.Write()
        print(' finished writing image data for %s' % year)
def ant_beam_simple(gsm_array, gain_beam):
    """
    Just doing the calculation of the single beam value for two arrays.
    Both arrays must have the same RA/DEC array.
    """
    full_beam = gain_beam * gsm_array
    # Zero out any NaN pixels before summing.
    nandata = np.where(np.isnan(full_beam))
    full_beam[nandata] = 0.0
    summed_beam = ma.sum(ma.sum(full_beam, axis=0), axis=0)
    summed_sim = ma.sum(ma.sum(gain_beam, axis=0), axis=0)
    final_result = summed_beam / summed_sim
    return final_result
def bad_data(data_col, threshold=5.0, max_iter=5, fubar_fraction=0.5, verbose=False):
    xx, xy, yx, yy, num_pol = split_data_col(data_col)
    flags = reduce(logical_or,
                   map(lambda x: single_correlation_flags(x, threshold=threshold,
                                                          max_iter=max_iter,
                                                          verbose=verbose),
                       [xx, xy, yx, yy]))
    bad_channels = ma.sum(flags, axis=0) > data_col.shape[0] * fubar_fraction
    bad_timeslots = ma.sum(flags, axis=1) > data_col.shape[1] * fubar_fraction
    flags |= logical_or(bad_channels[newaxis, :], bad_timeslots[:, newaxis])
    full_flags = zeros(data_col.shape, dtype=bool)
    for i in range(4):
        full_flags[:, :, i] = flags
    return full_flags
def get_slices(target, forest_lim, norm_lim, z, loglam, ivar):
    offset = get_fiducial_pixel_index_offset(loglam.data[0])
    # determine fiducial wavelength offsets of observed forest pixels
    forest_lo_index, forest_hi_index = get_index_lim(forest_lim, z, clip=loglam.data)
    # check to see if the observed wavelength range overlaps the forest
    if forest_lo_index >= forest_hi_index:
        raise ForestError('{}: no forest pixels [{}:{}], z = {}'.format(
            target['target'], forest_lim[0], forest_lim[1], z))
    # the uniform wavelength grid slice to use for this observation
    forest_slice = slice(forest_lo_index, forest_hi_index)
    spec_slice = slice(forest_lo_index - offset, forest_hi_index - offset)
    # check for unmasked pixels in forest window
    if ma.sum(ivar[spec_slice].mask) == len(ivar[spec_slice]):
        raise ForestError(
            '{}: no unmasked pixels in forest [{}:{}], z = {}'.format(
                target['target'], forest_lim[0], forest_lim[1], z))
    # find normalization window
    norm_lo_index, norm_hi_index = get_index_lim(norm_lim, z)
    norm_slice = slice(norm_lo_index - offset, norm_hi_index - offset)
    return forest_slice, spec_slice, norm_slice
def train_step_sequential(self, epoch, indices=None):
    """A single step of the sequential training algorithm.
    """
    indices = range(len(self.data)) if indices is None else indices
    for ind in indices:
        x = self.data[ind]
        Dx = self.vectors - self.data[ind]
        Dist = ma.sum(Dx**2, 1)
        min_dist = ma.min(Dist)
        bmu = ma.argmin(Dist)
        self.distances.append(min_dist)
        iter = epoch * len(self.data) + ind
        if self.neighbourhood == Map.NeighbourhoodGaussian:
            h = numpy.exp(-self.unit_distances[:, bmu]**2 / (2 * self.radius_seq(iter)**2)) * \
                (self.unit_distances[:, bmu]**2 <= self.radius_seq(iter)**2)
        elif self.neighbourhood == Map.NeighbourhoodEpanechicov:
            h = 1.0 - (self.unit_distances[:, bmu] / self.radius_seq(iter))**2
            h = h * (h >= 0.0)
        else:
            h = 1.0 * (self.unit_distances[:, bmu] <= self.radius_seq(iter))
        h = h * self.alpha(iter)
        nonzero = ma.nonzero(h)
        h = h[nonzero]
        self.vectors[nonzero] = self.vectors[nonzero] - Dx[nonzero] * numpy.reshape(h, (len(h), 1))
def _calc_correlation(self, values_1, values_2, conf_level=0.95):
    """ Calculates Pearson's correlation coefficient.

    Arguments:
        values_1 -- first data
        values_2 -- second data
        conf_level -- confidence level

    Returns:
        (corr_coeff, significance) -- correlation coefficient and significance arrays
    """
    n_samples = values_1.shape[0]  # Sample length
    # Calculate Pearson's correlation coefficient
    values_cov = ma.sum((values_1 - ma.mean(values_1, axis=0)) *
                        (values_2 - ma.mean(values_2, axis=0)), axis=0)
    corr_coef = values_cov / (ma.std(values_1, axis=0) * ma.std(values_2, axis=0)) / n_samples
    # Calculate significance using t-distribution with n-2 degrees of freedom.
    deg_fr = n_samples - 2  # Degrees of freedom.
    t_distr = ma.abs(corr_coef * ma.sqrt(deg_fr / (1. - corr_coef**2)))  # Student's t-distribution.
    prob = 0.5 + conf_level / 2  # Probability for two tails.
    cr_value = student_t.ppf(prob, deg_fr)  # Student's critical value.
    significance = ma.greater(t_distr, cr_value)
    return corr_coef, significance
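# Standalone sketch (an assumption, not part of the original code) of the
# significance test used in _calc_correlation() above: turn a correlation
# coefficient into a t-statistic with n-2 degrees of freedom and compare it
# against the two-tailed Student's t critical value. Toy 1-D data.
import numpy as np
import numpy.ma as ma
from scipy.stats import t as student_t

a = ma.masked_invalid(np.array([1.0, 2.0, 3.0, 4.0, np.nan, 6.0]))
b = ma.masked_invalid(np.array([1.1, 1.9, 3.2, 3.8, np.nan, 6.2]))
n = a.count()
r = ma.sum((a - a.mean()) * (b - b.mean())) / (a.std() * b.std()) / n
t_stat = abs(r * np.sqrt((n - 2) / (1. - r**2)))
cr_value = student_t.ppf(0.5 + 0.95 / 2, n - 2)
print(r, t_stat > cr_value)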
def get_sum_of_squared_errors_with_multiprocessing(self, training_examples, weight_index, pool):
    sum_of_squared_errors = pool.starmap(
        self.get_squared_error_multiprocessing,
        [(training_example, weight_index) for training_example in training_examples])
    return sum(sum_of_squared_errors)
def calculateELBO(self):
    # Compute Lower Bound using the Bernoulli likelihood with observed data
    Z = self.markov_blanket["Z"].getExpectation()
    SW = self.markov_blanket["SW"].getExpectation()
    tmp = s.dot(Z, SW.T)
    lik = ma.sum(self.obs * tmp - s.log(1. + s.exp(tmp)))
    return lik
def calculateCentroidMeasurements(self):
    self.X[self.badFrames, :] = ma.masked
    if not self.useSmoothingFilterDerivatives:
        self.v[1:-1] = (self.X[2:, :] - self.X[0:-2]) / (2.0 / self.frameRate)
    else:
        # use a cubic polynomial filter to estimate the velocity
        self.v = ma.zeros(self.X.shape)
        halfWindow = int(np.round(self.filterWindow / 2. * self.frameRate))
        for i in range(halfWindow, self.v.shape[0] - halfWindow):
            start = i - halfWindow
            mid = i
            finish = i + halfWindow + 1
            if not np.any(self.X.mask[start:finish, :]):
                px = np.polyder(np.polyfit(self.t[start:finish] - self.t[mid],
                                           self.X[start:finish, 0], 3))
                py = np.polyder(np.polyfit(self.t[start:finish] - self.t[mid],
                                           self.X[start:finish, 1], 3))
                self.v[i, :] = [np.polyval(px, 0), np.polyval(py, 0)]
            else:
                self.v[i, :] = ma.masked
    self.s = ma.sqrt(ma.sum(ma.power(self.v, 2), axis=1))
    self.phi = ma.arctan2(self.v[:, 1], self.v[:, 0])
    self.t[self.badFrames] = ma.masked
    self.X[self.badFrames, :] = ma.masked
    self.v[self.badFrames, :] = ma.masked
    self.s[self.badFrames] = ma.masked
    self.phi[self.badFrames] = ma.masked
def moment(self, chanrange=None, p=1):
    """Compute the power-weighted moment of a spectrum.

    If a mask exists, this function operates on the masked spectrum.

        f_mean = Sum(spec[i]**2 * (f[i] - mom{p-1})^p) / Sum(spec[i]**2)

    where spec[i] is the intensity at channel i (spec[i]**2 is the power),
    f[i] is the frequency at channel i, p is the moment power, and
    mom{p-1} is the (p-1)-th moment [for p > 1].

    Parameters
    ----------
    chanrange: range of channels over which to compute moment
               [startchan, endchan]
    p: the moment to compute (the power of the frequency in the sum)

    Returns:
        The computed moment
    """
    # get the masked array
    s = self.spec()
    chupper = len(s) - 1
    chanrange = self._sanitizechanrange(chanrange, chupper)
    sum_s = ma.sum(s[chanrange[0]:chanrange[1] + 1] * s[chanrange[0]:chanrange[1] + 1])
    sum_sf = 0
    mean = 0
    if p > 1:
        mean = self.moment(chanrange, p - 1)
    for i in range(chanrange[0], chanrange[1] + 1):
        sum_sf += s[i] * s[i] * math.pow((self._freq[i] - mean), p)
    return sum_sf / sum_s
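# Standalone sketch (an assumption, not part of the original code) of the p=1
# power-weighted moment defined in moment() above: the mean frequency of a
# spectrum weighted by intensity squared, with masked channels ignored by
# numpy.ma. Toy spectrum, illustrative only.
import numpy as np
import numpy.ma as ma

freq = np.linspace(100.0, 101.0, 11)  # GHz
spec = ma.masked_invalid(np.array([0., 1., 3., 6., 8., 6., 3., 1., np.nan, 0., 0.]))
power = spec**2
f_mean = ma.sum(power * freq) / ma.sum(power)  # power-weighted mean frequency
print(f_mean)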
def test_trace(self):
    (x, X, XX, m, mx, mX, mXX,) = self.d
    mXdiag = mX.diagonal()
    assert_equal(mX.trace(), mX.diagonal().compressed().sum())
    assert_(eq(mX.trace(),
               X.trace() - sum(mXdiag.mask * X.diagonal(), axis=0)))
def test_off_map(self):
    self.ra_inds[3] = 10
    self.dec_inds[6] = -1
    dirty_map.add_data_2_map(self.data, self.ra_inds, self.dec_inds,
                             self.map, self.noise_inv)
    self.assertAlmostEqual(ma.sum(self.map),
                           self.ntime * self.shape[-1] - 2 * self.shape[2])
def achisquare_indtest(observed, dim=None):
    if observed.ndim == 2:
        observed = ma.array([observed])
        if dim is not None:
            dim += 1
    if dim is None:
        dim = observed.ndim - 2
    rowtotal = ma.sum(observed, dim + 1)
    coltotal = ma.sum(observed, dim)
    total = ma.sum(rowtotal, dim)
    ones = ma.array(ma.ones(observed.shape))
    expected = ones * rowtotal.reshape(rowtotal.shape[:dim] + (-1, 1))
    a = ones * coltotal[..., np.zeros(observed.shape[dim], dtype=int), :]
    expected = expected * (a) / total.reshape((-1, 1, 1))
    chisq = ma.sum(ma.sum((observed - expected)**2 / expected, dim + 1), dim)
    return chisq
def momenti(self, chanrange=None, p=1):
    """Intensity-weighted moment.

    Note this moment does poorly for very narrow line segments where
    some channels may be negative.

    Parameters
    ----------
    chanrange: range of channels over which to compute moment
               [startchan, endchan]
    p: the moment to compute (the power of the frequency in the sum)

    Returns:
        The computed moment
    """
    # get the masked array
    s = self.spec()
    chupper = len(s) - 1
    chanrange = self._sanitizechanrange(chanrange, chupper)
    sum_s = ma.sum(s[chanrange[0]:chanrange[1] + 1])
    sum_sf = 0
    mean = 0
    if p > 1:
        mean = self.moment(chanrange, p - 1)
    for i in range(chanrange[0], chanrange[1] + 1):
        sum_sf += s[i] * math.pow((self._freq[i] - mean), p)
    return sum_sf / sum_s
def alias(t, fm, p):
    """
    Evaluate the Bayes Ratio between signal with P and 0.5*P

    Parameters
    ----------
    t  : Time series
    fm : Flux series
    p  : Parameter dictionary.
    """
    pA = copy.deepcopy(p)
    pA['P'] = 0.5 * pA['P']
    resL = LDTwrap(t, fm, pA)
    res = np.hstack(resL)
    # Masked array corresponding to P = 2 P
    tfold = getT(res['tdt'], pA['P'], pA['epoch'], pA['tdur'])
    tT = ma.masked_array(res['tdt'], copy=True, mask=tfold.mask)
    fT = ma.masked_array(res['fdt'], copy=True, mask=tfold.mask)
    X2 = lambda par: ma.sum((fT - keptoy.P05(pd2a(par), tT))**2)
    return X2(p), X2(pA)
def momenta(self, chanrange=None, p=1):
    """abs(intensity)-weighted moment.

    Does somewhat better than signed intensity-weighted moment.

    Parameters
    ----------
    chanrange: range of channels over which to compute moment
               [startchan, endchan]
    p: the moment to compute (the power of the frequency in the sum)

    Returns:
        The computed moment
    """
    # get the masked array
    s = self.spec()
    chupper = len(s) - 1
    chanrange = self._sanitizechanrange(chanrange, chupper)
    sum_s = ma.sum(ma.abs(s[chanrange[0]:chanrange[1] + 1]))
    sum_sf = 0
    mean = 0
    if p > 1:
        mean = self.moment(chanrange, p - 1)
    for i in range(chanrange[0], chanrange[1] + 1):
        sum_sf += ma.abs(s[i]) * math.pow((self._freq[i] - mean), p)
    return sum_sf / sum_s
def update_background(fn):
    with fits.open(fn, mode='update') as hdu:
        im = hdu[0].data.copy()
        mask = ~np.isfinite(im) + (im < DATA_FLOOR)
        if 'MASK' in hdu:
            mask += hdu['MASK'].data > 0
        im = ma.MaskedArray(im, mask=mask, copy=True)
        scim = sigma_clip(im)
        mean = ma.mean(scim)
        mean = mean if mean is not ma.masked else 0
        median = ma.median(scim)
        median = median if median is not ma.masked else 0
        stdev = ma.std(scim)
        stdev = stdev if stdev is not ma.masked else 0
        hdu['SCI'].header['bgmean'] = (mean, 'background sigma-clipped mean')
        hdu['SCI'].header['bgmedian'] = (median, 'background sigma-clipped median')
        hdu['SCI'].header['bgstdev'] = (stdev, 'background sigma-clipped standard dev.')
        hdu['SCI'].header['nbg'] = (ma.sum(~scim.mask), 'area considered in background stats.')
def train_step_sequential(self, epoch, indices=None):
    indices = list(range(len(self.data))) if indices is None else indices
    for ind in indices:
        x = self.data[ind]
        Dx = self.vectors - self.data[ind]
        Dist = ma.sum(Dx**2, 1)
        min_dist = ma.min(Dist)
        bmu = ma.argmin(Dist)
        self.distances.append(min_dist)
        if self.neighbourhood == Map.NeighbourhoodGaussian:
            h = numpy.exp(-self.unit_distances[:, bmu] / (2 * self.radius(epoch))) * \
                (self.unit_distances[:, bmu] <= self.radius(epoch))
        elif self.neighbourhood == Map.NeighbourhoodEpanechicov:
            h = 1.0 - (self.unit_distances[:, bmu] / self.radius(epoch))**2
            h = h * (h >= 0.0)
        else:
            h = 1.0 * (self.unit_distances[:, bmu] <= self.radius(epoch))
        h = h * self.alpha(epoch)
        nonzero = ma.nonzero(h)
        h = h[nonzero]
        self.vectors[nonzero] = self.vectors[nonzero] - Dx[nonzero] * numpy.reshape(h, (len(h), 1))
def chivectors(x, y):
    nn = np.logical_not(np.logical_or(
        np.logical_or(np.isnan(x), np.isnan(y)),
        np.logical_or((x <= 0.0), (y <= 0.0))))
    N = np.sum(nn)
    chisq = (1.0 / N) * ma.sum((x[nn] - y[nn])**2 / (x[nn]**2 + y[nn]**2))
    return chisq
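# Minimal usage sketch (an assumption, not part of the original code) for
# chivectors() above: a reduced chi-square-like distance between two positive
# vectors, skipping NaNs and non-positive entries. Toy data, illustrative only.
import numpy as np
import numpy.ma as ma

x = np.array([1.0, 2.0, np.nan, 4.0, -1.0])
y = np.array([1.1, 1.8, 3.0, np.nan, 5.0])
print(chivectors(x, y))  # uses only the first two (valid, positive) pairs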
def __fillHex(array, som):
    xDim, yDim = som.map_shape
    ## for n in som.nodes:
    ##     d[tuple(n.pos)] = n
    d = dict([((i, j), som[i, j]) for i in range(xDim) for j in range(yDim)])
    check = lambda x, y: x >= 0 and x < (xDim * 2 - 1) and y >= 0 and y < (yDim * 2 - 1)
    dx = [1, 0, -1]
    dy = [0, 1, 1]
    for i in range(0, xDim * 2, 2):
        for j in range(0, yDim * 2, 2):
            for ddx, ddy in zip(dx, dy):
                if check(i + ddx, j + ddy):
                    ## array[i+ddx][j+ddy] = d[(i/2, j/2)].getDistance(d[(i/2+ddx, j/2+ddy)].referenceExample)
                    # integer division so the dict keys stay ints under Python 3
                    array[i + ddx][j + ddy] = numpy.sqrt(
                        ma.sum((d[(i // 2, j // 2)].vector -
                                d[(i // 2 + ddx, j // 2 + ddy)].vector)**2))
    dx = [1, -1, 0, -1, 0, 1]
    dy = [0, 0, 1, 1, -1, -1]
    for i in range(0, xDim * 2, 2):
        for j in range(0, yDim * 2, 2):
            l = [array[i + ddx, j + ddy] for ddx, ddy in zip(dx, dy)
                 if check(i + ddx, j + ddy)]
            array[i][j] = sum(l) / len(l)
    return array
def get_best_matching_node(self, instance):
    """Return the best matching node for a given data instance.
    """
    instance, c, w = Orange.data.Table([instance]).toNumpyMA()
    vectors = self.map.vectors()
    Dist = vectors - instance
    bmu = ma.argmin(ma.sum(Dist**2, 1))
    return list(self.map)[bmu]
def _sum(array, **kwargs):
    # weighted or scaled sum
    axis_in = kwargs.get('axis', None)
    weights_in = kwargs.pop('weights', None)
    returned_in = kwargs.pop('returned', False)
    if weights_in is not None:
        wsum = ma.sum(weights_in * array, **kwargs)
    else:
        wsum = ma.sum(array, **kwargs)
    if returned_in:
        if weights_in is None:
            weights = np.ones_like(array)
        else:
            weights = weights_in
        rvalue = (wsum, ma.sum(weights, axis=axis_in))
    else:
        rvalue = wsum
    return rvalue
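# Minimal usage sketch (an assumption, not part of the original code) for
# _sum() above: a weighted sum along an axis that can also return the summed
# weights, mirroring ma.average(..., returned=True). Toy data, illustrative only.
import numpy as np
import numpy.ma as ma

data = ma.masked_invalid(np.array([[1.0, 2.0], [3.0, np.nan]]))
w = np.array([[0.5, 0.5], [1.0, 1.0]])
wsum, wtot = _sum(data, weights=w, axis=0, returned=True)
print(wsum, wtot)  # weighted column sums and the corresponding weight totals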
def execute(self, nprocesses=1):
    params = self.params
    model = params["model"]
    kiyopy.utils.mkparents(params['output_root'])
    parse_ini.write_params(params, params['output_root'] + 'params.ini',
                           prefix=prefix)
    # Loop over files to process.
    for file_middle in params['file_middles']:
        input_fname = (params['input_root'] + file_middle + params['input_end'])
        Reader = core.fitsGBT.Reader(input_fname, feedback=self.feedback)
        output_fname = params["output_root"] + file_middle + ".npy"
        if model == "scan_var":
            n_scans = len(Reader.scan_set)
            n_IFs = len(Reader.IF_set)
            first_block = True
            for jj in range(n_IFs):
                # These all become arrays on the first iteration.
                var = 0.0
                mean = 0.0
                counts = 0
                for ii in range(n_scans):
                    Data = Reader.read(ii, jj)
                    if first_block:
                        out_shape = (n_IFs,) + Data.dims[1:]
                        out_arr = sp.empty(out_shape, dtype=float)
                        first_block = False
                    var += ma.sum(Data.data**2, 0).filled(0)
                    mean += ma.sum(Data.data, 0).filled(0)
                    counts += ma.count(Data.data, 0)
                # If we didn't get at least 5 good hits, throw away the scan.
                counts[counts < 5] = -1
                var = var / counts - (mean / counts)**2
                var[counts < 5] = 1.0e10
                out_arr[jj, ...] = var
            sp.save(output_fname, out_arr)
            if self.feedback > 1:
                print("Wrote noise parameters to file: " +
                      utils.abbreviate_file_path(output_fname))
        else:
            raise ValueError("Invalid noise model: " + model)
def flatten(ech, method='average'):
    """Flatten 2-D echelle spectrum to 1-D flat spectrum.
    """
    wav = ech.wav[0]
    assert np.allclose(ech.wav - wav, 0), "ech.wav rows must be identical"
    ech.flux = ma.masked_invalid(ech.flux)
    ech.uflux = ma.masked_invalid(ech.uflux)
    if method == 'average':
        ivar = ech.uflux**-2
        # Weighted mean and uncertainty on weighted mean
        flux = ma.sum(ech.flux * ivar, axis=0) / ma.sum(ivar, axis=0)
        uflux = ma.sqrt(1 / ma.sum(ivar, axis=0))
    flux.fill_value = np.nan
    uflux.fill_value = np.nan
    flux = flux.filled()
    uflux = uflux.filled()
    return flux, uflux
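# Standalone sketch (an assumption, not part of the original code) of the
# inverse-variance weighted combination performed in flatten() above: combine
# two noisy versions of the same spectrum, ignoring masked pixels. Toy data.
import numpy as np
import numpy.ma as ma

flux = ma.masked_invalid(np.array([[1.0, 2.0, np.nan],
                                   [1.2, 1.8, 3.0]]))
uflux = ma.masked_invalid(np.array([[0.1, 0.2, np.nan],
                                    [0.2, 0.2, 0.3]]))
ivar = uflux**-2
flat = ma.sum(flux * ivar, axis=0) / ma.sum(ivar, axis=0)
uflat = ma.sqrt(1 / ma.sum(ivar, axis=0))
print(flat.filled(np.nan), uflat.filled(np.nan))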
def cost(self, p, x, y, u, v):
    """
    """
    # I'm not sure this is the best way to count the number of elements.
    n = len(u.compressed().flatten())
    s = self.s
    vr, vt = uv2nt(x, y, u, v, x_c=s[0] * p[0], y_c=s[1] * p[1])
    mag = (u**2 + v**2)**0.5
    j = 1. / (2 * n) * ma.sum((vr / mag)**2)
    return j
def weighted_average(self, axis=0, expaxis=None):
    """ Calculate weighted average of data along axis after optionally
    inserting a new dimension into the shape array at position expaxis.
    """
    if expaxis is not None:
        vals = ma.expand_dims(self.vals, expaxis)
        dmin = ma.expand_dims(self.dmin, expaxis)
        dmax = ma.expand_dims(self.dmax, expaxis)
        wt = ma.expand_dims(self.wt, expaxis)
    else:
        vals = self.vals
        wt = self.wt
        dmin = self.dmin
        dmax = self.dmax
    # Get average value
    avg, norm = ma.average(vals, axis=axis, weights=wt, returned=True)
    avg_ex = ma.expand_dims(avg, 0)
    # Calculate weighted uncertainty
    wtmax = ma.max(wt, axis=axis)
    neff = norm / wtmax  # Effective number of samples based on uncertainties
    # Seeking max deviation from the average; if above avg use max, if below use min
    term = np.empty_like(vals)
    indices = np.where(vals > avg_ex)
    i0 = indices[0]
    irest = indices[1:]
    ii = tuple(x for x in itertools.chain([i0], irest))
    jj = tuple(x for x in itertools.chain([np.zeros_like(i0)], irest))
    term[ii] = (dmax[ii] - avg_ex[jj])**2
    indices = np.where(vals <= avg_ex)
    i0 = indices[0]
    irest = indices[1:]
    ii = tuple(x for x in itertools.chain([i0], irest))
    jj = tuple(x for x in itertools.chain([np.zeros_like(i0)], irest))
    term[ii] = (avg_ex[jj] - dmin[ii])**2
    dsum = ma.sum(term * wt, axis=0)  # Sum for weighted average of deviations
    dev = 0.5 * np.sqrt(dsum / (norm * neff))
    # np.float was an alias for the builtin float and has been removed from
    # newer numpy releases, so test against float alone.
    if isinstance(avg, float):
        avg = avg_ex
    tmp_min = avg - dev
    ii = np.where(tmp_min < 0)
    tmp_min[ii] = TOL * avg[ii]
    return UncertContainer(avg, tmp_min, avg + dev)
def calc_time_var_file(Blocks, pol_ind=0, cal_ind=0):
    """Calculates the time variance over several data blocks.

    Given a tuple of DataBlock objects (all with compatible dimensions), the
    time variance is calculated at each frequency. pol_ind and cal_ind specify
    the polarization and cal.
    """
    # These all become arrays on first iteration.
    var = 0.0
    mean = 0.0
    counts = 0
    for Data in Blocks:
        var += ma.sum(Data.data[:, pol_ind, cal_ind, :]**2, 0)
        mean += ma.sum(Data.data[:, pol_ind, cal_ind, :], 0)
        counts += (Data.dims[0] -
                   ma.count_masked(Data.data[:, pol_ind, cal_ind, :], 0))
    var = var / counts - (mean / counts)**2
    var[counts <= 1] = ma.masked
    var[var <= 0.0] = ma.masked
    return var
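# Standalone sketch (an assumption, not part of the original code) of the
# running-sums variance trick used in calc_time_var_file() above: accumulate
# sum(x) and sum(x**2) across chunks, then form var = E[x^2] - E[x]^2 per
# column, masking low-count columns. Toy chunks, illustrative only.
import numpy as np
import numpy.ma as ma

chunks = [ma.masked_invalid(np.random.default_rng(i).normal(size=(50, 4)))
          for i in range(3)]
sq_sum, lin_sum, counts = 0.0, 0.0, 0
for chunk in chunks:
    sq_sum += ma.sum(chunk**2, 0)
    lin_sum += ma.sum(chunk, 0)
    counts += chunk.count(axis=0)
var = sq_sum / counts - (lin_sum / counts)**2
var = ma.masked_where(counts <= 1, var)
print(var)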
def calculate_moments(d, minchan=False, maxchan=False, vel=False, bestmask=False, mask=False):
    nglat = d.shape[1]
    nglon = d.shape[2]
    nspec = d.shape[0]
    maps = np.zeros((nglat, nglon), dtype={
        'names': ['mean', 'sd', 'errmn', 'errsd', 'skew', 'kurt',
                  'error', 'intint', 'npix'],
        'formats': ['f4', 'f4', 'f4', 'f4', 'f4', 'f4', 'f4', 'f4', 'f4']})
    # These definitions for mask seem backward but are correct.
    noise_portion = ma.masked_where(mask == 1, d)
    good_d = d[minchan:maxchan, ...]
    mask2 = mask[minchan:maxchan, ...]
    # print(mask)
    # print(mask2)
    print(minchan)
    print(maxchan)
    signal_portion = ma.masked_where(mask2 == 0, good_d)
    maps['error'] = ma.std(noise_portion, axis=0)
    maps['intint'] = ma.sum(signal_portion, axis=0)
    # print(maps['error'])
    for x in range(nglat):
        for y in range(nglon):
            fullspec = d[..., x, y]  # Extract a single spectrum
            ind = np.arange(nspec)
            velmask = mask[minchan:maxchan, x, y]
            if np.sum(velmask) != 0:
                velmask = bestmask
                npix = max(np.sum(velmask), 1)
            ind = ind[velmask > 0]
            sigma = maps['error'][x, y]
            if ind.size > 2 and (sigma > 0):
                mom = idl_stats.wt_moment(vel[ind], fullspec[ind],
                                          errors=np.zeros(ind.size) + sigma)
                maps['mean'][x, y] = mom['mean']
                maps['sd'][x, y] = mom['stdev']
                maps['errmn'][x, y] = mom['errmn']
                maps['errsd'][x, y] = mom['errsd']
                maps['npix'][x, y] = npix
            else:
                maps['mean'][x, y] = np.nan
                maps['sd'][x, y] = np.nan
                maps['errmn'][x, y] = np.nan
                maps['errsd'][x, y] = np.nan
                maps['npix'][x, y] = np.nan
    return maps