def __setitem__(self, indx, value):
    """Sets the given record to value."""
    MaskedArray.__setitem__(self, indx, value)
    if isinstance(indx, basestring):
        self._mask[indx] = ma.getmaskarray(value)
def unwrap_py(inph, in_p=(), uv=2 * pi):
    """Return the input matrix unwrapped by the value given in uv.

    The same as unwrapv, but using for loops, written in pure Python.
    """
    if not is_masked(inph):
        fasei = MaskedArray(inph, isnan(inph))
    else:
        fasei = inph

    nx, ny = (fasei.shape[0], fasei.shape[1])

    # If the initial unwrapping point is not given, take the center of the
    # image as the initial coordinate
    if in_p == ():
        in_p = (int(nx / 2), int(ny / 2))

    # Create a temporary array to mark whether the points are already unwrapped:
    # 0 the point has not been unwrapped
    # 1 the point has not been unwrapped, but it is in the unwrapping list
    # 2 the point was already unwrapped
    fl = zeros((nx, ny))

    # List containing the points to unwrap
    l_un = [in_p]
    fl[in_p] = 1

    # unwrapped values
    faseo = fasei.copy()

    while len(l_un) > 0:
        # remove the first value from the list
        cx, cy = l_un.pop(0)
        # Put the coordinates of the unwrapped neighbors in the list
        # and check for wrapping
        nv = 0
        wv = 0
        for i in range(cx - 1, cx + 2):
            for j in range(cy - 1, cy + 2):
                if (i > -1) and (i < nx) and (j > -1) and (j < ny):
                    if (fl[i, j] == 0) & (faseo.mask[i, j] == False):
                        fl[i, j] = 1
                        l_un.append((i, j))
                    elif fl[i, j] == 2:
                        wv = wv + rint((faseo[i, j] - faseo[cx, cy]) / uv)
                        nv = nv + 1
        if nv != 0:
            wv = wv / nv
        fl[cx, cy] = 2
        faseo[cx, cy] = faseo[cx, cy] + wv * uv
    return faseo
def __array_finalize__(self, obj):
    """
    This function is needed for numpy subclassing because of the ways
    numpy arrays can be constructed.
    """
    if obj is None:
        return
    self._err = getattr(obj, 'err', noerr)
    self._name = getattr(obj, 'name', '')
    self._units = getattr(obj, 'units', '')
    MaskedArray.__array_finalize__(self, obj)
def test_numpy_timelike_column_with_null(dsn, configuration):
    fill_value = 0
    with open_cursor(configuration) as cursor:
        with query_fixture(cursor, configuration, 'INSERT TIMESTAMP') as table_name:
            cursor.execute('INSERT INTO {} VALUES (?)'.format(table_name), [None])
            cursor.execute('SELECT a FROM {}'.format(table_name))
            results = cursor.fetchallnumpy()
            expected = MaskedArray([42], mask=[1], dtype='datetime64[us]')
            assert_equal(results[_fix_case(configuration, 'a')].filled(fill_value),
                         expected.filled(fill_value))
def __setitem__(self, indx, value):
    """x.__setitem__(i, y) <==> x[i]=y

    Sets the item described by indx. If value is masked, masks those
    locations. Errors are also replaced if there are errors in both
    source and destination.
    """
    MaskedArray.__setitem__(self, indx, value)
    if isinstance(value, Dvect) and self._err is not noerr and value._err is not noerr:
        self._err[indx] = value._err
def peak(self):
    # calculate the PEAK position, flux and fwhm
    # if fwhm == 0, then use all pixels
    dma = MaskedArray(self.data_slc, mask=self.mask_slc)
    indmax = np.unravel_index(dma.argmax(), dma.shape)
    x_peak = self.r_slc[indmax[0]]
    y_peak = self.c_slc[indmax[1]]
    flux_peak = self.data_slc[indmax]
    flux_peak_error = self.udata_slc[indmax]
    fwhm = 2.0 * np.sqrt(np.sum((np.logical_not(self.mask_slc)) &
                                (self.data_slc >= flux_peak / 2.0)) / np.pi)
    if fwhm <= 0:
        fwhm = 2.0 * np.sqrt(np.sum(self.data_slc >= flux_peak / 2.0) / np.pi)
    if fwhm <= 0:
        fwhm = 2.0 * np.sqrt(len(self.data_slc) / np.pi)
    # swap to (column, row) order and convert to 1-based coordinates
    x_peak, y_peak = y_peak + 1.0, x_peak + 1.0
    return x_peak, y_peak, flux_peak, flux_peak_error, fwhm
def _genTimeSeries(reduce_args, state):
    import scikits.timeseries.tseries as ts
    from numpy import ndarray
    from numpy.ma import MaskedArray

    time_series = ts._tsreconstruct(*reduce_args)

    # from __setstate__, modified
    (ver, shp, typ, isf, raw, msk, flv, dsh, dtm, dtyp, frq, infodict) = state
    # print 'regenerating %s' % dtyp

    MaskedArray.__setstate__(time_series, (ver, shp, typ, isf, raw, msk, flv))
    _dates = time_series._dates
    # _dates.__setstate__((ver, dsh, typ, isf, dtm, frq))  # use remote typ
    ndarray.__setstate__(_dates, (dsh, dtyp, isf, dtm))
    _dates.freq = frq
    _dates._cachedinfo.update(dict(full=None, hasdups=None, steps=None,
                                   toobj=None, toord=None, tostr=None))
    # Update the _optinfo dictionary
    time_series._optinfo.update(infodict)
    return time_series
def __new__(cls, *args, **kwargs):
    '''
    Create new object. No default mapping - use empty dictionary.
    '''
    values_mapping = kwargs.pop('values_mapping', {})
    obj = MaskedArray.__new__(MaskedArray, *args, **kwargs)
    obj.__class__ = MappedArray
    # Must occur after class change for obj.state to update!
    obj.values_mapping = values_mapping
    return obj
def createNetCDF4(self, fpath, matrix, dtype, fillvalue=None):
    if not os.path.exists(os.path.dirname(fpath)):
        try:
            os.makedirs(os.path.dirname(fpath))
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise

    rootgrp = Dataset(fpath, "w", format="NETCDF4")

    # add dimensions
    lat = rootgrp.createDimension("lat", DileGeometry.YSIZE)
    lon = rootgrp.createDimension("lon", DileGeometry.XSIZE)

    # add variables
    latitudes = rootgrp.createVariable("lat", "f4", ("lat",))
    longitudes = rootgrp.createVariable("lon", "f4", ("lon",))
    latitudes.units = 'degrees_north'
    latitudes.axis = 'Y'
    longitudes.units = 'degrees_east'
    longitudes.axis = 'X'

    bb = self.asBoundingBox()
    lats = linspace(bb['lat_min'], bb['lat_max'], self.YSIZE, endpoint=True)
    lons = linspace(bb['lon_min'], bb['lon_max'], self.XSIZE, endpoint=True)
    latitudes[:] = lats
    longitudes[:] = lons

    if fillvalue:
        data = rootgrp.createVariable(self.variable, dtype, ("lat", "lon",),
                                      fill_value=fillvalue)
    else:
        data = rootgrp.createVariable("data", dtype, ("lat", "lon",))

    if isinstance(matrix, MaskedArray):
        data[:] = MaskedArray.copy(matrix)
    else:
        data[:] = matrix

    for attr in self.attributes:
        if attr != '_FillValue':
            data.setncattr(attr, self.attributes[attr])

    rootgrp.close()
    return fpath
def get_array(self, submask=None):
    '''
    Get the Parameter's array with an optional submask substituted for
    the mask.

    :param submask: Name of submask to return with the array.
    :type submask: str or None
    '''
    if not submask:
        return self.array
    if submask not in self.submasks:
        return None
    if isinstance(self.array, MappedArray):
        return MappedArray(self.array.data,
                           mask=self.submasks[submask].copy(),
                           values_mapping=self.array.values_mapping)
    else:
        return MaskedArray(self.array.data,
                           mask=self.submasks[submask].copy())
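# Added illustration: a minimal, self-contained sketch of the submask idea
# above, using plain numpy.ma instead of MappedArray. The submask names
# ('saturation', 'dropout') and the data are hypothetical.
import numpy as np
from numpy.ma import MaskedArray

data = np.arange(5.0)
submasks = {'saturation': np.array([0, 0, 1, 0, 0], dtype=bool),
            'dropout': np.array([1, 0, 0, 0, 0], dtype=bool)}
# the full mask is the union of all named submasks
full = MaskedArray(data, mask=submasks['saturation'] | submasks['dropout'])
# substituting a single named submask, as get_array(submask=...) does above
only_saturation = MaskedArray(full.data, mask=submasks['saturation'].copy())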
def __new__(cls, data, err=noerr, name='', units='', mask=nomask,
            dtype=None, edtype=None):
    """
    Construct a Dvect from an input array which we try to make into a
    masked array as far as possible.
    """
    # Try to make a masked array out of the input
    #obj = masked_array(data, dtype=dtype, mask=mask, subok=True).view(cls)
    obj = MaskedArray.__new__(cls, data, dtype=dtype, mask=mask)
    if not isinstance(obj, Dvect):
        obj = obj.view(cls)

    # Add attributes
    if err is not noerr:
        obj._err = array(err, dtype=edtype)
        if obj._err.shape != obj.shape:
            raise DvectError('Dvect.__new__: errors and data are incompatible')
    obj._name = name
    obj._units = units
    return obj
def test_numpy_timelike_column_larger_than_batch_size(dsn, configuration):
    timestamps = [
        datetime.datetime(2015, 12, 31, 1, 2, 3),
        datetime.datetime(2016, 1, 5, 4, 5, 6),
        datetime.datetime(2017, 2, 6, 7, 8, 9),
        datetime.datetime(2018, 3, 7, 10, 11, 12),
        datetime.datetime(2019, 4, 8, 13, 14, 15)
    ]
    with open_cursor(configuration, rows_to_buffer=2) as cursor:
        with query_fixture(cursor, configuration, 'INSERT TIMESTAMP') as table_name:
            cursor.executemany('INSERT INTO {} VALUES (?)'.format(table_name),
                               [[timestamp] for timestamp in timestamps])
            cursor.execute('SELECT a FROM {} ORDER BY a'.format(table_name))
            results = cursor.fetchallnumpy()
            expected = MaskedArray(timestamps, mask=[0], dtype='datetime64[us]')
            assert_equal(results[_fix_case(configuration, 'a')], expected)
def rainrate_fixture() -> Cube:
    """Masked rain rates in mm/h"""
    nonzero_data = np.array(
        [
            [0.03, 0.1, 0.1, 0.1, 0.03],
            [0.1, 0.2, 0.2, np.nan, 0.1],
            [0.2, 0.5, np.nan, np.nan, 0.2],
            [0.1, 0.5, np.nan, np.nan, 0.1],
            [0.03, 0.2, 0.2, 0.1, 0.03],
        ]
    )
    data = np.zeros((16, 16), dtype=np.float32)
    data[5:12, 5:12] = np.full((7, 7), 0.03, dtype=np.float32)
    data[6:11, 6:11] = nonzero_data.astype(np.float32)
    mask = np.where(np.isfinite(data), False, True)
    m_data = MaskedArray(data, mask=mask)
    cube = set_up_variable_cube(
        m_data,
        name="lwe_precipitation_rate",
        units="mm h-1",
        spatial_grid="equalarea",
    )
    return cube
def create_cv_results(scores, candidate_params, n_splits, error_score, weights):
    if len(scores[0]) == 4:
        fit_times, test_scores, score_times, train_scores = zip(*scores)
    else:
        fit_times, test_scores, score_times = zip(*scores)
        train_scores = None

    test_scores = [error_score if s is FIT_FAILURE else s for s in test_scores]
    if train_scores is not None:
        train_scores = [error_score if s is FIT_FAILURE else s
                        for s in train_scores]

    # Construct the `cv_results_` dictionary
    results = {'params': candidate_params}
    n_candidates = len(candidate_params)

    if weights is not None:
        weights = np.broadcast_to(weights[None, :],
                                  (len(candidate_params), len(weights)))

    _store(results, 'test_score', test_scores, n_splits, n_candidates,
           splits=True, rank=True, weights=weights)
    _store(results, 'fit_time', fit_times, n_splits, n_candidates)
    _store(results, 'score_time', score_times, n_splits, n_candidates)

    if train_scores is not None:
        _store(results, 'train_score', train_scores, n_splits, n_candidates,
               splits=True)

    # Use one MaskedArray and mask all the places where the param is not
    # applicable for that candidate. Use defaultdict as each candidate may
    # not contain all the params
    param_results = defaultdict(lambda: MaskedArray(np.empty(n_candidates),
                                                    mask=True, dtype=object))
    for cand_i, params in enumerate(candidate_params):
        for name, value in params.items():
            param_results["param_%s" % name][cand_i] = value

    results.update(param_results)
    return results
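# Added illustration: the "param_%s" masking pattern used above, shown in
# isolation. Each column starts fully masked; assigning a value at an index
# unmasks that index, so candidates lacking a parameter stay masked. The
# example grid below is hypothetical.
from collections import defaultdict
import numpy as np
from numpy.ma import MaskedArray

candidate_params = [{'alpha': 0.1}, {'alpha': 1.0, 'beta': 2}]
n_candidates = len(candidate_params)
param_results = defaultdict(
    lambda: MaskedArray(np.empty(n_candidates), mask=True, dtype=object))
for cand_i, params in enumerate(candidate_params):
    for name, value in params.items():
        param_results['param_%s' % name][cand_i] = value
# param_results['param_beta'] is masked at index 0 and holds 2 at index 1.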
def set_vars(variables, dataset, group_name=None):
    '''
    Set NetCDF variables: use dataset.createVariable(), set each Variable's
    attributes and create a MaskedArray.

    :param variables:
    :param dataset:
    :return:
    '''
    for v in variables:  # iterate over variables (an OrderedDict)
        var = variables[v]
        if isinstance(var, Variable):
            # create the variable with the built-in method
            create_var = dataset.createVariable(varname=var.name,
                                                datatype=var.datatype,
                                                dimensions=var.dimensions,
                                                endian=var.endian(),
                                                chunksizes=None)
            # set the attribute information
            for attr in var.ncattrs():  # iterate over the attribute list
                if isinstance(create_var, Variable):
                    create_var.setncattr(attr, var.getncattr(attr))  # set attribute
            # set the group information
            if group_name != None:
                # custom attribute recording the group name
                create_var.setncattr("group_name", group_name)

            var_data = var[::]  # the data payload
            if isinstance(var_data, MaskedArray):
                # Return the filling value of the masked array
                fill_value = var_data.get_fill_value()
                # Return the current data, as a view of the original underlying data
                data = var_data.data
                # build the MaskedArray object
                maskedArray = MaskedArray(data, dtype=var_data.dtype,
                                          fill_value=fill_value)
                create_var[::] = maskedArray  # assign the variable data
                # data2 = create_var[::]
            else:
                create_var[::] = var[::]
    return dataset
def __repr__(self):
    name = 'mapped_array'
    parameters = dict(
        name=name,
        nlen=" " * len(name),
        data=self.raw,
        # WARNING: SLOW!
        sdata=MaskedArray(
            [self.values_mapping.get(x, NO_MAPPING) for x in self.data],
            mask=self.mask),
        mask=self._mask,
        fill=self.fill_value,
        dtype=self.dtype,
        values=self.values_mapping)
    short_std = """\
masked_%(name)s(values = %(sdata)s,
%(nlen)s data = %(data)s,
%(nlen)s mask = %(mask)s,
%(nlen)s fill_value = %(fill)s,
%(nlen)svalues_mapping = %(values)s)
"""
    return short_std % parameters
def __new__(cls, data, geotrans=None, proj=None, fill_value=None,
            fobj=None, color_mode=None,  # mask=None,
            yvalues=None, xvalues=None, mode="r", *args, **kwargs):
    # NOTE: The mask will always be calculated, even if it is
    # already present or not needed at all...
    mask = (np.zeros_like(data, bool)
            if fill_value is None
            else data == fill_value)

    self = MaskedArray.__new__(cls, data=data, fill_value=fill_value,
                               mask=mask, *args, **kwargs)
    self.unshare_mask()

    self.__dict__["geotrans"] = geotrans
    self.__dict__["proj"] = _Projection(proj)
    self.__dict__["color_mode"] = color_mode
    self.__dict__["mode"] = mode
    self.__dict__["_fobj"] = fobj
    self.__dict__["_yvalues"] = yvalues
    self.__dict__["_xvalues"] = xvalues
    return self
def __str__(self):
    return str(MaskedArray(
        [self.values_mapping.get(x, NO_MAPPING) for x in self.data],
        mask=self.mask))
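# Added illustration: a standalone sketch of the raw-to-state mapping that
# __repr__ and __str__ perform above. NO_MAPPING stands in for the
# module-level constant; the values_mapping is hypothetical.
import numpy as np
from numpy.ma import MaskedArray

NO_MAPPING = '?'
values_mapping = {0: 'closed', 1: 'open'}
raw = MaskedArray([0, 1, 2], mask=[False, False, True])
states = MaskedArray([values_mapping.get(x, NO_MAPPING) for x in raw.data],
                     mask=raw.mask)
# -> ['closed' 'open' --]; the unmapped 2 would display as '?' if unmasked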
def __eq__(self, other):
    '''
    Allow comparison with strings such as array == 'state'
    '''
    return MaskedArray.__eq__(self.raw, self.__coerce_type(other))
def _format_results(self, n_splits, out):
    """Helper to generate the ``cv_results_`` dictionary.

    Args:
        n_splits (int): integer specifying the number of folds when doing
            cross-validation.
        out (:obj:`ExperimentAnalysis`): Object returned by `tune.run`.

    Returns:
        results (:obj:`dict`): Dictionary of results to use for the
            interface's ``cv_results_``.
    """
    dfs = list(out.fetch_trial_dataframes().values())
    finished = [df.iloc[[-1]] for df in dfs]
    test_scores = {}
    train_scores = {}
    for name in self.scoring:
        test_scores[name] = [
            df[[
                col for col in dfs[0].columns
                if "split" in col and "test_%s" % name in col
            ]].to_numpy() for df in finished
        ]
        if self.return_train_score:
            train_scores[name] = [
                df[[
                    col for col in dfs[0].columns
                    if "split" in col and "train_%s" % name in col
                ]].to_numpy() for df in finished
            ]
        else:
            train_scores = None

    configs = out.get_all_configs()
    candidate_params = [
        self._clean_config_dict(configs[config_key]) for config_key in configs
    ]

    results = {"params": candidate_params}
    n_candidates = len(candidate_params)

    def _store(results, key_name, array, n_splits, n_candidates,
               weights=None, splits=False, rank=False):
        """A small helper to store the scores/times to the cv_results_"""
        # When iterated first by n_splits and then by parameters
        array = np.array(array, dtype=np.float64).reshape(
            (n_candidates, n_splits))
        if splits:
            for split_i in range(n_splits):
                results["split%d_%s" % (split_i, key_name)] = array[:, split_i]
        array_means = np.average(array, axis=1, weights=weights)
        results["mean_%s" % key_name] = array_means
        # Weighted std is not directly available in numpy
        array_stds = np.sqrt(
            np.average((array - array_means[:, np.newaxis])**2,
                       axis=1, weights=weights))
        results["std_%s" % key_name] = array_stds
        if rank:
            results["rank_%s" % key_name] = np.asarray(
                rankdata(-array_means, method="min"), dtype=np.int32)

    for name in self.scoring:
        _store(results, "test_%s" % name, test_scores[name], n_splits,
               n_candidates, splits=True, rank=True)
    if self.return_train_score:
        for name in self.scoring:
            _store(results, "train_%s" % name, train_scores[name], n_splits,
                   n_candidates, splits=True, rank=True)

    results["time_total_s"] = np.array(
        [df["time_total_s"].to_numpy() for df in finished]).flatten()

    # Use one MaskedArray and mask all the places where the param is not
    # applicable for that candidate. Use defaultdict as each candidate may
    # not contain all the params
    param_results = defaultdict(lambda: MaskedArray(
        np.empty(n_candidates), mask=True, dtype=object))
    for cand_i, params in enumerate(candidate_params):
        for name, value in params.items():
            param_results["param_%s" % name][cand_i] = value

    results.update(param_results)
    return results
def __ne__(self, other):
    '''
    In MappedArrays, != is always the opposite of ==
    '''
    return MaskedArray.__ne__(self.raw, self.__coerce_type(other))
def __le__(self, other):
    return MaskedArray.__le__(self.raw, self.__coerce_type(other))
def __gt__(self, other):
    '''
    Works - but comparing against string states is not recommended
    '''
    return MaskedArray.__gt__(self.raw, self.__coerce_type(other))
# depth levels
depth = np.arange(0., 60., 10.)

# data code and product names for output files
data_code = 'STZ'
product_name_original = 'NRSMAI-long-timeseries'
product_name_interpolated = 'NRSMAI-long-timeseries-interpolated'

### Parse command-line arguments
parser = argparse.ArgumentParser()
parser.add_argument('matfile', help='input Matlab file')
args = parser.parse_args()

### Read in variables from mat file
data = loadmat(args.matfile, squeeze_me=True)

tem_mar = MaskedArray(data['tem_mar'], mask=np.isnan(data['tem_mar']))
sal_mar = MaskedArray(data['sal_mar'], mask=np.isnan(data['sal_mar']))
tem_mi = MaskedArray(data['tem_mi'], mask=np.isnan(data['tem_mi']))
sal_mi = MaskedArray(data['sal_mi'], mask=np.isnan(data['sal_mi']))

# convert time variables to days from our chosen epoch
dt = input_epoch - output_epoch          # difference as a timedelta object
dt = dt.total_seconds() / 24. / 3600.    # difference in decimal days
time_mar_t = data['time_mar_t'] + dt
time_mar_s = data['time_mar_s'] + dt
time_mi = data['time_mi'] + dt

### Put original sampling variables onto a common time dimension
# Create a single time variable
import numpy
from numpy.ma import MaskedArray

a = MaskedArray()
F = ''
numpy.ma.dump(a, F)
def _getseries(self):
    "Returns the data as a MaskedRecord array."
    return MaskedArray.view(self, mrecarray)
def read(self, files, pair=False, ampcor=True, lincor=True, flatcor=True,
         abba_test=True):
    """Read uSpeX files.

    Parameters
    ----------
    files : string or list
        A file name or list thereof.
    pair : bool, optional
        Assume the observations are taken in AB(BA) mode and return A-B
        for each pair.
    ampcor : bool, optional
        Set to `True` to apply the amplification noise correction.
    lincor : bool, optional
        Set to `True` to apply the linearity correction.
    flatcor : bool, optional
        Set to `True` to apply flat field correction.
    abba_test : bool, optional
        Set to `True` to test for AB(BA) ordering when `pair` is `True`.
        If `abba_test` is `False`, then the file order is not checked.

    Returns
    -------
    stack : MaskedArray
        The resultant image(s). [counts / s]
    var : MaskedArray
        The variance. [total DN]
    headers : list or astropy FITS header
        If `pair` is `True`, the headers will be a list of lists, where
        each element is a list containing the A and B headers.
    """
    from numpy.ma import MaskedArray

    if isinstance(files, (list, tuple)):
        print('Loading {} files.'.format(len(files)))
        stack = MaskedArray(np.empty((len(files), 2048, 2048)))
        var = MaskedArray(np.empty((len(files), 2048, 2048)))
        headers = []
        for i in range(len(files)):
            kwargs = dict(pair=False, ampcor=ampcor, lincor=lincor,
                          flatcor=flatcor)
            stack[i], var[i], h = self.read(files[i], **kwargs)
            headers.append(h)

        if pair:
            print('\nAB(BA) pairing and subtracting.')
            a = np.flatnonzero(np.array([h['BEAM'] == 'A' for h in headers]))
            b = np.flatnonzero(np.array([h['BEAM'] == 'B' for h in headers]))
            if abba_test:
                # require equal numbers of a and b
                if len(a) != len(b):
                    raise ValueError('Number of A beams not equal to'
                                     ' number of B beams')
                # each A-B pair should be adjacent in the file list
                for i, j in zip(a, b):
                    if abs(i - j) != 1:
                        raise ValueError('Found invalid A-B pair: '
                                         + headers[i]['IRAFNAME'] + ' '
                                         + headers[j]['IRAFNAME'])

            stack_AB = []
            var_AB = []
            headers_AB = []
            for i, j in zip(a, b):
                stack_AB.append(stack[i] - stack[j])
                var_AB.append(var[i] + var[j])
                headers_AB.append([headers[i], headers[j]])

            stack_AB = np.ma.MaskedArray(stack_AB)
            var_AB = np.ma.MaskedArray(var_AB)
            return stack_AB, var_AB, headers_AB

        return stack, var, headers

    print('Reading {}'.format(files))
    data = fits.open(files, lazy_load_hdus=False)
    data[0].verify('silentfix')

    # check if already processed
    if 'SPEX60' in data[0].header:
        mask = data['mask'].astype(bool)
        im = np.ma.MaskedArray(data['sci'].data, mask=mask)
        var = data['var'].data
        if 'b header' in data:
            h = [data['sci'].header, data['b header'].header]
        else:
            h = data['sci'].header
        data.close()
        return im, var, h

    h = data[0].header.copy()
    read_var = (2 * config['readnoise']**2 / h['NDR'] / h['CO_ADDS']
                / h['ITIME']**2 / config['gain']**2)
    # TABLE_SE is read time, not sure what crtn is.
    crtn = (1 - h['TABLE_SE'] * (h['NDR'] - 1) / 3.0 / h['ITIME'] / h['NDR'])
    t_exp = h['ITIME'] * h['CO_ADDS']

    im_p = data[1].data / h['DIVISOR']
    im_s = data[2].data / h['DIVISOR']
    data.close()

    mask_p = im_p < (self.cal.bias - config['lincor max'])
    mask_s = im_s < (self.cal.bias - config['lincor max'])
    mask = mask_p + mask_s
    h.add_history('Masked saturated pixels.')

    im = MaskedArray(im_p - im_s, mask)
    if ampcor:
        im = self._ampcor(im)
        h.add_history('Corrected for amplifier noise.')

    if lincor:
        cor = self._lincor(im)
        cor[mask] = 1.0
        cor[:4] = 1.0
        cor[:, :4] = 1.0
        cor[2044:] = 1.0
        cor[:, 2044:] = 1.0
        im /= cor
        h.add_history('Applied linearity correction.')

    if flatcor:
        if self.flat is None:
            raise ValueError("Flat correction requested but flat not loaded.")
        im /= self.flat
        h.add_history('Flat corrected.')

    # total DN
    var = (np.abs(im * h['DIVISOR']) * crtn / h['CO_ADDS']**2 / h['ITIME']**2
           / config['gain'] + read_var)  # / h['DIVISOR']**2 / h['ITIME']**2

    # counts / s
    im = im / h['ITIME']
    im.mask += self.mask

    return im, var, h
def _format_results(self, n_splits, out):
    """Helper to generate the ``cv_results_`` dictionary.

    Args:
        n_splits (int): integer specifying the number of folds when doing
            cross-validation.
        out (:obj:`ExperimentAnalysis`): Object returned by `tune.run`.

    Returns:
        results (:obj:`dict`): Dictionary of results to use for the
            interface's ``cv_results_``.
    """
    trials = [
        trial for trial in out.trials if trial.status == Trial.TERMINATED
    ]
    trial_dirs = [trial.logdir for trial in trials]
    # The result dataframes are indexed by their trial logdir
    trial_dfs = out.fetch_trial_dataframes()

    # Try to find a template df to use for trials that did not return
    # any results. These trials should copy the structure and fill it
    # with NaNs so that the later reshape actions work.
    template_df = None
    fix_trial_dirs = []  # Holds trial dirs with no results
    for trial_dir in trial_dirs:
        if trial_dir in trial_dfs and template_df is None:
            template_df = trial_dfs[trial_dir]
        elif trial_dir not in trial_dfs:
            fix_trial_dirs.append(trial_dir)

    # Create NaN dataframes for trials without results
    if fix_trial_dirs:
        if template_df is None:
            # No trial returned any results
            return {}
        for trial_dir in fix_trial_dirs:
            trial_df = pd.DataFrame().reindex_like(template_df)
            trial_dfs[trial_dir] = trial_df

    # Keep the right order
    dfs = [trial_dfs[trial_dir] for trial_dir in trial_dirs]
    finished = [df.iloc[[-1]] for df in dfs]

    test_scores = {}
    train_scores = {}
    for name in self.scoring:
        test_scores[name] = [
            df[[
                col for col in dfs[0].columns
                if "split" in col and "test_%s" % name in col
            ]].to_numpy() for df in finished
        ]
        if self.return_train_score:
            train_scores[name] = [
                df[[
                    col for col in dfs[0].columns
                    if "split" in col and "train_%s" % name in col
                ]].to_numpy() for df in finished
            ]
        else:
            train_scores = None

    configs = [trial.config for trial in trials]
    candidate_params = [
        self._clean_config_dict(config) for config in configs
    ]

    results = {"params": candidate_params}
    n_candidates = len(candidate_params)

    def _store(results, key_name, array, n_splits, n_candidates,
               weights=None, splits=False, rank=False):
        """A small helper to store the scores/times to the cv_results_"""
        # When iterated first by n_splits and then by parameters
        array = np.array(array, dtype=np.float64).reshape(
            (n_candidates, n_splits))
        if splits:
            for split_i in range(n_splits):
                results["split%d_%s" % (split_i, key_name)] = array[:, split_i]
        array_means = np.average(array, axis=1, weights=weights)
        results["mean_%s" % key_name] = array_means
        # Weighted std is not directly available in numpy
        array_stds = np.sqrt(
            np.average((array - array_means[:, np.newaxis])**2,
                       axis=1, weights=weights))
        results["std_%s" % key_name] = array_stds
        if rank:
            results["rank_%s" % key_name] = np.asarray(
                rankdata(-array_means, method="min"), dtype=np.int32)

    for name in self.scoring:
        _store(results, "test_%s" % name, test_scores[name], n_splits,
               n_candidates, splits=True, rank=True)
    if self.return_train_score:
        for name in self.scoring:
            _store(results, "train_%s" % name, train_scores[name], n_splits,
                   n_candidates, splits=True, rank=True)

    results["time_total_s"] = np.array(
        [df["time_total_s"].to_numpy() for df in finished]).flatten()
    results["training_iteration"] = np.array(
        [df["training_iteration"].to_numpy() for df in finished]).flatten()

    # Use one MaskedArray and mask all the places where the param is not
    # applicable for that candidate. Use defaultdict as each candidate may
    # not contain all the params
    param_results = defaultdict(lambda: MaskedArray(
        np.empty(n_candidates), mask=True, dtype=object))
    for cand_i, params in enumerate(candidate_params):
        for name, value in params.items():
            param_results["param_%s" % name][cand_i] = value

    results.update(param_results)
    return results
def _to_masked_columns(values, dtype, mask, column_backend):
    if column_backend == 'numpy':
        return [MaskedArray(values, mask=mask, dtype=dtype)]
    elif column_backend == 'arrow':
        columns = pa.array(array(values, dtype=dtype), mask=array(mask))
        return pa.Table.from_arrays([columns], ['column'])
plt.title('Radar reflectivity')
plt.xlabel('Time [' + time_offset_radar_refl.units + ']')
plt.ylabel('Altitude [m]')
plt.figure()
#plt.show()
#pdb.set_trace()

# uniformCloudBlock = close-up selection of contiguous cloud values.
# Each column is a different altitude.
uniformCloudBlock = zeros((lenTimestepRangeCloud, lenLevelRangeCloud))
for col in range(0, lenLevelRangeCloud):
    uniformCloudBlock[:, col] = rankdata(reflCloudBlock[:, col]) / \
        MaskedArray.count(reflCloudBlock[:, col])
uniformCloudBlock = masked_where(uniformCloudBlock == 0, uniformCloudBlock)
# I'm not sure if it's appropriate to rank first, then fill,
# so I'm not sure if this is correct.
uniformCloudBlockFilled = filled(uniformCloudBlock, fill_value=0)

plt.clf()
for idx in range(1, 5):
    plt.subplot(2, 2, idx)
    plt.plot(uniformCloudBlockFilled[:, 5], uniformCloudBlockFilled[:, idx], '.')
    plt.title('Copula')
plt.figure()
#pdb.set_trace()
#plt.ion()  # Use interactive mode so that the program continues when a plot appears
def _median(self, data):
    background = np.ma.median(
        MaskedArray(
            self._rolling_window(data.data, self.smoothing, pad=True),
            mask=self._rolling_window(data.mask, self.smoothing,
                                      pad=True, mode='edge')),
        axis=-1)
    return background.data
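# Added illustration: the same rolling-median background idea as a
# self-contained sketch using NumPy's public sliding_window_view instead of
# the private _rolling_window helper above (assumes 1-D data and an odd
# window size; edge handling is simplified).
import numpy as np
from numpy.lib.stride_tricks import sliding_window_view

def rolling_median_background(data, window=5):
    # pad with edge values so every sample has a full window
    padded = np.pad(data, window // 2, mode='edge')
    windows = sliding_window_view(padded, window)  # shape: (len(data), window)
    return np.median(windows, axis=-1)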
def mean_and_stdev(raypaths):
    minlen = min(map(len, raypaths))
    maxlen = max(map(len, raypaths))
    # newtimes = empty(maxlen)
    meandepth = empty(maxlen)
    meanxt = empty(maxlen)
    depths = MaskedArray(empty((len(raypaths), maxlen)),
                         mask=numpy.zeros((len(raypaths), maxlen), dtype=bool))
    xts = MaskedArray(empty((len(raypaths), maxlen)),
                      mask=numpy.zeros((len(raypaths), maxlen), dtype=bool))
    times = MaskedArray(empty((len(raypaths), maxlen)),
                        mask=numpy.zeros((len(raypaths), maxlen), dtype=bool))

    # Build up some numpy arrays
    for i, r in enumerate(raypaths):
        # Set the used values
        depths[i, :len(r)] = r.getDepths()
        xts[i, :len(r)] = r.getAcrossTracks()
        times[i, :len(r)] = r.getTimes()
        # For the longest row, save the times to be our return value
        if len(r) == maxlen:
            newtimes = r.getTimes()
        # Mask out the unused values
        mask = [[True for _ in range(maxlen - len(r))]]
        depths.mask[i, len(r):] = deepcopy(mask)
        xts.mask[i, len(r):] = deepcopy(mask)
        times.mask[i, len(r):] = deepcopy(mask)

    # Calculate the stats
    mean_depths = depths.mean(axis=0)  # depths.filled(0).sum(axis=0) / depths.count(axis=0)
    mean_xts = xts.mean(axis=0)  # xts.filled(0).sum(axis=0) / xts.count(axis=0)
    stdev_depths = [depths[:, i].compressed().std() for i in range(maxlen)]
    stdev_xts = [xts[:, i].compressed().std() for i in range(maxlen)]

    return array([newtimes, mean_depths, mean_xts,
                  stdev_depths, stdev_xts]).transpose()
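# Added illustration: a minimal sketch of the ragged-rows pattern above,
# starting from a fully masked array instead of masking the tails afterwards.
# Assigning the used prefix unmasks it, so column means ignore the padding.
# The row data are hypothetical.
import numpy as np
from numpy.ma import MaskedArray

rows = [[1.0, 2.0, 3.0], [4.0, 5.0]]
maxlen = max(len(r) for r in rows)
padded = MaskedArray(np.empty((len(rows), maxlen)),
                     mask=np.ones((len(rows), maxlen), dtype=bool))
for i, r in enumerate(rows):
    padded[i, :len(r)] = r  # assignment also unmasks these cells
means = padded.mean(axis=0)  # -> [2.5, 3.5, 3.0]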
def _process_outputs(self, out, n_splits):
    """Return the results dict and best index for the given outputs."""
    # if one chooses to see the train score, "out" will contain train score info
    if self.return_train_score:
        (train_scores, test_scores, test_sample_counts,
         fit_time, score_time, parameters) = zip(*out)
    else:
        (test_scores, test_sample_counts,
         fit_time, score_time, parameters) = zip(*out)

    candidate_params = parameters[::n_splits]
    n_candidates = len(candidate_params)
    results = dict()

    # Compute the (weighted) mean and std for test scores alone
    # NOTE: test_sample counts (weights) remain the same for all candidates
    test_sample_counts = np.array(test_sample_counts[:n_splits], dtype=int)

    results = self._store_results(
        results, n_splits, n_candidates, 'test_score', test_scores,
        splits=True, rank=True,
        weights=test_sample_counts if self.iid else None)
    if self.return_train_score:
        results = self._store_results(results, n_splits, n_candidates,
                                      'train_score', train_scores,
                                      splits=True)
    results = self._store_results(results, n_splits, n_candidates,
                                  'fit_time', fit_time)
    results = self._store_results(results, n_splits, n_candidates,
                                  'score_time', score_time)

    best_index = np.flatnonzero(results["rank_test_score"] == 1)[0]

    # Use one np.ma.MaskedArray per param and mask the places where the
    # param is not applicable for that candidate, since each candidate may
    # not contain all the params
    param_vals = {}
    for cand_idx, params in enumerate(candidate_params):
        for name, value in params.items():
            # An all-masked empty array gets created for the key
            # `"param_%s" % name` at the first occurrence of `name`.
            # Setting the value at an index also unmasks that index
            param = "param_" + name
            if param not in param_vals:
                # Map candidates to values. Defaults to a masked np.empty
                param_vals[param] = MaskedArray(np.empty(n_candidates),
                                                mask=True, dtype=object)
            param_vals[param][cand_idx] = value
    results.update(param_vals)
    # Store a list of param dicts at the key 'params'
    results['params'] = candidate_params

    return results, best_index
def unwrapv(inph, in_p=(), uv=2 * pi):
    """Return the input matrix unwrapped by the value given in uv.

    This is a vectorized routine, but it is not as fast as it should be.
    """
    if not is_masked(inph):
        fasei = MaskedArray(inph, isnan(inph))
    else:
        fasei = inph.copy()

    size = fasei.shape
    nx, ny = size

    # If the initial unwrapping point is not given, take the center of the
    # image as the initial coordinate
    if in_p == ():
        in_p = (int(size[0] / 2), int(size[1] / 2))

    # Create a temporary array to mark whether the points are already unwrapped:
    # 0 the point has not been unwrapped
    # 1 the point has not been unwrapped, but it is in the unwrapping list
    # 2 the point was already unwrapped
    fl = N.zeros(size)

    # List containing the points to unwrap
    l_un = [in_p]
    fl[in_p] = 1

    # unwrapped values
    faseo = fasei.copy()

    XI_, YI_ = meshgrid(range(-1, 2), range(-1, 2))
    XI_ = XI_.flatten()
    YI_ = YI_.flatten()

    while len(l_un) > 0:
        # remove the first value from the list
        unp = l_un.pop(0)
        #l_un[0:1]=[]
        XI = XI_ + unp[0]
        YI = YI_ + unp[1]
        # Remove the neighbor coordinates that fall outside the image
        nxi = XI > -1
        nyi = YI > -1
        nxf = XI < nx
        nyf = YI < ny
        n = nonzero(nxi & nyi & nxf & nyf)
        lco = zip(XI[n], YI[n])

        # Put the coordinates of the unwrapped neighbors in the list
        # and check for wrapping
        nv = 0
        wv = 0
        for co in lco:
            if (fl[co] == 0) & (faseo.mask[co] == False):
                fl[co] = 1
                l_un.append(co)
            elif fl[co] == 2:
                wv = wv + rint((faseo[co] - faseo[unp]) / uv)
                nv = nv + 1
        if nv != 0:
            wv = wv / nv
        #if wv >= 0: wv = int(wv + 0.5)
        #else: wv = int(wv - 0.5)
        fl[unp] = 2
        faseo[unp] = faseo[unp] + wv * uv
    return faseo
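# Added illustration: a hedged usage sketch for unwrapv on a synthetic
# wrapped phase (assumes the numpy-style names imported above; the test
# surface below is hypothetical).
import numpy as np

x, y = np.meshgrid(np.linspace(-1, 1, 64), np.linspace(-1, 1, 64))
true_phase = 6 * np.pi * (x**2 + y**2)  # smooth phase spanning several cycles
wrapped = (true_phase + np.pi) % (2 * np.pi) - np.pi  # wrap into [-pi, pi)
unwrapped = unwrapv(wrapped)  # unwrapping starts at the image centre
# unwrapped should differ from true_phase only by a constant multiple of 2*pi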
def _extract_iloc_masked_array(self, key: GetItemKeyType) -> MaskedArray:
    '''Produce a MaskedArray of the same shape as this Series, where the
    values selected via iloc selection are masked.
    '''
    mask = self._extract_iloc_mask(key=key)
    return MaskedArray(data=self.values, mask=mask.values)
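# Added illustration: the selection-to-MaskedArray pattern above, shown
# standalone with plain NumPy (hypothetical values; positions 1 and 3 stand
# in for an iloc selection).
import numpy as np
from numpy.ma import MaskedArray

values = np.array([10, 20, 30, 40])
mask = np.zeros(len(values), dtype=bool)
mask[[1, 3]] = True  # positions picked by the iloc-style key
selected = MaskedArray(data=values, mask=mask)
# entries 1 and 3 are masked; the others pass through unchanged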
def test_column_with_incompatible_dtype_raises(dsn, configuration):
    with open_cursor(configuration) as cursor:
        with query_fixture(cursor, configuration, 'INSERT INTEGER') as table_name:
            columns = [MaskedArray([1, 2, 3], mask=False, dtype='int16')]
            with pytest.raises(turbodbc.InterfaceError):
                cursor.executemanycolumns(
                    "INSERT INTO {} VALUES (?)".format(table_name), columns)
def create_cv_results(
    scores, candidate_params, n_splits, error_score, weights, multimetric
):
    if len(scores[0]) == 4:
        fit_times, test_scores, score_times, train_scores = zip(*scores)
    else:
        fit_times, test_scores, score_times = zip(*scores)
        train_scores = None

    if not multimetric:
        test_scores = [error_score if s is FIT_FAILURE else s for s in test_scores]
        if train_scores is not None:
            train_scores = [
                error_score if s is FIT_FAILURE else s for s in train_scores
            ]
    else:
        test_scores = {
            k: [error_score if x is FIT_FAILURE else x[k] for x in test_scores]
            for k in multimetric
        }
        if train_scores is not None:
            train_scores = {
                k: [error_score if x is FIT_FAILURE else x[k] for x in train_scores]
                for k in multimetric
            }

    # Construct the `cv_results_` dictionary
    results = {"params": candidate_params}
    n_candidates = len(candidate_params)

    if weights is not None:
        weights = np.broadcast_to(
            weights[None, :], (len(candidate_params), len(weights))
        )

    _store(results, "fit_time", fit_times, n_splits, n_candidates)
    _store(results, "score_time", score_times, n_splits, n_candidates)

    if not multimetric:
        _store(results, "test_score", test_scores, n_splits, n_candidates,
               splits=True, rank=True, weights=weights)
        if train_scores is not None:
            _store(results, "train_score", train_scores, n_splits,
                   n_candidates, splits=True)
    else:
        for key in multimetric:
            _store(results, "test_{}".format(key), test_scores[key], n_splits,
                   n_candidates, splits=True, rank=True, weights=weights)
        if train_scores is not None:
            for key in multimetric:
                _store(results, "train_{}".format(key), train_scores[key],
                       n_splits, n_candidates, splits=True)

    # Use one MaskedArray and mask all the places where the param is not
    # applicable for that candidate. Use defaultdict as each candidate may
    # not contain all the params
    param_results = defaultdict(
        lambda: MaskedArray(np.empty(n_candidates), mask=True, dtype=object)
    )
    for cand_i, params in enumerate(candidate_params):
        for name, value in params.items():
            param_results["param_%s" % name][cand_i] = value

    results.update(param_results)
    return results
def __init__(self, path, keysubs={'/': '_'}, encoding='utf-8',
             default_llod_flag=-8888, default_llod_value='N/A',
             default_ulod_flag=-7777, default_ulod_value='N/A'):
    """
    Arguments:
        self - implied input (not supplied in call)
        path - path to file
        keysubs - dictionary of characters to remove from variable keys
                  and their replacements
        encoding - file encoding (utf-8, latin1, cp1252, etc.)
        default_llod_flag - flag value for lower limit of detections
                            if not specified
        default_llod_value - default value to use for replacement of llod_flag
        default_ulod_flag - flag value for upper limit of detections
                            if not specified
        default_ulod_value - default value to use for replacement of ulod_flag
    Returns:
        out - PseudoNetCDFFile interface to data in file.
    """
    lastattr = None
    PseudoNetCDFFile.__init__(self)
    f = openf(path, 'rU', encoding=encoding)
    missing = []
    units = []
    line = f.readline()
    if ',' in line:
        delim = ','
    else:
        delim = None

    def split(s):
        return [s_.strip() for s_ in s.split(delim)]

    if split(line)[-1] != '1001':
        raise TypeError("File is the wrong format. " +
                        "Expected 1001; got %s" % (split(line)[-1],))

    n, self.fmt = split(line)
    # n_user_comments = 0
    n_special_comments = 0
    self.n_header_lines = int(n)
    try:
        for li in range(self.n_header_lines - 1):
            li += 2
            line = f.readline()
            LAST_VAR_DESC_LINE = 12 + len(missing)
            SPECIAL_COMMENT_COUNT_LINE = LAST_VAR_DESC_LINE + 1
            LAST_SPECIAL_COMMENT_LINE = (SPECIAL_COMMENT_COUNT_LINE +
                                         n_special_comments)
            USER_COMMENT_COUNT_LINE = (12 + len(missing) + 2 +
                                       n_special_comments)
            if li == PI_LINE:
                self.PI_NAME = line.strip()
            elif li == ORG_LINE:
                self.ORGANIZATION_NAME = line.strip()
            elif li == PLAT_LINE:
                self.SOURCE_DESCRIPTION = line.strip()
            elif li == MISSION_LINE:
                self.MISSION_NAME = line.strip()
            elif li == VOL_LINE:
                self.VOLUME_INFO = ', '.join(split(line))
            elif li == DATE_LINE:
                line = line.replace(',', ' ').replace('-', ' ') \
                           .replace('  ', ' ').split()
                SDATE = ", ".join(line[:3])
                WDATE = ", ".join(line[3:])
                self.SDATE = SDATE
                self.WDATE = WDATE
                self._SDATE = datetime.strptime(SDATE, '%Y, %m, %d')
                self._WDATE = datetime.strptime(WDATE, '%Y, %m, %d')
            elif li == TIME_INT_LINE:
                self.TIME_INTERVAL = line.strip()
            elif li == UNIT_LINE:
                unitstr = line.replace('\n', '').replace('\r', '').strip()
                units.append(unitstr)
                self.INDEPENDENT_VARIABLE = units[-1]
            elif li == SCALE_LINE:
                scales = [eval(i) for i in split(line)]
                if set([float(s) for s in scales]) != set([1.]):
                    raise ValueError("Unsupported: scaling is unsupported. " +
                                     " data is scaled by %s" % (str(scales),))
            elif li == MISSING_LINE:
                missing = [eval(i) for i in split(line)]
            elif li > MISSING_LINE and li <= LAST_VAR_DESC_LINE:
                nameunit = line.replace('\n', '').split(',')
                name = nameunit[0].strip()
                if len(nameunit) > 1:
                    units.append(nameunit[1].strip())
                elif re.compile(r'(.*)\((.*)\)').match(nameunit[0]):
                    desc_groups = re.compile(r'(.*)\((.*)\).*').match(
                        nameunit[0]).groups()
                    name = desc_groups[0].strip()
                    units.append(desc_groups[1].strip())
                elif '_' in name:
                    units.append(name.split('_')[1].strip())
                else:
                    warn('Could not find unit in string: "%s"' % line)
                    units.append(name.strip())
            elif li == SPECIAL_COMMENT_COUNT_LINE:
                n_special_comments = int(line.replace('\n', ''))
            elif (li > SPECIAL_COMMENT_COUNT_LINE and
                  li <= LAST_SPECIAL_COMMENT_LINE):
                colon_pos = line.find(':')
                if line[:1] == ' ':
                    k = lastattr
                    v = getattr(self, k, '') + line
                else:
                    k = line[:colon_pos].strip()
                    v = line[colon_pos + 1:].strip()
                setattr(self, k, v)
                lastattr = k
            elif li == USER_COMMENT_COUNT_LINE:
                lastattr = None
                # n_user_comments = int(line.replace('\n', ''))
            elif (li > USER_COMMENT_COUNT_LINE and
                  li < self.n_header_lines):
                colon_pos = line.find(':')
                if line[:1] == ' ':
                    k = lastattr
                    v = getattr(self, k, '') + line
                else:
                    k = line[:colon_pos].strip()
                    v = line[colon_pos + 1:].strip()
                setattr(self, k, v)
                lastattr = k
            elif li == self.n_header_lines:
                varstr = line.replace(',', ' ').replace('  ', ' ')
                variables = varstr.split()
                for oc, nc in keysubs.items():
                    variables = [vn.replace(oc, nc) for vn in variables]
                self.TFLAG = variables[0]
    except Exception as e:
        raise SyntaxError("Error parsing icartt file %s: %s" %
                          (path, repr(e)))

    missing = missing[:1] + missing
    scales = [1.] + scales

    if hasattr(self, 'LLOD_FLAG'):
        llod_values = loddelim.sub('\n', self.LLOD_VALUE).split()
        if len(llod_values) == 1:
            llod_values *= len(variables)
        else:
            llod_values = ['N/A'] + llod_values

        assert len(llod_values) == len(variables)
        llod_values = [get_lodval(llod_val) for llod_val in llod_values]

        llod_flags = len(llod_values) * [self.LLOD_FLAG]
        llod_flags = [get_lodval(llod_flag) for llod_flag in llod_flags]
    else:
        llod_flags = [default_llod_flag] * len(scales)
        llod_values = [default_llod_value] * len(scales)

    if hasattr(self, 'ULOD_FLAG'):
        ulod_values = loddelim.sub('\n', self.ULOD_VALUE).split()
        if len(ulod_values) == 1:
            ulod_values *= len(variables)
        else:
            ulod_values = ['N/A'] + ulod_values

        assert len(ulod_values) == len(variables)
        ulod_values = [get_lodval(ulod_val) for ulod_val in ulod_values]

        ulod_flags = len(ulod_values) * [self.ULOD_FLAG]
        ulod_flags = [get_lodval(ulod_flag) for ulod_flag in ulod_flags]
    else:
        ulod_flags = [default_ulod_flag] * len(scales)
        ulod_values = [default_ulod_value] * len(scales)

    data = f.read()
    datalines = data.split('\n')
    ndatalines = len(datalines)
    while datalines[-1] in ('', ' ', '\r'):
        ndatalines -= 1
        datalines.pop(-1)

    data = genfromtxt(StringIO('\n'.join(datalines).encode()),
                      delimiter=delim, dtype='d')
    data = data.reshape(ndatalines, len(variables))
    data = data.swapaxes(0, 1)
    self.createDimension('POINTS', ndatalines)
    for vi, var in enumerate(variables):
        scale = scales[vi]
        miss = missing[vi]
        unit = units[vi]
        dat = data[vi]
        llod_flag = llod_flags[vi]
        llod_val = llod_values[vi]
        ulod_flag = ulod_flags[vi]
        ulod_val = ulod_values[vi]
        vals = MaskedArray(dat, mask=dat == miss, fill_value=miss)
        tmpvar = self.variables[var] = PseudoNetCDFVariable(
            self, var, 'd', ('POINTS',), values=vals)
        tmpvar.units = unit
        tmpvar.standard_name = var
        tmpvar.missing_value = miss
        tmpvar.fill_value = miss
        tmpvar.scale = scale

        if hasattr(self, 'LLOD_FLAG'):
            tmpvar.llod_flag = llod_flag
            tmpvar.llod_value = llod_val

        if hasattr(self, 'ULOD_FLAG'):
            tmpvar.ulod_flag = ulod_flag
            tmpvar.ulod_value = ulod_val

    def dtime(s):
        return timedelta(seconds=int(s),
                         microseconds=(s - int(s)) * 1.E6)

    vtime = vectorize(dtime)
    tvar = self.variables[self.TFLAG]
    self._date_objs = (self._SDATE + vtime(tvar).view(type=ndarray))
def __call__(self, two):
    "Executes the call behavior."
    # first argument
    one = self.obj

    # carry out the basic operation, transfer name and units
    func = getattr(super(Dvect, one), self.f)
    if self.inp:
        func(two)
        result = one
    else:
        result = MaskedArray(func(two), subok=True).view(type(one))
    result._name = one._name
    result._units = one._units

    # handle the errors. They are worked out in a linear approximation
    # which requires the user to supply two partial derivative functions
    # in addition to the basic function.
    if self.fx is not None and self.fy is not None:
        if isinstance(two, Dvect):
            if one._err is noerr:
                result._err = getdata(np.abs(self.fy(one.dat, two.dat)) * two._err)
            elif two._err is noerr:
                result._err = getdata(np.abs(self.fx(one.dat, two.dat)) * one._err)
            else:
                if one is two:
                    # Two inputs are identical and not independent
                    if result.mask is nomask:
                        result._err = np.hypot(self.fx(one.dat, two.dat),
                                               self.fy(one.dat, two.dat)) * one._err
                    else:
                        result._err = np.where(
                            result.mask, fillerr,
                            getdata(np.hypot(self.fx(one.dat, two.dat),
                                             self.fy(one.dat, two.dat) * two._err)))
                else:
                    # Two inputs are assumed to be independent.
                    if result.mask is nomask:
                        result._err = getdata(
                            np.hypot(self.fx(one.dat, two.dat) * one._err,
                                     self.fy(one.dat, two.dat) * two._err))
                    else:
                        result._err = np.where(
                            result.mask, fillerr,
                            getdata(np.hypot(self.fx(one.dat, two.dat) * one._err,
                                             self.fy(one.dat, two.dat) * two._err)))
        else:
            result._err = getdata(np.abs(self.fx(one.dat, two)) * one._err)
    else:
        result._err = noerr
    return result
def make_masked_array(dataset, fill_value):
    # NOTE: the fill_value argument is unused here; the mask is derived
    # from the dataset's own _FillValue attribute.
    ar = np.array(dataset)
    invalid = abs(ar - dataset._FillValue) < 0.001
    r = MaskedArray(ar, invalid)
    return r
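# Added illustration: a hypothetical usage sketch for make_masked_array.
# Any array-like carrying a _FillValue attribute (a netCDF4 variable, for
# instance) works; FakeVariable is a stand-in for this example only.
import numpy as np

class FakeVariable(list):
    _FillValue = -9999.0

var = FakeVariable([1.0, -9999.0, 3.0])
masked = make_masked_array(var, fill_value=-9999.0)
# -> [1.0 -- 3.0]; values within 0.001 of _FillValue are masked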