def simulate_state_lines_losses(eventlookup, freq_mean, states, lines, sims):
    '''assembles state/line level events based on the simulated year events'''
    logger = logging.getLogger(__name__)
    logger.info('start state lines losses')
    # Number of events per simulated year, drawn from a Poisson distribution.
    # `dd` is assumed to be an alias for pandas.DataFrame in this snippet.
    numberofevents = dd(np.random.poisson(freq_mean, sims),
                        index=np.arange(1, sims + 1),
                        columns=['events'])
    catevents = simulate_events(numberofevents, eventlookup, sims)
    logger.info('start to build full array of losses, '
                'combining state lines with events')
    sim_events = dd()
    firstloop = True
    for state in states['state']:
        print(f'start {state}')
        for line in lines['line']:
            print(f'start {line}')
            # Replicate the simulated event table for every state/line pair.
            b = catevents.copy()
            b['state'] = state
            b['line'] = line
            if firstloop:
                sim_events = b
                firstloop = False
            else:
                sim_events = pd.concat([sim_events, b])
    logger.info('completed combining state lines with events')
    return sim_events
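# A minimal usage sketch for simulate_state_lines_losses, assuming `dd` is an
# alias for pandas.DataFrame; the event table, rates and frequency below are
# illustrative placeholders, not values from the source.
import logging

import numpy as np
import pandas as pd

dd = pd.DataFrame  # assumed alias used by the simulation functions

logging.basicConfig(level=logging.INFO)
eventlookup = pd.DataFrame({'rate': [0.10, 0.05, 0.02]},
                           index=[101, 102, 103])
states = pd.DataFrame({'state': ['CA', 'TX']})
lines = pd.DataFrame({'line': ['property', 'auto']})
losses = simulate_state_lines_losses(eventlookup, freq_mean=2.0,
                                     states=states, lines=lines, sims=100)
print(losses.head())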
def scroll_dates(self, variable, cloud):
    # Build the bounding box from the pixel counts (25 m pixels assumed).
    self.x_len = self.x_pix * 25
    self.x_fin = self.x_in + self.x_len  # + because w to e
    self.y_len = self.y_pix * 25
    self.y_fin = self.y_in + self.y_len
    print(f"""
    |----------------------------|
    |        {self.y_fin}        |
    |{self.x_in}      {self.x_fin}|
    |        {self.y_in}         |
    |----------------------------|
    """)
    vr = variable.sel(y=np.arange(self.y_in, self.y_fin, 25),
                      x=np.arange(self.x_in, self.x_fin, 25),
                      method='nearest')
    # 16383 is the quality-flag value for a fully good pixel.
    n_good = (vr == 16383).sum(dim=("x", "y"))
    g = n_good.values
    tt = n_good.time.values
    gu = []
    print("DATE, QUALITY\n")
    for n, v in enumerate(g):
        if v == self.x_pix * self.y_pix:
            # Every pixel in the bbox is good on this date.
            gu.append(n)
            print(f"{tt[n]}, {v} <------ X")
        else:
            print(f"{tt[n]}, {v}")
    print(f"{dt.datetime.now()}: {len(gu)} good quality dates in the bbox")
    self.arr = variable.isel(time=gu)  # TODO! add here the bounding of the shape!
    # Fraction of fully good pixels over finite pixels per date, computed
    # lazily (`dd` is assumed to be dask.delayed in this snippet).
    a1 = dd((self.arr == 16383).sum(dim=("x", "y")).values)
    a6 = dd(np.isfinite(self.arr.values).sum(
        axis=(self.arr.get_axis_num("x"), self.arr.get_axis_num("y"))))
    a7 = dd(a1 / a6)
    a7 = a7.compute()
    # Keep only the dates whose good-pixel fraction clears the cloud threshold.
    return [(self.arr.time[n]).values
            for n, v in enumerate(a7)
            if v > (1 - cloud)]  # TODO! hardcoded cloudcover
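# Hedged usage sketch: scroll_dates is a method, so a hypothetical holder
# class supplies the grid attributes it reads; the quality DataArray and its
# all-good 16383 values are illustrative. NOTE: this snippet assumes `dd`
# is dask.delayed, unlike the pandas.DataFrame alias used further above.
import datetime as dt

import numpy as np
import xarray as xr
from dask import delayed as dd


class Scene:
    scroll_dates = scroll_dates  # reuse the function above as a method

    def __init__(self, x_in, y_in, x_pix, y_pix):
        self.x_in, self.y_in = x_in, y_in
        self.x_pix, self.y_pix = x_pix, y_pix


quality = xr.DataArray(
    np.full((3, 40, 40), 16383),
    dims=("time", "y", "x"),
    coords={"time": np.array(['2020-01-01', '2020-01-02', '2020-01-03'],
                             dtype='datetime64[ns]'),
            "y": np.arange(0, 1000, 25),
            "x": np.arange(0, 1000, 25)},
)
scene = Scene(x_in=0, y_in=0, x_pix=10, y_pix=10)
good_dates = scene.scroll_dates(quality, cloud=0.2)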
def file_reader(filename, record_by=None, order=None, lazy=False,
                optimize=True):
    """Reads a DM3 file and loads the data into the appropriate class.
    data_id can be specified to load a given image within a DM3 file that
    contains more than one dataset.

    Parameters
    ----------
    record_by : str
        One of: SI, Signal2D
    order : str
        One of 'C' or 'F'
    lazy : bool, default False
        Load the signal lazily.
    %s
    """
    with open(filename, "rb") as f:
        dm = DigitalMicrographReader(f)
        dm.parse_file()
        images = [ImageObject(imdict, f, order=order, record_by=record_by)
                  for imdict in dm.get_image_dictionaries()]
        imd = []
        del dm.tags_dict['ImageList']
        dm.tags_dict['ImageList'] = {}
        for image in images:
            dm.tags_dict['ImageList']['TagGroup0'] = \
                image.imdict.as_dictionary()
            axes = image.get_axes_dict()
            mp = image.get_metadata()
            mp['General']['original_filename'] = os.path.split(filename)[1]
            post_process = []
            if image.to_spectrum is True:
                post_process.append(
                    lambda s: s.to_signal1D(optimize=optimize))
            post_process.append(lambda s: s.squeeze())
            if lazy:
                image.filename = filename
                from dask.array import from_delayed
                import dask.delayed as dd
                val = dd(image.get_data, pure=True)()
                data = from_delayed(val, shape=image.shape,
                                    dtype=image.dtype)
            else:
                data = image.get_data()
            imd.append({'data': data,
                        'axes': axes,
                        'metadata': mp,
                        'original_metadata': dm.tags_dict,
                        'post_process': post_process,
                        'mapping': image.get_mapping(),
                        })
    return imd


file_reader.__doc__ %= (OPTIMIZE_ARG.replace('False', 'True'))
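# Illustrative call of file_reader on a DM3 file; 'sample.dm3' is a
# placeholder path, and lazy loading returns dask-backed data.
signal_dicts = file_reader('sample.dm3', lazy=True)
for d in signal_dicts:
    print(d['data'].shape, d['metadata']['General']['original_filename'])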
def _map_all(self, function, inplace=True, **kwargs):
    # Apply `function` lazily to the whole data array; `dd` is dask.delayed.
    calc_result = dd(function)(self.data, **kwargs)
    if inplace:
        # Wrap the delayed result back into a dask array of the same
        # shape/dtype so the signal stays lazy.
        self.data = da.from_delayed(calc_result,
                                    shape=self.data.shape,
                                    dtype=self.data.dtype)
        return None
    return self._deepcopy_with_new_data(calc_result)
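# Standalone sketch of the delayed -> from_delayed pattern used by _map_all,
# with an illustrative array and function (not part of the source class).
import numpy as np
import dask.array as da
from dask import delayed as dd

data = da.ones((4, 4), chunks=(2, 2))
calc = dd(np.square)(data)  # one lazy call over the whole array
result = da.from_delayed(calc, shape=data.shape, dtype=data.dtype)
print(result.compute())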
def _map_iterate(self, function, iterating_kwargs=(), show_progressbar=None,
                 parallel=None, max_workers=None, ragged=None, inplace=True,
                 **kwargs):
    if ragged not in (True, False):
        raise ValueError('"ragged" kwarg has to be bool for lazy signals')
    _logger.debug("Entering '_map_iterate'")
    size = max(1, self.axes_manager.navigation_size)
    from hyperspy.misc.utils import (create_map_objects,
                                     map_result_construction)
    func, iterators = create_map_objects(function, size, iterating_kwargs,
                                         **kwargs)
    iterators = (self._iterate_signal(), ) + iterators
    res_shape = self.axes_manager._navigation_shape_in_array
    # no navigation
    if not len(res_shape) and ragged:
        res_shape = (1, )
    all_delayed = [dd(func)(data) for data in zip(*iterators)]
    if ragged:
        sig_shape = ()
        sig_dtype = np.dtype('O')
    else:
        one_compute = all_delayed[0].compute()
        sig_shape = one_compute.shape
        sig_dtype = one_compute.dtype
    pixels = [
        da.from_delayed(res, shape=sig_shape, dtype=sig_dtype)
        for res in all_delayed
    ]
    for step in reversed(res_shape):
        _len = len(pixels)
        starts = range(0, _len, step)
        ends = range(step, _len + step, step)
        pixels = [
            da.stack(pixels[s:e], axis=0) for s, e in zip(starts, ends)
        ]
    result = pixels[0]
    res = map_result_construction(self, inplace, result, ragged, sig_shape,
                                  lazy=True)
    return res
def simulate_events(numberofevents, eventlookup, sims):
    '''final output columns: {simulation, eventseq, eventid, rand}'''
    logger = logging.getLogger(__name__)
    logger.info('start simulation of events')
    totalevents = numberofevents['events'].sum()
    catevents = np.empty((max(totalevents, sims), 3))
    rollingevent = 1
    eventids = eventlookup.index
    # Normalise the event rates into a probability vector for sampling.
    normalizedprob = eventlookup['rate'] / eventlookup['rate'].sum()
    for sim in range(1, sims + 1):
        events = numberofevents['events'][sim]
        lowerindex = rollingevent - 1
        upperindex = rollingevent + events - 1
        # Simulation number, event sequence within the simulation, and the
        # sampled event id for each event in this simulation.
        catevents[lowerindex:upperindex, 0] = np.full(events, sim)
        catevents[lowerindex:upperindex, 1] = np.arange(1, events + 1, 1)
        catevents[lowerindex:upperindex, 2] = np.random.choice(
            eventids, p=normalizedprob, size=events)
        rollingevent = rollingevent + events
    # A single cast converts all three columns to int.
    catevents = catevents.astype(int)
    catdatapd = dd(catevents, columns=['simulation', 'eventseq', 'eventid'])
    catdatapd['rand'] = np.random.uniform(0, 1, size=max(totalevents, sims))
    logger.info('finished generating simulated events')
    return catdatapd
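# Minimal sketch of driving simulate_events directly, again assuming `dd` is
# pandas.DataFrame; the rates and Poisson frequency below are illustrative.
import numpy as np
import pandas as pd

dd = pd.DataFrame  # assumed alias
sims = 10
eventlookup = pd.DataFrame({'rate': [0.4, 0.35, 0.25]}, index=[1, 2, 3])
numberofevents = dd(np.random.poisson(2.0, sims),
                    index=np.arange(1, sims + 1), columns=['events'])
catdatapd = simulate_events(numberofevents, eventlookup, sims)
print(catdatapd.head())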
def _map_iterate(self, function, iterating_kwargs=(), show_progressbar=None,
                 parallel=None, max_workers=None, ragged=None, inplace=True,
                 **kwargs):
    if ragged not in (True, False):
        raise ValueError('"ragged" kwarg has to be bool for lazy signals')
    _logger.debug("Entering '_map_iterate'")
    size = max(1, self.axes_manager.navigation_size)
    from hyperspy.misc.utils import (create_map_objects,
                                     map_result_construction)
    func, iterators = create_map_objects(function, size, iterating_kwargs,
                                         **kwargs)
    iterators = (self._iterate_signal(), ) + iterators
    res_shape = self.axes_manager._navigation_shape_in_array
    # no navigation
    if not len(res_shape) and ragged:
        res_shape = (1, )
    all_delayed = [dd(func)(data) for data in zip(*iterators)]
    if ragged:
        if inplace:
            raise ValueError("In place computation is not compatible with "
                             "ragged array for lazy signal.")
        # The shape of the signal dimension changes for each navigation
        # index, which means we can't predict the shape, and the dtype needs
        # to be a python object to support numpy ragged arrays.
        sig_shape = ()
        sig_dtype = np.dtype('O')
    else:
        one_compute = all_delayed[0].compute()
        # No signal dimension for scalar
        if np.isscalar(one_compute):
            sig_shape = ()
            sig_dtype = type(one_compute)
        else:
            sig_shape = one_compute.shape
            sig_dtype = one_compute.dtype
    pixels = [
        da.from_delayed(res, shape=sig_shape, dtype=sig_dtype)
        for res in all_delayed
    ]
    if ragged:
        if show_progressbar is None:
            from hyperspy.defaults_parser import preferences
            show_progressbar = preferences.General.show_progressbar
        # We compute here because it is not certain that a ragged dask array
        # can be built: we would need to provide a chunk size.
        res_data = np.empty(res_shape, dtype=sig_dtype)
        _logger.info("Lazy signal is computed to make the ragged array.")
        if show_progressbar:
            cm = ProgressBar
        else:
            cm = dummy_context_manager
        with cm():
            try:
                for i, pixel in enumerate(pixels):
                    res_data.flat[i] = pixel.compute()
            except MemoryError:
                raise MemoryError("The use of 'ragged' array requires the "
                                  "computation of the lazy signal.")
    else:
        if len(pixels) > 0:
            for step in reversed(res_shape):
                _len = len(pixels)
                starts = range(0, _len, step)
                ends = range(step, _len + step, step)
                pixels = [
                    da.stack(pixels[s:e], axis=0)
                    for s, e in zip(starts, ends)
                ]
        res_data = pixels[0]
    res = map_result_construction(self, inplace, res_data, ragged, sig_shape,
                                  lazy=not ragged)
    return res
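# Hedged sketch: _map_iterate is internal, so this drives it through the
# public map() API of a lazy HyperSpy signal; the data and function are
# illustrative, and the hyperspy import is assumed available.
import numpy as np
import hyperspy.api as hs

s = hs.signals.Signal2D(np.random.rand(4, 8, 8)).as_lazy()
result = s.map(np.fft.fft2, inplace=False, ragged=False)
print(result)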