def _do_json_rpc(web_socket, rpc_request: dict, monitor: Monitor) -> dict:
    # send the JSON-RPC request, then consume responses until the final one arrives
    web_socket.write_message(json.dumps(rpc_request))
    work_reported = None
    started = False
    while monitor is None or not monitor.is_cancelled():
        response_str = yield web_socket.read_message()
        rpc_response = json.loads(response_str)
        if 'progress' in rpc_response:
            if monitor:
                progress = rpc_response['progress']
                total = progress.get('total')
                label = progress.get('label')
                worked = progress.get('worked')
                msg = progress.get('message')
                if not started:
                    monitor.start(label or "start", total_work=total)
                    started = True
                if started:
                    if worked:
                        # 'worked' is cumulative; convert it to the increment
                        # expected by monitor.progress()
                        if work_reported is None:
                            work_reported = 0.0
                        work = worked - work_reported
                        work_reported = worked
                    else:
                        work = None
                    monitor.progress(work=work, msg=msg)
        else:
            # not a progress message, so this is the actual RPC response
            if monitor and started:
                monitor.done()
            return rpc_response
    return {}
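# Sketch (not part of the original source): the 'progress' messages handled above carry
# a cumulative 'worked' value, while Monitor.progress() expects incremental work. This
# standalone helper restates that conversion; the numbers below are illustrative.
def _cumulative_to_increments(worked_values):
    increments = []
    work_reported = 0.0
    for worked in worked_values:
        increments.append(worked - work_reported)
        work_reported = worked
    return increments

# _cumulative_to_increments([2.0, 5.0, 9.0]) -> [2.0, 3.0, 4.0]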
def no_op(num_steps: int = 10,
          step_duration: float = 0.5,
          fail_before: bool = False,
          fail_after: bool = False,
          monitor: Monitor = Monitor.NONE) -> bool:
    """
    An operation that does nothing but spend a configurable amount of time.
    It may be useful for testing purposes.

    :param num_steps: Number of steps to iterate.
    :param step_duration: How much time to spend in each step, in seconds.
    :param fail_before: Whether the operation should fail before spending time doing nothing.
    :param fail_after: Whether the operation should fail after spending time doing nothing.
    :param monitor: A progress monitor.
    :return: Always True
    """
    import time
    monitor.start('Computing nothing', num_steps)
    if fail_before:
        raise ValueError('Intentionally failed before doing anything.')
    for i in range(num_steps):
        time.sleep(step_duration)
        monitor.progress(1.0, 'Step %s of %s doing nothing' % (i + 1, num_steps))
    if fail_after:
        raise ValueError('Intentionally failed after doing nothing.')
    monitor.done()
    return True
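# Usage sketch (illustrative only): any object providing the start/progress/done calls
# used above can serve as a monitor; 'PrintMonitor' is a hypothetical stand-in, not part
# of the original API.
class PrintMonitor:
    def start(self, label, total_work=None):
        print('started: %s (total_work=%s)' % (label, total_work))

    def progress(self, work=None, msg=None):
        print('progress: %s %s' % (work, msg))

    def done(self):
        print('done')

# no_op(num_steps=3, step_duration=0.0, monitor=PrintMonitor())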
def _build_catalogue(self, monitor: Monitor = Monitor.NONE):
    self._catalogue = {}
    catalogue_metadata = {}

    start_position = 0
    max_records = _CSW_MAX_RESULTS
    matches = -1
    while True:
        # fetch record metadata
        self._catalogue_service.getrecords2(esn='full',
                                            outputschema=self._namespaces.get_namespace('gmd'),
                                            startposition=start_position,
                                            maxrecords=max_records)
        if matches == -1:
            # set counters, start progress monitor
            matches = self._catalogue_service.results.get('matches')
            if matches == 0:
                break
            monitor.start(label="Fetching catalogue data... (%d records)" % matches,
                          total_work=ceil(matches / max_records))

        catalogue_metadata.update(self._catalogue_service.records)
        monitor.progress(work=1)

        # bump counters
        start_position += max_records
        if start_position > matches:
            break

    self._catalogue = {
        record.identification.uricode[0]: {
            'abstract': record.identification.abstract,
            'bbox_minx': record.identification.bbox.minx if record.identification.bbox else None,
            'bbox_miny': record.identification.bbox.miny if record.identification.bbox else None,
            'bbox_maxx': record.identification.bbox.maxx if record.identification.bbox else None,
            'bbox_maxy': record.identification.bbox.maxy if record.identification.bbox else None,
            'creation_date':
                next(iter(e.date for e in record.identification.date if e and e.type == 'creation'), None),
            'publication_date':
                next(iter(e.date for e in record.identification.date if e and e.type == 'publication'), None),
            'title': record.identification.title,
            'data_sources': record.identification.uricode[1:],
            'licences': record.identification.uselimitation,
            'temporal_coverage_start': record.identification.temporalextent_start,
            'temporal_coverage_end': record.identification.temporalextent_end
        }
        for record in catalogue_metadata.values()
        if record.identification and len(record.identification.uricode) > 0
    }

    monitor.done()
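# Paging sketch (standalone, illustrative): mirrors the start positions the loop above
# requests from the CSW service in chunks of max_records.
def _csw_page_starts(matches, max_records):
    starts = []
    start_position = 0
    while start_position <= matches:
        starts.append(start_position)
        start_position += max_records
    return starts

# _csw_page_starts(25, 10) -> [0, 10, 20], i.e. ceil(25 / 10) == 3 requests.
# Note that when matches is an exact multiple of max_records (e.g. 20 and 10),
# the loop above issues one extra, empty request before it breaks.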
def f(monitor: Monitor, x, a=4):
    monitor.start('f', 23)
    return_value = a * x
    monitor.done()
    return return_value
def _make_local(self,
                local_ds: 'LocalDataSource',
                time_range: TimeRangeLike.TYPE = None,
                region: PolygonLike.TYPE = None,
                var_names: VarNamesLike.TYPE = None,
                monitor: Monitor = Monitor.NONE):
    local_id = local_ds.id
    time_range = TimeRangeLike.convert(time_range) if time_range else None
    region = PolygonLike.convert(region) if region else None
    var_names = VarNamesLike.convert(var_names) if var_names else None  # type: Sequence

    # configure optional NetCDF compression
    compression_level = get_config_value('NETCDF_COMPRESSION_LEVEL', NETCDF_COMPRESSION_LEVEL)
    compression_enabled = True if compression_level > 0 else False

    encoding_update = dict()
    if compression_enabled:
        encoding_update.update({'zlib': True, 'complevel': compression_level})

    local_path = os.path.join(local_ds.data_store.data_store_path, local_id)
    data_store_path = local_ds.data_store.data_store_path
    if not os.path.exists(local_path):
        os.makedirs(local_path)

    monitor.start("Sync " + self.id, total_work=len(self._files.items()))
    for remote_relative_filepath, coverage in self._files.items():
        child_monitor = monitor.child(work=1)

        file_name = os.path.basename(remote_relative_filepath)
        local_relative_filepath = os.path.join(local_id, file_name)
        local_absolute_filepath = os.path.join(data_store_path, local_relative_filepath)

        remote_absolute_filepath = os.path.join(self._data_store.data_store_path, remote_relative_filepath)

        if isinstance(coverage, Tuple):
            time_coverage_start = coverage[0]
            time_coverage_end = coverage[1]

            if not time_range or time_coverage_start >= time_range[0] and time_coverage_end <= time_range[1]:
                if region or var_names:
                    do_update_of_variables_meta_info_once = True
                    do_update_of_region_meta_info_once = True
                    remote_dataset = None
                    try:
                        remote_dataset = xr.open_dataset(remote_absolute_filepath)

                        if var_names:
                            remote_dataset = remote_dataset.drop(
                                [var_name for var_name in remote_dataset.data_vars.keys()
                                 if var_name not in var_names])

                        if region:
                            remote_dataset = normalize_impl(remote_dataset)
                            remote_dataset = subset_spatial_impl(remote_dataset, region)
                            geo_lon_min, geo_lat_min, geo_lon_max, geo_lat_max = region.bounds

                            remote_dataset.attrs['geospatial_lat_min'] = geo_lat_min
                            remote_dataset.attrs['geospatial_lat_max'] = geo_lat_max
                            remote_dataset.attrs['geospatial_lon_min'] = geo_lon_min
                            remote_dataset.attrs['geospatial_lon_max'] = geo_lon_max

                            if do_update_of_region_meta_info_once:
                                local_ds.meta_info['bbox_maxx'] = geo_lon_max
                                local_ds.meta_info['bbox_minx'] = geo_lon_min
                                local_ds.meta_info['bbox_maxy'] = geo_lat_max
                                local_ds.meta_info['bbox_miny'] = geo_lat_min
                                do_update_of_region_meta_info_once = False

                        if compression_enabled:
                            for sel_var_name in remote_dataset.variables.keys():
                                remote_dataset.variables.get(sel_var_name).encoding.update(encoding_update)

                        remote_dataset.to_netcdf(local_absolute_filepath)

                        child_monitor.progress(work=1, msg=str(time_coverage_start))
                    finally:
                        # guard against the case where xr.open_dataset() failed
                        if do_update_of_variables_meta_info_once and remote_dataset is not None:
                            variables_info = local_ds.meta_info.get('variables', [])
                            local_ds.meta_info['variables'] = [
                                var_info for var_info in variables_info
                                if var_info.get('name') in remote_dataset.variables.keys()
                                and var_info.get('name') not in remote_dataset.dims.keys()]
                            do_update_of_variables_meta_info_once = False

                        local_ds.add_dataset(os.path.join(local_id, file_name),
                                             (time_coverage_start, time_coverage_end))
                        child_monitor.done()
                else:
                    # no subsetting requested: copy the file as-is
                    shutil.copy(remote_absolute_filepath, local_absolute_filepath)
                    local_ds.add_dataset(local_relative_filepath,
                                         (time_coverage_start, time_coverage_end))
                    child_monitor.done()
    monitor.done()
    return local_id
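# Sketch (standalone, illustrative): the per-variable encoding update applied in
# _make_local enables zlib compression when xarray writes the file via to_netcdf().
import numpy as np
import xarray as xr

example = xr.Dataset({'t': (('y', 'x'), np.zeros((4, 4)))})
for name in example.variables:
    example.variables[name].encoding.update({'zlib': True, 'complevel': 5})
# example.to_netcdf('example.nc')  # variables are written zlib-compressed at level 5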
def __call__(self, x, a, monitor: Monitor):
    monitor.start('C', 19)
    output = {'y': x * a}
    monitor.done()
    return output
def _make_local(self,
                local_ds: 'LocalDataSource',
                time_range: TimeRangeLike.TYPE = None,
                region: PolygonLike.TYPE = None,
                var_names: VarNamesLike.TYPE = None,
                monitor: Monitor = Monitor.NONE):
    # local_name = local_ds.name
    local_id = local_ds.name
    time_range = TimeRangeLike.convert(time_range) if time_range else None
    region = PolygonLike.convert(region) if region else None
    var_names = VarNamesLike.convert(var_names) if var_names else None  # type: Sequence

    compression_level = get_config_value('NETCDF_COMPRESSION_LEVEL', NETCDF_COMPRESSION_LEVEL)
    compression_enabled = True if compression_level > 0 else False

    encoding_update = dict()
    if compression_enabled:
        encoding_update.update({'zlib': True, 'complevel': compression_level})

    local_path = os.path.join(local_ds.data_store.data_store_path, local_id)
    data_store_path = local_ds.data_store.data_store_path
    if not os.path.exists(local_path):
        os.makedirs(local_path)

    monitor.start("Sync " + self.name, total_work=len(self._files.items()))
    for remote_relative_filepath, coverage in self._files.items():
        child_monitor = monitor.child(work=1)

        file_name = os.path.basename(remote_relative_filepath)
        local_relative_filepath = os.path.join(local_id, file_name)
        local_absolute_filepath = os.path.join(data_store_path, local_relative_filepath)

        remote_absolute_filepath = os.path.join(self._data_store.data_store_path, remote_relative_filepath)

        if isinstance(coverage, Tuple):
            time_coverage_start = coverage[0]
            time_coverage_end = coverage[1]

            remote_netcdf = None
            local_netcdf = None
            if not time_range or time_coverage_start >= time_range[0] and time_coverage_end <= time_range[1]:
                if region or var_names:
                    try:
                        remote_netcdf = NetCDF4DataStore(remote_absolute_filepath)

                        local_netcdf = NetCDF4DataStore(local_absolute_filepath, mode='w', persist=True)
                        local_netcdf.set_attributes(remote_netcdf.get_attrs())

                        remote_dataset = xr.Dataset.load_store(remote_netcdf)

                        process_region = False
                        if region:
                            geo_lat_min = self._get_harmonized_coordinate_value(remote_dataset.attrs,
                                                                                'geospatial_lat_min')
                            geo_lat_max = self._get_harmonized_coordinate_value(remote_dataset.attrs,
                                                                                'geospatial_lat_max')
                            geo_lon_min = self._get_harmonized_coordinate_value(remote_dataset.attrs,
                                                                                'geospatial_lon_min')
                            geo_lon_max = self._get_harmonized_coordinate_value(remote_dataset.attrs,
                                                                                'geospatial_lon_max')

                            geo_lat_res = self._get_harmonized_coordinate_value(remote_dataset.attrs,
                                                                                'geospatial_lon_resolution')
                            geo_lon_res = self._get_harmonized_coordinate_value(remote_dataset.attrs,
                                                                                'geospatial_lat_resolution')
                            if not (isnan(geo_lat_min) or isnan(geo_lat_max)
                                    or isnan(geo_lon_min) or isnan(geo_lon_max)
                                    or isnan(geo_lat_res) or isnan(geo_lon_res)):
                                process_region = True

                                [lat_min, lon_min, lat_max, lon_max] = region.bounds

                                lat_min = floor((lat_min - geo_lat_min) / geo_lat_res)
                                lat_max = ceil((lat_max - geo_lat_min) / geo_lat_res)
                                lon_min = floor((lon_min - geo_lon_min) / geo_lon_res)
                                lon_max = ceil((lon_max - geo_lon_min) / geo_lon_res)

                                # TODO (kbernat): check why dataset.sel fails!
                                remote_dataset = remote_dataset.isel(drop=False,
                                                                     lat=slice(lat_min, lat_max),
                                                                     lon=slice(lon_min, lon_max))

                                geo_lat_max = lat_max * geo_lat_res + geo_lat_min
                                geo_lat_min += lat_min * geo_lat_res
                                geo_lon_max = lon_max * geo_lon_res + geo_lon_min
                                geo_lon_min += lon_min * geo_lon_res

                        if not var_names:
                            var_names = [var_name for var_name in remote_netcdf.variables.keys()]
                        var_names.extend([coord_name for coord_name in remote_dataset.coords.keys()
                                          if coord_name not in var_names])

                        child_monitor.start(label=file_name, total_work=len(var_names))
                        for sel_var_name in var_names:
                            var_dataset = remote_dataset.drop(
                                [var_name for var_name in remote_dataset.variables.keys()
                                 if var_name != sel_var_name])
                            if compression_enabled:
                                var_dataset.variables.get(sel_var_name).encoding.update(encoding_update)
                            local_netcdf.store_dataset(var_dataset)
                            child_monitor.progress(work=1, msg=sel_var_name)

                        if process_region:
                            local_netcdf.set_attribute('geospatial_lat_min', geo_lat_min)
                            local_netcdf.set_attribute('geospatial_lat_max', geo_lat_max)
                            local_netcdf.set_attribute('geospatial_lon_min', geo_lon_min)
                            local_netcdf.set_attribute('geospatial_lon_max', geo_lon_max)
                    finally:
                        if remote_netcdf:
                            remote_netcdf.close()
                        if local_netcdf:
                            local_netcdf.close()
                            local_ds.add_dataset(local_relative_filepath,
                                                 (time_coverage_start, time_coverage_end))
                    child_monitor.done()
                else:
                    shutil.copy(remote_absolute_filepath, local_absolute_filepath)
                    local_ds.add_dataset(local_relative_filepath,
                                         (time_coverage_start, time_coverage_end))
                    child_monitor.done()
    monitor.done()
    return local_id
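# Standalone restatement (illustrative) of the bounding-box-to-index arithmetic the
# spatial subsetting above relies on: offsets from the grid origin divided by the grid
# resolution give integer slices suitable for isel(). Names here are hypothetical.
from math import floor, ceil

def _bbox_to_index_slices(bounds, geo_lat_min, geo_lon_min, geo_lat_res, geo_lon_res):
    # bounds are shapely-style (lon_min, lat_min, lon_max, lat_max)
    lon_min, lat_min, lon_max, lat_max = bounds
    lat_slice = slice(floor((lat_min - geo_lat_min) / geo_lat_res),
                      ceil((lat_max - geo_lat_min) / geo_lat_res))
    lon_slice = slice(floor((lon_min - geo_lon_min) / geo_lon_res),
                      ceil((lon_max - geo_lon_min) / geo_lon_res))
    return lat_slice, lon_slice

# e.g. a 0.25-degree global grid with origin (-90, -180):
# _bbox_to_index_slices((0, 0, 10, 10), -90, -180, 0.25, 0.25)
# -> (slice(360, 400), slice(720, 760))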
def _make_local(self,
                local_ds: 'LocalDataSource',
                time_range: TimeRangeLike.TYPE = None,
                region: PolygonLike.TYPE = None,
                var_names: VarNamesLike.TYPE = None,
                monitor: Monitor = Monitor.NONE):
    local_id = local_ds.id
    time_range = TimeRangeLike.convert(time_range) if time_range else None
    var_names = VarNamesLike.convert(var_names) if var_names else None  # type: Sequence

    compression_level = get_config_value('NETCDF_COMPRESSION_LEVEL', NETCDF_COMPRESSION_LEVEL)
    compression_enabled = True if compression_level > 0 else False

    encoding_update = dict()
    if compression_enabled:
        encoding_update.update({'zlib': True, 'complevel': compression_level})

    local_path = os.path.join(local_ds.data_store.data_store_path, local_id)
    data_store_path = local_ds.data_store.data_store_path
    if not os.path.exists(local_path):
        os.makedirs(local_path)

    monitor.start("Sync " + self.id, total_work=len(self._files.items()))
    for remote_relative_filepath, coverage in self._files.items():
        child_monitor = monitor.child(work=1)

        file_name = os.path.basename(remote_relative_filepath)
        local_relative_filepath = os.path.join(local_id, file_name)
        local_absolute_filepath = os.path.join(data_store_path, local_relative_filepath)

        remote_absolute_filepath = os.path.join(self._data_store.data_store_path, remote_relative_filepath)

        if isinstance(coverage, Tuple):
            time_coverage_start = coverage[0]
            time_coverage_end = coverage[1]

            if not time_range or time_coverage_start >= time_range[0] and time_coverage_end <= time_range[1]:
                if region or var_names:
                    do_update_of_variables_meta_info_once = True
                    do_update_of_region_meta_info_once = True
                    remote_dataset = None
                    try:
                        remote_dataset = xr.open_dataset(remote_absolute_filepath)

                        if var_names:
                            remote_dataset = remote_dataset.drop(
                                [var_name for var_name in remote_dataset.data_vars.keys()
                                 if var_name not in var_names])

                        if region:
                            remote_dataset = normalize_impl(remote_dataset)
                            remote_dataset = adjust_spatial_attrs_impl(subset_spatial_impl(remote_dataset, region),
                                                                       allow_point=False)

                            if do_update_of_region_meta_info_once:
                                # subset_spatial_impl
                                local_ds.meta_info['bbox_maxx'] = remote_dataset.attrs['geospatial_lon_max']
                                local_ds.meta_info['bbox_minx'] = remote_dataset.attrs['geospatial_lon_min']
                                local_ds.meta_info['bbox_maxy'] = remote_dataset.attrs['geospatial_lat_max']
                                local_ds.meta_info['bbox_miny'] = remote_dataset.attrs['geospatial_lat_min']
                                do_update_of_region_meta_info_once = False

                        if compression_enabled:
                            for sel_var_name in remote_dataset.variables.keys():
                                remote_dataset.variables.get(sel_var_name).encoding.update(encoding_update)

                        remote_dataset.to_netcdf(local_absolute_filepath)

                        child_monitor.progress(work=1, msg=str(time_coverage_start))
                    finally:
                        if do_update_of_variables_meta_info_once and remote_dataset is not None:
                            variables_info = local_ds.meta_info.get('variables', [])
                            local_ds.meta_info['variables'] = [
                                var_info for var_info in variables_info
                                if var_info.get('name') in remote_dataset.variables.keys()
                                and var_info.get('name') not in remote_dataset.dims.keys()]
                            # noinspection PyUnusedLocal
                            do_update_of_variables_meta_info_once = False

                        local_ds.add_dataset(os.path.join(local_id, file_name),
                                             (time_coverage_start, time_coverage_end))
                        child_monitor.done()
                else:
                    shutil.copy(remote_absolute_filepath, local_absolute_filepath)
                    local_ds.add_dataset(local_relative_filepath,
                                         (time_coverage_start, time_coverage_end))
                    child_monitor.done()
    monitor.done()
    return local_id
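# Illustrative restatement (hypothetical helper, not in the original) of the
# time-coverage filter used in the _make_local variants above: a file is kept when no
# time_range is given, or when its coverage lies entirely inside the requested range.
def _is_within_time_range(coverage_start, coverage_end, time_range):
    if not time_range:
        return True
    return coverage_start >= time_range[0] and coverage_end <= time_range[1]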
def _make_local(self,
                local_ds: LocalDataSource,
                time_range: TimeRangeLike.TYPE = None,
                region: PolygonLike.TYPE = None,
                var_names: VarNamesLike.TYPE = None,
                monitor: Monitor = Monitor.NONE):
    # local_name = local_ds.name
    local_id = local_ds.name
    time_range = TimeRangeLike.convert(time_range) if time_range else None
    region = PolygonLike.convert(region) if region else None
    var_names = VarNamesLike.convert(var_names) if var_names else None  # type: Sequence

    compression_level = get_config_value('NETCDF_COMPRESSION_LEVEL', NETCDF_COMPRESSION_LEVEL)
    compression_enabled = True if compression_level > 0 else False

    encoding_update = dict()
    if compression_enabled:
        encoding_update.update({'zlib': True, 'complevel': compression_level})

    if region or var_names:
        protocol = _ODP_PROTOCOL_OPENDAP
    else:
        protocol = _ODP_PROTOCOL_HTTP

    local_path = os.path.join(local_ds.data_store.data_store_path, local_id)
    if not os.path.exists(local_path):
        os.makedirs(local_path)

    selected_file_list = self._find_files(time_range)

    if protocol == _ODP_PROTOCOL_OPENDAP:
        files = self._get_urls_list(selected_file_list, protocol)
        monitor.start('Sync ' + self.name, total_work=len(files))
        for idx, dataset_uri in enumerate(files):
            child_monitor = monitor.child(work=1)

            file_name = os.path.basename(dataset_uri)
            local_filepath = os.path.join(local_path, file_name)

            time_coverage_start = selected_file_list[idx][1]
            time_coverage_end = selected_file_list[idx][2]

            remote_netcdf = None
            local_netcdf = None
            try:
                remote_netcdf = NetCDF4DataStore(dataset_uri)

                local_netcdf = NetCDF4DataStore(local_filepath, mode='w', persist=True)
                local_netcdf.set_attributes(remote_netcdf.get_attrs())

                remote_dataset = xr.Dataset.load_store(remote_netcdf)

                process_region = False
                if region:
                    geo_lat_min = self._get_harmonized_coordinate_value(remote_dataset.attrs,
                                                                        'geospatial_lat_min')
                    geo_lat_max = self._get_harmonized_coordinate_value(remote_dataset.attrs,
                                                                        'geospatial_lat_max')
                    geo_lon_min = self._get_harmonized_coordinate_value(remote_dataset.attrs,
                                                                        'geospatial_lon_min')
                    geo_lon_max = self._get_harmonized_coordinate_value(remote_dataset.attrs,
                                                                        'geospatial_lon_max')

                    geo_lat_res = self._get_harmonized_coordinate_value(remote_dataset.attrs,
                                                                        'geospatial_lon_resolution')
                    geo_lon_res = self._get_harmonized_coordinate_value(remote_dataset.attrs,
                                                                        'geospatial_lat_resolution')
                    if not (isnan(geo_lat_min) or isnan(geo_lat_max)
                            or isnan(geo_lon_min) or isnan(geo_lon_max)
                            or isnan(geo_lat_res) or isnan(geo_lon_res)):
                        process_region = True

                        [lat_min, lon_min, lat_max, lon_max] = region.bounds

                        lat_min = floor((lat_min - geo_lat_min) / geo_lat_res)
                        lat_max = ceil((lat_max - geo_lat_min) / geo_lat_res)
                        lon_min = floor((lon_min - geo_lon_min) / geo_lon_res)
                        lon_max = ceil((lon_max - geo_lon_min) / geo_lon_res)

                        # TODO (kbernat): check why dataset.sel fails!
                        remote_dataset = remote_dataset.isel(drop=False,
                                                             lat=slice(lat_min, lat_max),
                                                             lon=slice(lon_min, lon_max))

                        geo_lat_max = lat_max * geo_lat_res + geo_lat_min
                        geo_lat_min += lat_min * geo_lat_res
                        geo_lon_max = lon_max * geo_lon_res + geo_lon_min
                        geo_lon_min += lon_min * geo_lon_res

                if not var_names:
                    var_names = [var_name for var_name in remote_netcdf.variables.keys()]
                var_names.extend([coord_name for coord_name in remote_dataset.coords.keys()
                                  if coord_name not in var_names])

                child_monitor.start(label=file_name, total_work=len(var_names))
                for sel_var_name in var_names:
                    var_dataset = remote_dataset.drop(
                        [var_name for var_name in remote_dataset.variables.keys()
                         if var_name != sel_var_name])
                    if compression_enabled:
                        var_dataset.variables.get(sel_var_name).encoding.update(encoding_update)
                    local_netcdf.store_dataset(var_dataset)
                    child_monitor.progress(work=1, msg=sel_var_name)

                if process_region:
                    local_netcdf.set_attribute('geospatial_lat_min', geo_lat_min)
                    local_netcdf.set_attribute('geospatial_lat_max', geo_lat_max)
                    local_netcdf.set_attribute('geospatial_lon_min', geo_lon_min)
                    local_netcdf.set_attribute('geospatial_lon_max', geo_lon_max)
            finally:
                if remote_netcdf:
                    remote_netcdf.close()
                if local_netcdf:
                    local_netcdf.close()
                    local_ds.add_dataset(os.path.join(local_id, file_name),
                                         (time_coverage_start, time_coverage_end))
            child_monitor.done()
    else:
        outdated_file_list = []
        for file_rec in selected_file_list:
            filename, _, _, file_size, url = file_rec
            dataset_file = os.path.join(local_path, filename)
            # todo (forman, 20160915): must perform better checks on dataset_file if it is...
            # ... outdated or incomplete or corrupted.
            # JSON also includes "checksum" and "checksum_type" fields.
            if not os.path.isfile(dataset_file) or (file_size and os.path.getsize(dataset_file) != file_size):
                outdated_file_list.append(file_rec)

        if outdated_file_list:
            with monitor.starting('Sync ' + self.name, len(outdated_file_list)):
                bytes_to_download = sum([file_rec[3] for file_rec in outdated_file_list])
                dl_stat = _DownloadStatistics(bytes_to_download)

                file_number = 1
                for filename, coverage_from, coverage_to, file_size, url in outdated_file_list:
                    if monitor.is_cancelled():
                        raise InterruptedError
                    dataset_file = os.path.join(local_path, filename)
                    sub_monitor = monitor.child(work=1.0)

                    # noinspection PyUnusedLocal
                    def reporthook(block_number, read_size, total_file_size):
                        dl_stat.handle_chunk(read_size)
                        if monitor.is_cancelled():
                            raise InterruptedError
                        sub_monitor.progress(work=read_size, msg=str(dl_stat))

                    sub_monitor_msg = "file %d of %d" % (file_number, len(outdated_file_list))
                    with sub_monitor.starting(sub_monitor_msg, file_size):
                        urllib.request.urlretrieve(url[protocol],
                                                   filename=dataset_file,
                                                   reporthook=reporthook)
                    file_number += 1
                    local_ds.add_dataset(os.path.join(local_id, filename), (coverage_from, coverage_to))

    local_ds.save()
    monitor.done()
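# Sketch (illustrative, simplified): the reporthook pattern used in the HTTP branch
# above. urlretrieve calls the hook as (block_number, block_size, total_size), and each
# block's size is forwarded to the monitor as incremental work. 'monitor' is any object
# providing the start/progress/done calls seen in this file; the function name and
# parameters below are hypothetical.
import urllib.request

def _download_with_progress(url, dest_path, file_size, monitor):
    monitor.start('Downloading ' + dest_path, total_work=file_size)

    def reporthook(block_number, block_size, total_size):
        monitor.progress(work=block_size)

    urllib.request.urlretrieve(url, filename=dest_path, reporthook=reporthook)
    monitor.done()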