def test_find_time_slice(self):
    """Verify the ``(index, mode)`` pair returned by ``find_time_slice``.

    A cube is first written via ``self.write_cube('2019-01-01', 10)``;
    each scenario below then queries one time stamp against either that
    cube (``CUBE_PATH``) or a second path (``CUBE_PATH_2``) that this
    test never writes.
    """
    self.write_cube('2019-01-01', 10)

    # Each entry: (cube path, queried time stamp, expected result).
    scenarios = [
        # Cube does not exist --> a new one must be created
        (self.CUBE_PATH_2, '2018-12-30T13:00', (-1, 'create')),
        # Before first step --> insert before index 0
        (self.CUBE_PATH, '2018-12-30T13:00', (0, 'insert')),
        # After first step --> insert before index 1
        (self.CUBE_PATH, '2019-01-02', (1, 'insert')),
        # In-between two steps --> insert before index 5
        (self.CUBE_PATH, '2019-01-06T10:00:00', (5, 'insert')),
        # In-between, hitting an existing time stamp --> replace index 5
        (self.CUBE_PATH, '2019-01-06T12:00:00', (5, 'replace')),
        # After last step --> append
        (self.CUBE_PATH, '2019-01-12', (-1, 'append')),
    ]

    for cube_path, time_stamp, expected in scenarios:
        actual = find_time_slice(cube_path, np.datetime64(time_stamp))
        self.assertEqual(expected, actual)
def _process_input(input_processor: InputProcessor,
                   input_reader: DatasetIO,
                   input_reader_params: Dict[str, Any],
                   output_writer: DatasetIO,
                   output_writer_params: Dict[str, Any],
                   input_file: str,
                   output_size: Tuple[int, int],
                   output_region: Tuple[float, float, float, float],
                   output_resampling: str,
                   output_path: str,
                   output_metadata: NameAnyDict = None,
                   output_variables: NameDictPairList = None,
                   processed_variables: NameDictPairList = None,
                   profile_mode: bool = False,
                   dry_run: bool = False,
                   monitor: Callable[..., None] = None) -> bool:
    """Read one input file, run it through the processing steps and
    create/append/insert/replace the resulting slice in the output.

    The function builds an ordered list of steps (pre-processing, spatial
    subsetting, variable computation, variable selection, transformation,
    adding time coordinates, updating variable attributes, post-processing
    and finally writing) and applies them one after the other to the
    dataset read from *input_file*. Whether the result is created,
    appended, inserted or used as replacement is decided by
    ``find_time_slice()`` for the mean of the input's time range.

    :param input_processor: Provides ``get_time_range``, ``pre_process``,
        ``get_extra_vars``, ``process`` and ``post_process``.
    :param input_reader: Used to read *input_file*.
    :param input_reader_params: Keyword arguments passed to
        ``input_reader.read()``.
    :param output_writer: Used to write, append, insert or replace the
        processed slice at *output_path*.
    :param output_writer_params: Keyword arguments passed to
        ``output_writer.write()`` and ``output_writer.append()``.
    :param input_file: The input to be read.
    :param output_size: ``(width, height)`` of the output image geometry.
    :param output_region: ``(x_min, y_min, x_max, y_max)`` of the output.
    :param output_resampling: Passed on to ``input_processor.process()``.
    :param output_path: Target of the output writer.
    :param output_metadata: Passed as ``global_attrs`` to
        ``_update_cube_attrs()`` when the output is newly created.
    :param output_variables: Variables to select for the output. If not
        given, all data variables of the input dataset are used.
    :param processed_variables: Passed on to ``evaluate_dataset()``.
    :param profile_mode: If set, the step execution is profiled with
        ``cProfile`` and the statistics are sent to *monitor*.
    :param dry_run: If set, the writing step does not touch *output_path*.
    :param monitor: Callable receiving progress and error messages.
        If ``None``, messages are discarded.
    :return: ``True`` if all steps completed. ``False`` if the input could
        not be read, if its start time is greater than its end time, if a
        step returned ``None``, or if a step raised a ``RuntimeError``.
    """
    if monitor is None:
        # The signature permits monitor=None, but it is called
        # unconditionally below; fall back to discarding all messages.
        def monitor(*_args, **_kwargs):
            return None

    monitor('reading input slice...')
    # noinspection PyBroadException
    try:
        input_dataset = input_reader.read(input_file, **input_reader_params)
        monitor(f'Dataset read:\n{input_dataset}')
    except Exception as e:
        monitor(f'Error: cannot read input: {e}: skipping...')
        traceback.print_exc()
        return False

    profiler = None
    status = True
    # From here on the input dataset is open: the outer try/finally makes
    # sure it is closed on every exit path, including the early return for
    # an invalid time range and any exception raised while setting up.
    try:
        time_range = input_processor.get_time_range(input_dataset)
        if time_range[0] > time_range[1]:
            monitor('Error: start time is greater than end time: skipping...')
            return False

        if output_variables:
            output_variables = to_resolved_name_dict_pairs(output_variables,
                                                           input_dataset,
                                                           keep=True)
        else:
            output_variables = [(var_name, None)
                                for var_name in input_dataset.data_vars]

        # The slice is located in the output by the mean of its time range.
        time_index, update_mode = find_time_slice(
            output_path,
            from_time_in_days_since_1970((time_range[0] + time_range[1]) / 2))

        width, height = output_size
        x_min, y_min, x_max, y_max = output_region
        # Use the coarser of the two axis resolutions for the output.
        xy_res = max((x_max - x_min) / width, (y_max - y_min) / height)
        output_geom = ImageGeom(size=output_size,
                                x_min=x_min,
                                y_min=y_min,
                                xy_res=xy_res,
                                is_geo_crs=True)

        steps = []

        # noinspection PyShadowingNames
        def step1(input_slice):
            return input_processor.pre_process(input_slice)

        steps.append((step1, 'pre-processing input slice'))

        # Set by step1a and consumed by step4.
        geo_coding = None

        # noinspection PyShadowingNames
        def step1a(input_slice):
            nonlocal geo_coding
            geo_coding = GeoCoding.from_dataset(input_slice)
            subset = select_spatial_subset(input_slice,
                                           xy_bbox=output_geom.xy_bbox,
                                           xy_border=output_geom.xy_res,
                                           ij_border=1,
                                           geo_coding=geo_coding)
            if subset is None:
                # Returning None makes the step loop skip this input.
                monitor('no spatial overlap with input')
            elif subset is not input_slice:
                # A real subset was taken: its geo-coding must be re-derived.
                geo_coding = GeoCoding.from_dataset(subset)
            return subset

        steps.append((step1a, 'spatial subsetting'))

        # noinspection PyShadowingNames
        def step2(input_slice):
            return evaluate_dataset(input_slice,
                                    processed_variables=processed_variables)

        steps.append((step2, 'computing input slice variables'))

        # noinspection PyShadowingNames
        def step3(input_slice):
            extra_vars = input_processor.get_extra_vars(input_slice)
            selected_variables = {var_name
                                  for var_name, _ in output_variables}
            selected_variables.update(extra_vars or set())
            return select_variables_subset(input_slice, selected_variables)

        steps.append((step3, 'selecting input slice variables'))

        # noinspection PyShadowingNames
        def step4(input_slice):
            # noinspection PyTypeChecker
            return input_processor.process(input_slice,
                                           geo_coding=geo_coding,
                                           output_geom=output_geom,
                                           output_resampling=output_resampling,
                                           include_non_spatial_vars=False)

        steps.append((step4, 'transforming input slice'))

        # time_range has already been indexed above, so it cannot be None
        # here; the step is therefore always added.
        def step5(input_slice):
            return add_time_coords(input_slice, time_range)

        steps.append((step5, 'adding time coordinates to input slice'))

        def step6(input_slice):
            return update_dataset_var_attrs(input_slice, output_variables)

        steps.append((step6, 'updating variable attributes of input slice'))

        def step7(input_slice):
            return input_processor.post_process(input_slice)

        steps.append((step7, 'post-processing input slice'))

        # The final step depends on where the slice belongs in the output.
        # For any other update mode no writing step is added.
        if update_mode == 'create':
            def step8(input_slice):
                if not dry_run:
                    rimraf(output_path)
                    output_writer.write(input_slice, output_path,
                                        **output_writer_params)
                    _update_cube_attrs(output_writer, output_path,
                                       global_attrs=output_metadata,
                                       temporal_only=False)
                return input_slice

            steps.append((step8, f'creating input slice in {output_path}'))

        elif update_mode == 'append':
            def step8(input_slice):
                if not dry_run:
                    output_writer.append(input_slice, output_path,
                                         **output_writer_params)
                    _update_cube_attrs(output_writer, output_path,
                                       temporal_only=True)
                return input_slice

            steps.append((step8, f'appending input slice to {output_path}'))

        elif update_mode == 'insert':
            def step8(input_slice):
                if not dry_run:
                    output_writer.insert(input_slice, time_index, output_path)
                    _update_cube_attrs(output_writer, output_path,
                                       temporal_only=True)
                return input_slice

            steps.append((
                step8,
                f'inserting input slice before index {time_index} in {output_path}'
            ))

        elif update_mode == 'replace':
            def step8(input_slice):
                if not dry_run:
                    output_writer.replace(input_slice, time_index, output_path)
                    _update_cube_attrs(output_writer, output_path,
                                       temporal_only=True)
                return input_slice

            steps.append(
                (step8,
                 f'replacing input slice at index {time_index} in {output_path}'))

        if profile_mode:
            profiler = cProfile.Profile()
            profiler.enable()

        # Only RuntimeError is treated as a recoverable processing failure;
        # any other exception propagates to the caller.
        try:
            num_steps = len(steps)
            dataset = input_dataset
            total_t1 = time.perf_counter()
            for step_number, (transform, label) in enumerate(steps, start=1):
                step_t1 = time.perf_counter()
                monitor(f'step {step_number} of {num_steps}: {label}...')
                dataset = transform(dataset)
                step_t2 = time.perf_counter()
                if dataset is None:
                    # A step signals "skip this input" by returning None.
                    monitor(
                        f' {label} terminated after {step_t2 - step_t1} seconds, skipping input slice'
                    )
                    status = False
                    break
                monitor(f' {label} completed in {step_t2 - step_t1} seconds')
            total_t2 = time.perf_counter()
            monitor(
                f'{num_steps} steps took {total_t2 - total_t1} seconds to complete'
            )
        except RuntimeError as e:
            monitor(
                f'Error: something went wrong during processing, skipping input slice: {e}'
            )
            traceback.print_exc()
            status = False
    finally:
        input_dataset.close()
        if profiler is not None:
            # Also stop profiling when an unexpected exception propagates.
            profiler.disable()

    if profiler is not None:
        s = io.StringIO()
        ps = pstats.Stats(profiler, stream=s).sort_stats('cumtime')
        ps.print_stats()
        monitor(s.getvalue())

    return status