Ejemplo n.º 1
0
    def test_find_time_slice(self):
        # Checks the (index, mode) pair that find_time_slice() reports for
        # time stamps around and inside the cube written below.
        self.write_cube('2019-01-01', 10)

        # Each case is (cube path, time stamp to look up, expected result).
        cases = [
            # Cube does not exist --> write new
            (self.CUBE_PATH_2, '2018-12-30T13:00', (-1, 'create')),
            # Before first step --> insert before 0
            (self.CUBE_PATH, '2018-12-30T13:00', (0, 'insert')),
            # After first step --> insert before 1
            (self.CUBE_PATH, '2019-01-02', (1, 'insert')),
            # In-between --> insert before 5
            (self.CUBE_PATH, '2019-01-06T10:00:00', (5, 'insert')),
            # In-between at existing time-stamp --> replace 5
            (self.CUBE_PATH, '2019-01-06T12:00:00', (5, 'replace')),
            # After last step --> append
            (self.CUBE_PATH, '2019-01-12', (-1, 'append')),
        ]

        for cube_path, time_stamp, expected in cases:
            actual = find_time_slice(cube_path, np.datetime64(time_stamp))
            self.assertEqual(expected, actual)
Ejemplo n.º 2
0
def _process_input(input_processor: InputProcessor,
                   input_reader: DatasetIO,
                   input_reader_params: Dict[str, Any],
                   output_writer: DatasetIO,
                   output_writer_params: Dict[str, Any],
                   input_file: str,
                   output_size: Tuple[int, int],
                   output_region: Tuple[float, float, float, float],
                   output_resampling: str,
                   output_path: str,
                   output_metadata: NameAnyDict = None,
                   output_variables: NameDictPairList = None,
                   processed_variables: NameDictPairList = None,
                   profile_mode: bool = False,
                   dry_run: bool = False,
                   monitor: Callable[..., None] = None) -> bool:
    """Process one input file and write it as a slice to *output_path*.

    The input is read with *input_reader* and then passed through a
    sequence of steps: pre-processing, spatial subsetting, evaluation of
    processed variables, variable selection, processing by
    *input_processor*, adding time coordinates, updating variable
    attributes and post-processing. Depending on the update mode reported
    by ``find_time_slice`` for the centre of the input's time range, the
    result is finally created in, appended to, inserted into or replaced
    in *output_path*.

    The input dataset is closed on every exit path once it has been read.

    :param input_processor: Provides ``get_time_range``, ``pre_process``,
        ``get_extra_vars``, ``process`` and ``post_process``.
    :param input_reader: Used to read *input_file*.
    :param input_reader_params: Keyword arguments for
        ``input_reader.read``.
    :param output_writer: Used to write, append, insert or replace the
        processed slice.
    :param output_writer_params: Keyword arguments for
        ``output_writer.write`` and ``output_writer.append``. They are
        not passed to ``insert`` or ``replace``.
    :param input_file: The input passed to ``input_reader.read``.
    :param output_size: Output size as ``(width, height)``.
    :param output_region: Output region as
        ``(x_min, y_min, x_max, y_max)``.
    :param output_resampling: Forwarded to ``input_processor.process``.
    :param output_path: Path of the output that receives the slice.
    :param output_metadata: Global attributes passed to
        ``_update_cube_attrs`` when the output is created.
    :param output_variables: Name/dict pairs selecting the output
        variables. If empty or None, all data variables of the input
        are used.
    :param processed_variables: Forwarded to ``evaluate_dataset``.
    :param profile_mode: If true, the steps are profiled with
        ``cProfile`` and the statistics, sorted by cumulative time,
        are reported through *monitor*.
    :param dry_run: If true, the write step is still reported but
        nothing is written to *output_path*.
    :param monitor: Callable receiving progress messages. If None,
        messages are discarded.
    :return: True if all steps completed. False if the input could not
        be read, if the start of its time range is greater than the
        end, if a step returned None, or if a ``RuntimeError`` was
        raised while running the steps.
    """
    if monitor is None:
        # Progress is reported unconditionally below, so the default
        # must be callable.
        def monitor(*_args, **_kwargs):
            pass

    monitor('reading input slice...')
    # noinspection PyBroadException
    try:
        input_dataset = input_reader.read(input_file, **input_reader_params)
        monitor(f'Dataset read:\n{input_dataset}')
    except Exception as e:
        monitor(f'Error: cannot read input: {e}: skipping...')
        traceback.print_exc()
        return False

    profiler = None
    status = True
    try:
        time_range = input_processor.get_time_range(input_dataset)
        if time_range[0] > time_range[1]:
            monitor('Error: start time is greater than end time: skipping...')
            return False

        if output_variables:
            output_variables = to_resolved_name_dict_pairs(output_variables,
                                                           input_dataset,
                                                           keep=True)
        else:
            output_variables = [(var_name, None)
                                for var_name in input_dataset.data_vars]

        # The centre of the time range decides where the slice goes.
        time_index, update_mode = find_time_slice(
            output_path,
            from_time_in_days_since_1970(
                (time_range[0] + time_range[1]) / 2))

        width, height = output_size
        x_min, y_min, x_max, y_max = output_region
        xy_res = max((x_max - x_min) / width, (y_max - y_min) / height)
        output_geom = ImageGeom(size=output_size,
                                x_min=x_min,
                                y_min=y_min,
                                xy_res=xy_res,
                                is_geo_crs=True)

        # Pairs of (transform, label); each transform receives the
        # dataset returned by the previous one.
        steps = []

        # noinspection PyShadowingNames
        def step1(input_slice):
            return input_processor.pre_process(input_slice)

        steps.append((step1, 'pre-processing input slice'))

        # Set by step1a and consumed by step4.
        geo_coding = None

        # noinspection PyShadowingNames
        def step1a(input_slice):
            nonlocal geo_coding
            geo_coding = GeoCoding.from_dataset(input_slice)
            subset = select_spatial_subset(input_slice,
                                           xy_bbox=output_geom.xy_bbox,
                                           xy_border=output_geom.xy_res,
                                           ij_border=1,
                                           geo_coding=geo_coding)
            if subset is None:
                monitor('no spatial overlap with input')
            elif subset is not input_slice:
                # A new dataset was returned, so derive its geo-coding.
                geo_coding = GeoCoding.from_dataset(subset)
            return subset

        steps.append((step1a, 'spatial subsetting'))

        # noinspection PyShadowingNames
        def step2(input_slice):
            return evaluate_dataset(input_slice,
                                    processed_variables=processed_variables)

        steps.append((step2, 'computing input slice variables'))

        # noinspection PyShadowingNames
        def step3(input_slice):
            extra_vars = input_processor.get_extra_vars(input_slice)
            selected_variables = {var_name
                                  for var_name, _ in output_variables}
            selected_variables.update(extra_vars or set())
            return select_variables_subset(input_slice, selected_variables)

        steps.append((step3, 'selecting input slice variables'))

        # noinspection PyShadowingNames
        def step4(input_slice):
            # noinspection PyTypeChecker
            return input_processor.process(
                input_slice,
                geo_coding=geo_coding,
                output_geom=output_geom,
                output_resampling=output_resampling,
                include_non_spatial_vars=False)

        steps.append((step4, 'transforming input slice'))

        # time_range has already been indexed above, so it cannot be
        # None here and this step is always added.
        def step5(input_slice):
            return add_time_coords(input_slice, time_range)

        steps.append((step5, 'adding time coordinates to input slice'))

        def step6(input_slice):
            return update_dataset_var_attrs(input_slice, output_variables)

        steps.append((step6, 'updating variable attributes of input slice'))

        def step7(input_slice):
            return input_processor.post_process(input_slice)

        steps.append((step7, 'post-processing input slice'))

        # Select the write operation for the update mode. An update mode
        # other than the four below adds no write step.
        write_slice = None
        write_label = None

        if update_mode == 'create':

            def write_slice(input_slice):
                rimraf(output_path)
                output_writer.write(input_slice, output_path,
                                    **output_writer_params)
                _update_cube_attrs(output_writer,
                                   output_path,
                                   global_attrs=output_metadata,
                                   temporal_only=False)

            write_label = f'creating input slice in {output_path}'

        elif update_mode == 'append':

            def write_slice(input_slice):
                output_writer.append(input_slice, output_path,
                                     **output_writer_params)
                _update_cube_attrs(output_writer,
                                   output_path,
                                   temporal_only=True)

            write_label = f'appending input slice to {output_path}'

        elif update_mode == 'insert':

            def write_slice(input_slice):
                output_writer.insert(input_slice, time_index, output_path)
                _update_cube_attrs(output_writer,
                                   output_path,
                                   temporal_only=True)

            write_label = (f'inserting input slice before index {time_index}'
                           f' in {output_path}')

        elif update_mode == 'replace':

            def write_slice(input_slice):
                output_writer.replace(input_slice, time_index, output_path)
                _update_cube_attrs(output_writer,
                                   output_path,
                                   temporal_only=True)

            write_label = (f'replacing input slice at index {time_index}'
                           f' in {output_path}')

        if write_slice is not None:

            def step8(input_slice):
                # In a dry run the step is reported but nothing is
                # written.
                if not dry_run:
                    write_slice(input_slice)
                return input_slice

            steps.append((step8, write_label))

        if profile_mode:
            profiler = cProfile.Profile()
            profiler.enable()

        try:
            num_steps = len(steps)
            dataset = input_dataset
            total_t1 = time.perf_counter()
            for step_number, (transform, label) in enumerate(steps, start=1):
                step_t1 = time.perf_counter()
                monitor(f'step {step_number} of {num_steps}: {label}...')
                dataset = transform(dataset)
                step_t2 = time.perf_counter()
                if dataset is None:
                    # A step signals "skip this input" by returning None.
                    monitor(f'  {label} terminated after'
                            f' {step_t2 - step_t1} seconds,'
                            f' skipping input slice')
                    status = False
                    break
                monitor(f'  {label} completed in'
                        f' {step_t2 - step_t1} seconds')
            total_t2 = time.perf_counter()
            monitor(f'{num_steps} steps took'
                    f' {total_t2 - total_t1} seconds to complete')
        except RuntimeError as e:
            monitor('Error: something went wrong during processing,'
                    f' skipping input slice: {e}')
            traceback.print_exc()
            status = False
    finally:
        # Runs for early returns and for exceptions raised during setup
        # too, so the input dataset is never left open.
        input_dataset.close()
        if profiler is not None:
            profiler.disable()

    if profiler is not None:
        s = io.StringIO()
        ps = pstats.Stats(profiler, stream=s).sort_stats('cumtime')
        ps.print_stats()
        monitor(s.getvalue())

    return status