Example #1
    def test_statistics(self):
        """Dataset writer and reader test, checks statistics computations."""
        output_path = FLAGS.test_tmpdir
        output_name = 'temp'

        equation_name = 'advection'
        discretization = 'finite_volume'

        # create a temporary dataset
        with flagsaver.flagsaver(
                dataset_path=output_path,
                dataset_name=output_name,
                equation_name=equation_name,
                discretization=discretization,
                simulation_grid_size=256,
                output_grid_size=32,
                dataset_type='all_derivatives',
                total_time_steps=10,
                example_num_time_steps=3,
                time_step_interval=5,
                num_seeds=4,
        ):
            create_training_data.main([], runner=beam.runners.DirectRunner())

        metadata_path = os.path.join(output_path,
                                     output_name + '.metadata.json')
        dataset_metadata = readers.load_metadata(metadata_path)
        low_res_grid = readers.get_output_grid(dataset_metadata)

        equation = advection_equations.FiniteVolumeAdvectionDiffusion(
            diffusion_coefficient=0.1)
        data_key = equation.key_definitions['concentration'].exact()
        dataset = readers.initialize_dataset(dataset_metadata,
                                             ((data_key, ), ),
                                             (low_res_grid, ))
        dataset = dataset.repeat(1)
        dataset = dataset.batch(1)
        all_data = np.concatenate(
            [np.ravel(data[0][data_key]) for data in dataset])

        expected_mean = np.mean(all_data)
        expected_variance = np.var(all_data, ddof=1)

        keys = readers.data_component_keys(dataset_metadata['components'])
        components_dict = dict(zip(keys, dataset_metadata['components']))

        component = components_dict[data_key, low_res_grid]
        metadata_mean = component['statistics']['mean']
        metadata_variance = component['statistics']['variance']

        np.testing.assert_allclose(metadata_mean, expected_mean, atol=1e-3)
        np.testing.assert_allclose(metadata_variance,
                                   expected_variance,
                                   atol=1e-3)
Example #2
  def load_data(
      self,
      metadata: Mapping[str, Any],
      prefix: states.Prefix = states.Prefix.EXACT,
  ) -> tf.data.Dataset:
    """Load data into a tf.data.Dataset for inferrence or training."""

    def replace_state_keys_with_names(state):
      return {k: state[equation.key_definitions[k].with_prefix(prefix)]
              for k in equation.base_keys}

    equation = readers.get_equation(metadata)
    grid = readers.get_output_grid(metadata)
    keys = [equation.key_definitions[k].with_prefix(prefix)
            for k in equation.base_keys]
    dataset = readers.initialize_dataset(metadata, [keys], [grid])
    dataset = dataset.map(replace_state_keys_with_names)
    return dataset
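A possible call site for load_data, shown as a hedged sketch: it assumes model is an instance of the class that defines this method, and that the metadata file follows the <name>.metadata.json convention used in the other examples.

  metadata = readers.load_metadata(
      os.path.join(output_path, output_name + '.metadata.json'))
  dataset = model.load_data(metadata, prefix=states.Prefix.EXACT)
  dataset = dataset.batch(8)
  for state in dataset.take(1):
    # Keys are the equation's base keys (e.g. 'concentration' for the
    # advection equations); after batching, values have shape
    # [batch, time, x, y].
    print({key: value.shape for key, value in state.items()})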
Example #3
    def test_shapes_and_exceptions(self):
        """Dataset writer and reader test, checks shapes and exceptions."""
        output_path = FLAGS.test_tmpdir
        output_name = 'temp'
        equation_name = 'advection_diffusion'
        discretization = 'finite_volume'
        dataset_type = 'all_derivatives'
        high_resolution = 125
        low_resolution = 25
        shards = 2
        example_num_time_steps = 3
        batch_size = 4
        diffusion_coefficient = 0.3

        expected_equation = advection_equations.FiniteVolumeAdvectionDiffusion(
            diffusion_coefficient=diffusion_coefficient)

        # create a temporary dataset
        with flagsaver.flagsaver(
                dataset_path=output_path,
                dataset_name=output_name,
                equation_name=equation_name,
                discretization=discretization,
                simulation_grid_size=high_resolution,
                output_grid_size=low_resolution,
                equation_kwargs=str(
                    dict(diffusion_coefficient=diffusion_coefficient)),
                dataset_type=dataset_type,
                num_shards=shards,
                total_time_steps=10,
                example_num_time_steps=example_num_time_steps,
                time_step_interval=5,
                num_seeds=4,
        ):
            create_training_data.main([], runner=beam.runners.DirectRunner())

        metadata_path = os.path.join(output_path,
                                     output_name + '.metadata.json')
        self.assertTrue(gfile.exists(metadata_path))
        dataset_metadata = readers.load_metadata(metadata_path)
        low_res_grid = readers.get_output_grid(dataset_metadata)
        high_res_grid = readers.get_simulation_grid(dataset_metadata)
        equation = readers.get_equation(dataset_metadata)

        self.assertEqual(low_res_grid.size_x, low_resolution)
        self.assertEqual(low_res_grid.size_y, low_resolution)
        self.assertEqual(high_res_grid.size_x, high_resolution)
        self.assertEqual(high_res_grid.size_y, high_resolution)
        self.assertAlmostEqual(high_res_grid.step, 2 * np.pi / high_resolution)
        self.assertAlmostEqual(equation.diffusion_coefficient,
                               diffusion_coefficient)
        self.assertIs(type(equation), type(expected_equation))

        state_keys = expected_equation.key_definitions
        valid_data_keys = ((state_keys['concentration'].exact(), ),
                           (state_keys['concentration_edge_x'].exact(),
                            state_keys['concentration_y_edge_y'].exact()))
        invalid_data_keys = ((state_keys['concentration'],
                              state_keys['concentration_edge_x']),
                             (state_keys['concentration_edge_x'], ))
        valid_data_grids = (low_res_grid, low_res_grid)
        invalid_data_grids = (low_res_grid, high_res_grid)

        with self.assertRaises(ValueError):
            readers.initialize_dataset(dataset_metadata, invalid_data_keys,
                                       valid_data_grids)
        with self.assertRaises(ValueError):
            readers.initialize_dataset(dataset_metadata, valid_data_keys,
                                       invalid_data_grids)
        with self.assertRaises(ValueError):
            readers.initialize_dataset(dataset_metadata, invalid_data_keys,
                                       invalid_data_grids)

        dataset = readers.initialize_dataset(dataset_metadata, valid_data_keys,
                                             valid_data_grids)
        dataset = dataset.repeat()
        dataset = dataset.batch(batch_size)

        [(first_state, second_state)] = dataset.take(1)
        self.assertEqual(set(first_state.keys()), set(valid_data_keys[0]))
        self.assertEqual(set(second_state.keys()), set(valid_data_keys[1]))
        first_state_shape = np.shape(first_state[valid_data_keys[0][0]])
        second_state_shape = np.shape(second_state[valid_data_keys[1][0]])
        expected_shape = (batch_size, example_num_time_steps, low_resolution,
                          low_resolution)
        self.assertEqual(first_state_shape, expected_shape)
        self.assertEqual(second_state_shape, expected_shape)
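Beyond these shape checks, a training consumer would usually add shuffling and prefetching on top of the same reader. A brief sketch (assumptions: tensorflow is imported as tf, and the shuffle buffer size is a placeholder) using only standard tf.data transformations on the variables defined in the test above:

        train_dataset = readers.initialize_dataset(
            dataset_metadata, valid_data_keys, valid_data_grids)
        train_dataset = train_dataset.shuffle(buffer_size=100)  # placeholder size
        train_dataset = train_dataset.repeat()
        train_dataset = train_dataset.batch(batch_size)
        train_dataset = train_dataset.prefetch(tf.data.experimental.AUTOTUNE)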