def test_training_multiple_times(self, model_cls):
    # a basic integration test
    equation = readers.get_equation(self.metadata)
    grid = readers.get_output_grid(self.metadata)
    model = model_cls(equation, grid, num_time_steps=4)

    def create_inputs(state):
      # (batch, x, y)
      inputs = nest.map_structure(lambda x: x[:-model.num_time_steps], state)
      # (batch, time, x, y)
      labels = tensor_ops.stack_all_contiguous_slices(
          state['concentration'][1:], model.num_time_steps, new_axis=1)
      return inputs, labels

    training_data = (
        model.load_data(self.metadata)
        .map(create_inputs)
        .apply(tf.data.experimental.unbatch())
        .shuffle(10)
        .repeat()
        .batch(4, drop_remainder=True)
        .prefetch(1)
    )
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
                  loss='mean_squared_error')
    model.fit(training_data, epochs=1, steps_per_epoch=5)
    model.evaluate(training_data, steps=2)
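For reference, a minimal NumPy sketch of the windowing that create_inputs performs, assuming stack_all_contiguous_slices stacks every contiguous length-num_time_steps window along a new axis (my reading of the call above, not a documented contract):

import numpy as np

num_time_steps = 4
trajectory = np.arange(10)  # stand-in for state['concentration'] along the time axis

# Inputs: every frame that still has num_time_steps future frames to predict.
inputs = trajectory[:-num_time_steps]
# Labels: for input frame i, the next num_time_steps frames of the trajectory.
labels = np.stack(
    [trajectory[1:][i:i + num_time_steps] for i in range(len(inputs))], axis=0)

assert labels.shape == (len(inputs), num_time_steps)
assert np.array_equal(labels[0], trajectory[1:1 + num_time_steps])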
Example #2
    def test_statistics(self):
        """Dataset writer and reader test, checks statistics computations."""
        output_path = FLAGS.test_tmpdir
        output_name = 'temp'

        equation_name = 'advection'
        discretization = 'finite_volume'

        # create a temporary dataset
        with flagsaver.flagsaver(
                dataset_path=output_path,
                dataset_name=output_name,
                equation_name=equation_name,
                discretization=discretization,
                simulation_grid_size=256,
                output_grid_size=32,
                dataset_type='all_derivatives',
                total_time_steps=10,
                example_num_time_steps=3,
                time_step_interval=5,
                num_seeds=4,
        ):
            create_training_data.main([], runner=beam.runners.DirectRunner())

        metadata_path = os.path.join(output_path,
                                     output_name + '.metadata.json')
        dataset_metadata = readers.load_metadata(metadata_path)
        low_res_grid = readers.get_output_grid(dataset_metadata)

        equation = advection_equations.FiniteVolumeAdvectionDiffusion(
            diffusion_coefficient=0.1)
        data_key = equation.key_definitions['concentration'].exact()
        dataset = readers.initialize_dataset(dataset_metadata,
                                             ((data_key, ), ),
                                             (low_res_grid, ))
        dataset = dataset.repeat(1)
        dataset = dataset.batch(1)
        all_data = np.concatenate(
            [np.ravel(data[0][data_key]) for data in dataset])

        expected_mean = np.mean(all_data)
        expected_variance = np.var(all_data, ddof=1)

        keys = readers.data_component_keys(dataset_metadata['components'])
        components_dict = dict(zip(keys, dataset_metadata['components']))

        component = components_dict[data_key, low_res_grid]
        metadata_mean = component['statistics']['mean']
        metadata_variance = component['statistics']['variance']

        np.testing.assert_allclose(metadata_mean, expected_mean, atol=1e-3)
        np.testing.assert_allclose(metadata_variance,
                                   expected_variance,
                                   atol=1e-3)
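A note on the expectation above: np.var(all_data, ddof=1) is the unbiased sample variance (sum of squared deviations divided by N - 1), which presumably matches how the dataset statistics are accumulated. A minimal check of that identity:

import numpy as np

x = np.array([1.0, 2.0, 4.0, 7.0])
sample_variance = np.sum((x - x.mean()) ** 2) / (len(x) - 1)  # divide by N - 1
assert np.isclose(np.var(x, ddof=1), sample_variance)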
Example #3
    def test_training(self, model_cls):
        # a basic integration test
        equation = readers.get_equation(self.metadata)
        grid = readers.get_output_grid(self.metadata)
        model = model_cls(equation, grid)

        def create_inputs(state):
            inputs = nest.map_structure(lambda x: x[:-1], state)
            labels = state['concentration'][1:]
            return inputs, labels

        training_data = (
            model.load_data(self.metadata)
            .repeat()
            .shuffle(10)
            .map(create_inputs)
        )
        model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
                      loss='mean_squared_error')
        model.fit(training_data, epochs=1, steps_per_epoch=5)
        model.evaluate(training_data, steps=2)
Example #4
  def load_data(
      self,
      metadata: Mapping[str, Any],
      prefix: states.Prefix = states.Prefix.EXACT,
  ) -> tf.data.Dataset:
    """Load data into a tf.data.Dataset for inferrence or training."""

    def replace_state_keys_with_names(state):
      return {k: state[equation.key_definitions[k].with_prefix(prefix)]
              for k in equation.base_keys}

    equation = readers.get_equation(metadata)
    grid = readers.get_output_grid(metadata)
    keys = [equation.key_definitions[k].with_prefix(prefix)
            for k in equation.base_keys]
    dataset = readers.initialize_dataset(metadata, [keys], [grid])
    dataset = dataset.map(replace_state_keys_with_names)
    return dataset
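A condensed usage sketch for load_data, mirroring test_training above; `model` and `metadata` are assumed to be the same objects used in those tests, and the helper name is hypothetical. tf.nest.map_structure stands in for the `nest` helper imported by the tests.

import tensorflow as tf

def one_step_training_data(model, metadata, shuffle_buffer=10):
  """Builds (inputs, labels) pairs for single-step training, as in test_training."""
  def create_inputs(state):
    # Drop the last frame for inputs; the label is the next concentration frame.
    inputs = tf.nest.map_structure(lambda x: x[:-1], state)
    labels = state['concentration'][1:]
    return inputs, labels

  return (model.load_data(metadata)
          .repeat()
          .shuffle(shuffle_buffer)
          .map(create_inputs))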
Example #5
    def test_shapes_and_exceptions(self):
        """Dataset writer and reader test, checks shapes and exceptions."""
        output_path = FLAGS.test_tmpdir
        output_name = 'temp'
        equation_name = 'advection_diffusion'
        discretization = 'finite_volume'
        dataset_type = 'all_derivatives'
        high_resolution = 125
        low_resolution = 25
        shards = 2
        example_num_time_steps = 3
        batch_size = 4
        diffusion_coefficient = 0.3

        expected_equation = advection_equations.FiniteVolumeAdvectionDiffusion(
            diffusion_coefficient=diffusion_coefficient)

        # create a temporary dataset
        with flagsaver.flagsaver(
                dataset_path=output_path,
                dataset_name=output_name,
                equation_name=equation_name,
                discretization=discretization,
                simulation_grid_size=high_resolution,
                output_grid_size=low_resolution,
                equation_kwargs=str(
                    dict(diffusion_coefficient=diffusion_coefficient)),
                dataset_type=dataset_type,
                num_shards=shards,
                total_time_steps=10,
                example_num_time_steps=example_num_time_steps,
                time_step_interval=5,
                num_seeds=4,
        ):
            create_training_data.main([], runner=beam.runners.DirectRunner())

        metadata_path = os.path.join(output_path,
                                     output_name + '.metadata.json')
        self.assertTrue(gfile.exists(metadata_path))
        dataset_metadata = readers.load_metadata(metadata_path)
        low_res_grid = readers.get_output_grid(dataset_metadata)
        high_res_grid = readers.get_simulation_grid(dataset_metadata)
        equation = readers.get_equation(dataset_metadata)

        self.assertEqual(low_res_grid.size_x, low_resolution)
        self.assertEqual(low_res_grid.size_y, low_resolution)
        self.assertEqual(high_res_grid.size_x, high_resolution)
        self.assertEqual(high_res_grid.size_y, high_resolution)
        self.assertAlmostEqual(high_res_grid.step, 2 * np.pi / high_resolution)
        self.assertAlmostEqual(equation.diffusion_coefficient,
                               diffusion_coefficient)
        self.assertIs(type(equation), type(expected_equation))

        state_keys = expected_equation.key_definitions
        valid_data_keys = ((state_keys['concentration'].exact(), ),
                           (state_keys['concentration_edge_x'].exact(),
                            state_keys['concentration_y_edge_y'].exact()))
        invalid_data_keys = ((state_keys['concentration'],
                              state_keys['concentration_edge_x']),
                             (state_keys['concentration_edge_x'], ))
        valid_data_grids = (low_res_grid, low_res_grid)
        invalid_data_grids = (low_res_grid, high_res_grid)

        with self.assertRaises(ValueError):
            readers.initialize_dataset(dataset_metadata, invalid_data_keys,
                                       valid_data_grids)
        with self.assertRaises(ValueError):
            readers.initialize_dataset(dataset_metadata, valid_data_keys,
                                       invalid_data_grids)
        with self.assertRaises(ValueError):
            readers.initialize_dataset(dataset_metadata, invalid_data_keys,
                                       invalid_data_grids)

        dataset = readers.initialize_dataset(dataset_metadata, valid_data_keys,
                                             valid_data_grids)
        dataset = dataset.repeat()
        dataset = dataset.batch(batch_size)

        [(first_state, second_state)] = dataset.take(1)
        self.assertEqual(set(first_state.keys()), set(valid_data_keys[0]))
        self.assertEqual(set(second_state.keys()), set(valid_data_keys[1]))
        first_state_shape = np.shape(first_state[valid_data_keys[0][0]])
        second_state_shape = np.shape(second_state[valid_data_keys[1][0]])
        expected_shape = (batch_size, example_num_time_steps, low_resolution,
                          low_resolution)
        self.assertEqual(first_state_shape, expected_shape)
        self.assertEqual(second_state_shape, expected_shape)