Example #1
  def get_specific_feed_dict(self, dataset, seq_idx):
    """
    :param Dataset.Dataset dataset:
    :param int seq_idx:
    :return: feed_dict for self.tf_session.run()
    :rtype: dict[tf.Tensor,numpy.ndarray]
    """
    # No Runner instance here, but a very simplified version of Runner.run().
    # First we need a custom DataProvider with a custom BatchSetGenerator
    # which will yield only one single batch for the provided sequence idx.
    from EngineBatch import Batch, BatchSetGenerator
    from TFDataPipeline import FeedDictDataProvider
    batch = Batch()
    batch.add_frames(seq_idx=seq_idx, seq_start_frame=0, length=dataset.get_seq_length(seq_idx))
    batch_generator = iter([batch])
    batches = BatchSetGenerator(dataset, generator=batch_generator)
    data_provider = FeedDictDataProvider(
      tf_session=self.tf_session, extern_data=self.network.extern_data,
      data_keys=self.network.used_data_keys,
      dataset=dataset, batches=batches)
    feed_dict = data_provider.get_feed_dict(single_threaded=True)
    return feed_dict
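
For illustration, a minimal usage sketch (not part of the original method): it assumes an `engine` exposing this method and a layer named "output", both of which are assumptions for the sake of the example.

# Hedged usage sketch: fetch the "output" layer (assumed name) for sequence 0.
feed_dict = engine.get_specific_feed_dict(dataset, seq_idx=0)
output_data = engine.network.layers["output"].output  # a TFUtil.Data instance
out = engine.tf_session.run(output_data.placeholder, feed_dict=feed_dict)
print("output shape:", out.shape)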
Example #2
def test_Updater_simple_batch():
    with make_scope() as session:  # test helper: fresh graph + tf.Session (see the sketch after Example #5)
        import sys
        from TFNetwork import TFNetwork, ExternData
        from TFUpdater import Updater
        from Config import Config
        from GeneratingDataset import Task12AXDataset
        dataset = Task12AXDataset()
        dataset.init_seq_order(epoch=1)
        extern_data = ExternData()
        extern_data.init_from_dataset(dataset)

        config = Config()
        network = TFNetwork(extern_data=extern_data, train_flag=True)
        network.construct_from_dict({
            "layer1": {
                "class": "linear",
                "activation": "tanh",
                "n_out": 13
            },
            "layer2": {
                "class": "linear",
                "activation": "tanh",
                "n_out": 13,
                "from": ["layer1"]
            },
            "output": {
                "class": "softmax",
                "loss": "ce",
                "target": "classes",
                "from": ["layer2"]
            }
        })
        network.initialize_params(session=session)

        updater = Updater(config=config, network=network)
        updater.set_learning_rate(1.0, session=session)
        updater.set_trainable_vars(network.get_trainable_params())
        updater.init_optimizer_vars(session=session)

        from TFDataPipeline import FeedDictDataProvider
        batches = dataset.generate_batches(
            recurrent_net=network.recurrent,
            batch_size=100,
            max_seqs=10,
            max_seq_length=sys.maxsize,
            used_data_keys=network.used_data_keys)
        data_provider = FeedDictDataProvider(tf_session=session,
                                             extern_data=extern_data,
                                             data_keys=network.used_data_keys,
                                             dataset=dataset,
                                             batches=batches)
        feed_dict, _ = data_provider.get_feed_dict(single_threaded=True)
        session.run(updater.get_optim_op(), feed_dict=feed_dict)
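
The single `session.run` call above performs exactly one update step. As a hedged sketch (continuing inside the `with make_scope()` block), the same provider can drive a short training loop; `have_more_data` is part of the data-provider interface in this code base, but the loop itself is not in the original test:

        # Sketch: keep stepping until the batch generator is exhausted.
        step = 0
        while data_provider.have_more_data(session=session):
            feed_dict, _ = data_provider.get_feed_dict(single_threaded=True)
            session.run(updater.get_optim_op(), feed_dict=feed_dict)
            step += 1
        print("ran %i update steps" % step)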
Example #3
  def __init__(self, engine, dataset, batches, train, eval=True, extra_fetches=None, extra_fetches_callback=None):
    """
    :param Engine engine:
    :param Dataset.Dataset dataset:
    :param BatchSetGenerator batches:
    :param bool train: whether to do updates on the model
    :param bool eval: whether to evaluate (i.e. calculate loss/error)
    :param dict[str,tf.Tensor|TFUtil.Data|TFNetworkLayer.LayerBase]|None extra_fetches: additional fetches per step.
      `extra_fetches_callback` will be called with these. In the case of Data/LayerBase, it will return a list
      where each item corresponds to one sequence in the batch.
      It might also be useful to add `network.get_extern_data("seq_idx")` and `network.get_extern_data("seq_tag")`.
    :param (**dict[str,numpy.ndarray|str|list[numpy.ndarray|str]])->None extra_fetches_callback:
      called with the resolved `extra_fetches` values, as keyword arguments, after each step
    """
    from TFDataPipeline import FeedDictDataProvider, DataProviderBase
    self.engine = engine
    self.data_provider = FeedDictDataProvider(
      tf_session=engine.tf_session, extern_data=engine.network.extern_data,
      data_keys=engine.network.used_data_keys,
      dataset=dataset, batches=batches)
    assert isinstance(self.data_provider, DataProviderBase)
    self._should_train = train
    self._should_eval = eval
    self.store_metadata_mod_step = engine.config.int("store_metadata_mod_step", 0)
    self.reset_updater_vars_mod_step = engine.config.int("reset_updater_vars_mod_step", 0)
    self.finalized = False
    self.num_steps = None
    self.device_crash_batch = None  # type: int|None
    self.start_time = None
    self.elapsed = None
    self._results_accumulated = {}  # type: dict[str,float]  # entries like "cost:output" or "loss"
    self.results = {}  # type: dict[str,float]  # entries like "cost:output" or "loss"
    self.score = {}  # type: dict[str,float]  # entries like "cost:output"
    self.error = {}  # type: dict[str,float]  # entries like "error:output"
    self.stats = {}  # type: dict[str,float]  # entries like "stats:..."
    self.extra_fetches = extra_fetches
    if extra_fetches is not None:
      assert extra_fetches_callback
    self.extra_fetches_callback = extra_fetches_callback

    from Util import terminal_size
    from Log import log
    terminal_width, _ = terminal_size()
    self._show_interactive_process_bar = (log.verbose[3] and (not log.verbose[5]) and terminal_width >= 0)
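
To make the `extra_fetches` contract concrete, here is a hedged sketch of constructing such a Runner; `engine`, `dataset`, and `batches` are assumed to exist already, and the "output" layer name is illustrative, not taken from the example above:

def print_seq_info(seq_tag, output):
    # Called once per step with keyword args named after the extra_fetches keys;
    # Data/LayerBase fetches arrive as lists with one entry per sequence.
    for tag, out in zip(seq_tag, output):
        print("seq %s -> output shape %r" % (tag, out.shape))

runner = Runner(
    engine=engine, dataset=dataset, batches=batches, train=False, eval=False,
    extra_fetches={
        "seq_tag": engine.network.get_extern_data("seq_tag"),
        "output": engine.network.layers["output"],  # assumed layer name
    },
    extra_fetches_callback=print_seq_info)
runner.run(report_prefix="extra-fetches sketch")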
Example #4
    def __init__(self, engine, dataset, batches, train, eval=True):
        """
    :param Engine engine:
    :param Dataset.Dataset dataset:
    :param BatchSetGenerator batches:
    :param bool train: whether to do updates on the model
    :param bool eval: whether to evaluate (i.e. calculate loss/error)
    """
        from TFDataPipeline import FeedDictDataProvider, DataProviderBase
        self.engine = engine
        self.data_provider = FeedDictDataProvider(
            tf_session=engine.tf_session,
            extern_data=engine.network.extern_data,
            data_keys=engine.network.used_data_keys,
            dataset=dataset,
            batches=batches)
        assert isinstance(self.data_provider, DataProviderBase)
        self._should_train = train
        self._should_eval = eval
        self.store_metadata_mod_step = engine.config.int(
            "store_metadata_mod_step", 0)
        self.reset_updater_vars_mod_step = engine.config.int(
            "reset_updater_vars_mod_step", 0)
        self.finalized = False
        self.num_steps = None
        self.device_crash_batch = None  # type: int|None
        self.start_time = None
        self.elapsed = None
        self._results_accumulated = {}  # type: dict[str,float]  # entries like "cost:output" or "loss"
        self.results = {}  # type: dict[str,float]  # entries like "cost:output" or "loss"
        self.score = {}  # type: dict[str,float]  # entries like "cost:output"
        self.error = {}  # type: dict[str,float]  # entries like "error:output"
        self.stats = {}  # type: dict[str,float]  # entries like "stats:..."

        from Util import terminal_size
        from Log import log
        terminal_width, _ = terminal_size()
        self._show_interactive_process_bar = (
            log.verbose[3] and (not log.verbose[5]) and terminal_width >= 0)
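
The two `engine.config.int` lookups above read optional debugging knobs from the config, with 0 as the default. A small hedged sketch of setting and reading such values through the Config API already used in Example #2:

from Config import Config

config = Config()
config.update({"store_metadata_mod_step": 100, "reset_updater_vars_mod_step": 0})
assert config.int("store_metadata_mod_step", 0) == 100  # 0 is the default when unset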
Example #5
def test_DataProvider():
    """
    Builds a FeedDictDataProvider over a single batch of a DummyDataset and
    checks the shapes and contents of the resulting feed_dict.
    Assumes a module-level tf.Session named `session`, as in the surrounding test file.
    """
    import numpy
    from nose.tools import assert_equal, assert_is_instance
    from GeneratingDataset import DummyDataset
    from EngineBatch import Batch, BatchSetGenerator
    from TFNetwork import ExternData
    seq_len = 5
    n_data_dim = 2
    n_classes_dim = 3
    dataset = DummyDataset(input_dim=n_data_dim,
                           output_dim=n_classes_dim,
                           num_seqs=2,
                           seq_len=seq_len)
    dataset.init_seq_order(epoch=1)

    extern_data = ExternData()
    extern_data.init_from_dataset(dataset)

    # No Runner instance here but a very simplified version of Runner.run().
    # First we need a custom DataProvider with a custom BatchSetGenerator
    # which will yield only one single batch for the provided sequence idx.
    seq_idx = 0
    n_batch = 1
    batch = Batch()
    batch.add_frames(seq_idx=seq_idx,
                     seq_start_frame=0,
                     length=dataset.get_seq_length(seq_idx))
    batch_generator = iter([batch])
    batches = BatchSetGenerator(dataset, generator=batch_generator)
    from TFDataPipeline import FeedDictDataProvider
    data_provider = FeedDictDataProvider(tf_session=session,
                                         extern_data=extern_data,
                                         data_keys=["data", "classes"],
                                         dataset=dataset,
                                         batches=batches)

    feed_dict = data_provider.get_feed_dict(single_threaded=True)
    print(feed_dict)
    assert_is_instance(feed_dict, dict)
    assert extern_data.data["data"].placeholder in feed_dict
    assert extern_data.data["data"].size_placeholder[0] in feed_dict
    assert extern_data.data["classes"].placeholder in feed_dict
    assert extern_data.data["classes"].size_placeholder[0] in feed_dict
    data = feed_dict[extern_data.data["data"].placeholder]
    data_size = feed_dict[extern_data.data["data"].size_placeholder[0]]
    classes = feed_dict[extern_data.data["classes"].placeholder]
    classes_size = feed_dict[extern_data.data["classes"].size_placeholder[0]]
    assert_is_instance(data, numpy.ndarray)
    assert_is_instance(data_size, numpy.ndarray)
    assert_is_instance(classes, numpy.ndarray)
    assert_is_instance(classes_size, numpy.ndarray)
    assert_equal(data.shape, (n_batch, seq_len, n_data_dim))
    assert_equal(data_size.shape, (n_batch,))
    assert_equal(classes.shape, (n_batch, seq_len))
    assert_equal(classes_size.shape, (n_batch,))
    assert_equal(list(data_size), [seq_len])
    assert_equal(list(classes_size), [seq_len])
    numpy.testing.assert_almost_equal(list(data[0, 0]), [-0.5, -0.4])
    numpy.testing.assert_almost_equal(list(data[0, -1]), [0.3, 0.4])
    assert_equal(classes.tolist(), [[1, 2, 0, 1, 2]])
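
Example #5 assumes a `session` already in scope, and Example #2 relies on a `make_scope` helper that is not shown. A sketch of such a helper, modeled on common TF1 test patterns and therefore an assumption rather than code from the examples above:

import contextlib
import tensorflow as tf

@contextlib.contextmanager
def make_scope():
    # Fresh graph and session per test, so no state leaks between tests (assumption).
    with tf.Graph().as_default() as graph:
        with tf.Session(graph=graph) as session:
            yield session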