def _init_network(self, net_desc, epoch=None):
  if epoch is None:
    epoch = self.epoch
  self._close_tf_session()
  self._reset_graph()
  # The new session will by default use the newly created default graph.
  self._make_tf_session()
  tf.set_random_seed(42)
  from TFUtil import get_global_train_flag_placeholder
  if self.use_dynamic_train_flag:
    train_flag = get_global_train_flag_placeholder()
  else:
    train_flag = False
  if False:  # TODO ...
    extern_data = ExternData()
    extern_data.init_from_config(self.config)
    # TODO...
  network = TFNetwork(
    name="root",
    config=self.config,
    rnd_seed=epoch,
    train_flag=train_flag,
    search_flag=self.use_search_flag)
  network.construct_from_dict(net_desc)
  network.initialize_params(session=self.tf_session)
  network.layers_desc = net_desc
  self.network = network
  if self.train_data:
    # Need to create a new Updater because it holds the learning_rate var,
    # which must be in the current graph.
    self.updater = Updater(config=self.config, tf_session=self.tf_session, network=network)
    self.updater.set_trainable_vars(network.get_trainable_params())
  network.print_network_info()
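# For reference, "net_desc" is a plain dict mapping layer names to layer
# options, in the same format that the tests below pass to construct_from_dict().
# A minimal illustrative sketch (layer names and sizes here are made up):
_example_net_desc = {
  "hidden": {"class": "linear", "activation": "tanh", "n_out": 13},
  "output": {"class": "softmax", "loss": "ce", "target": "classes", "from": ["hidden"]},
}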
def test_Updater_simple_batch():
  with make_scope() as session:
    from TFNetwork import TFNetwork, ExternData
    from Config import Config
    from GeneratingDataset import Task12AXDataset
    dataset = Task12AXDataset()
    dataset.init_seq_order(epoch=1)
    extern_data = ExternData()
    extern_data.init_from_dataset(dataset)

    config = Config()
    network = TFNetwork(extern_data=extern_data, train_flag=True)
    network.construct_from_dict({
      "layer1": {"class": "linear", "activation": "tanh", "n_out": 13},
      "layer2": {"class": "linear", "activation": "tanh", "n_out": 13, "from": ["layer1"]},
      "output": {"class": "softmax", "loss": "ce", "target": "classes", "from": ["layer2"]}
    })
    network.initialize_params(session=session)

    updater = Updater(config=config, network=network)
    updater.set_learning_rate(1.0, session=session)
    updater.set_trainable_vars(network.get_trainable_params())
    updater.init_optimizer_vars(session=session)

    from TFDataPipeline import FeedDictDataProvider
    batches = dataset.generate_batches(
      recurrent_net=network.recurrent,
      batch_size=100,
      max_seqs=10,
      max_seq_length=sys.maxsize,
      used_data_keys=network.used_data_keys)
    data_provider = FeedDictDataProvider(
      tf_session=session, extern_data=extern_data,
      data_keys=network.used_data_keys,
      dataset=dataset, batches=batches)
    feed_dict, _ = data_provider.get_feed_dict(single_threaded=True)
    session.run(updater.get_optim_op(), feed_dict=feed_dict)
def test_Updater_CustomUpdate():
  with make_scope() as session:
    from TFNetwork import TFNetwork, ExternData
    from Config import Config
    from TFUtil import CustomUpdate

    config = Config()
    network = TFNetwork(extern_data=ExternData(), train_flag=True)
    layer = network.add_layer(name="output", layer_class=DummyLayer, initial_value=4.0)
    assert isinstance(layer, DummyLayer)
    network.initialize_params(session=session)

    class CustomUpdateAdd13(CustomUpdate):
      def update_var(self, var):
        return tf.assign_add(var, 13.0)

    CustomUpdateAdd13().set_on_var(layer.x)

    updater = Updater(config=config, network=network)
    updater.set_learning_rate(1000.0, session=session)  # should be ignored
    updater.set_trainable_vars(network.get_trainable_params())
    updater.init_optimizer_vars(session=session)
    session.run(updater.get_optim_op())
    # Should have applied CustomUpdateAdd13: 4.0 + 13.0 = 17.0.
    assert_almost_equal(
      session.run(network.get_default_output_layer().output.placeholder), 17.0)
def test_Updater_add_check_numerics_ops():
  class _Layer(DummyLayer):
    def _get_loss_value(self):
      return tf.log(self.x)

  from TFNetwork import TFNetwork, ExternData
  from Config import Config

  with make_scope() as session:
    config = Config()
    config.set("debug_add_check_numerics_ops", True)
    network = TFNetwork(extern_data=ExternData(), train_flag=True)
    network.add_layer(name="output", layer_class=_Layer, initial_value=1.0)
    network.initialize_params(session=session)

    updater = Updater(config=config, network=network)
    updater.set_learning_rate(1.0, session=session)
    updater.set_trainable_vars(network.get_trainable_params())
    updater.init_optimizer_vars(session=session)
    # Should succeed.
    session.run(updater.get_optim_op())
    # One gradient descent step from ln(x), x = 1.0: gradient is 1.0 / x, thus x - 1.0 = 0.0.
    assert_almost_equal(
      session.run(network.get_default_output_layer().output.placeholder), 0.0)

    try:
      # Now, should fail: the loss is ln(0.0) = -inf.
      session.run(updater.get_optim_op())
    except tf.errors.InvalidArgumentError as exc:
      print("Expected exception: %r" % exc)
    else:
      assert False, "should have raised an exception"
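# Self-contained illustration of the TF mechanism behind the
# "debug_add_check_numerics_ops" flag tested above. Whether the flag uses
# tf.add_check_numerics_ops() directly or a RETURNN variant is not shown here;
# this only demonstrates that such a check op raises InvalidArgumentError on inf/nan.
def _check_numerics_demo():
  with tf.Graph().as_default(), tf.Session() as session:
    x = tf.placeholder(tf.float32, shape=(), name="x")
    y = tf.log(x)
    check_op = tf.add_check_numerics_ops()  # attaches check_numerics to all float tensors
    session.run([y, check_op], feed_dict={x: 1.0})  # fine, log(1) = 0
    try:
      session.run([y, check_op], feed_dict={x: 0.0})  # log(0) = -inf, should fail
    except tf.errors.InvalidArgumentError as exc:
      print("Expected exception: %r" % exc)
    else:
      assert False, "should have raised an exception"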
def __init__(self, input_data, names, dtypes, extern_data, data_keys):
  """
  :param dict[str,tf.Tensor] input_data:
  :param list[str] names: data_keys + extra info
  :param list[tf.DType|str] dtypes: corresponds to names
  :param ExternData extern_data:
  :param list[str] data_keys:
  """
  from TFUtil import Data, post_control_dependencies
  # The device-scope when this gets called is the default device,
  # so everywhere where we want to do it on CPU, we have to specify it explicitly.
  # StagingArea can be used for async CPU->GPU transfer.
  # It will live on the current device by the current device scope, e.g. the GPU.
  self._tf_staging_area = StagingArea(names=names, dtypes=dtypes)
  with tf.device("/cpu:0"):
    self.staging_size = tf.Variable(0, trainable=False, dtype=tf.int32, name="staging_size")
    self._staging_size_init = tf.variables_initializer([self.staging_size])
    with tf.control_dependencies([tf.assign_add(self.staging_size, 1)]):
      self.stage_put_op = self._tf_staging_area.put(input_data)
    get_updates = [tf.assign_sub(self.staging_size, 1)]
  # This should run on the default device (GPU).
  self.stage_get_op = post_control_dependencies(self._tf_staging_area.get(), updates=get_updates)
  self.output_as_extern_data = ExternData(
    default_input=extern_data.default_input,
    default_target=extern_data.default_target,
    # Note: data_keys is a list of keys; look up the corresponding Data in extern_data.
    data={key: Data(**extern_data.data[key].get_kwargs()) for key in data_keys})
  for key, data in self.output_as_extern_data.data.items():
    data.placeholder = self.stage_get_op[key]
    data.size_placeholder = {
      axis: self.stage_get_op["%s/size%i" % (key, axis)]
      for axis in data.get_axes_with_size()}
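# Minimal self-contained illustration of the put/get pattern used above, with a
# bare StagingArea and no ExternData wrapping. As in the code above, put() is
# the async transfer into the staging buffer and get() pops on the consumer
# side; with names given, both work with dicts.
def _staging_area_demo():
  from tensorflow.python.ops.data_flow_ops import StagingArea
  with tf.Graph().as_default(), tf.Session() as session:
    x_in = tf.placeholder(tf.float32, shape=(None,), name="x_in")
    area = StagingArea(names=["x"], dtypes=[tf.float32])
    put_op = area.put({"x": x_in})
    get_op = area.get()  # a dict, because names were given
    session.run(put_op, feed_dict={x_in: [1.0, 2.0, 3.0]})
    print(session.run(get_op["x"]))  # -> [1. 2. 3.]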
def test_Updater_multiple_optimizers_and_opts():
  with make_scope() as session:
    from TFNetwork import TFNetwork, ExternData
    from Config import Config
    from GeneratingDataset import Task12AXDataset
    dataset = Task12AXDataset()
    dataset.init_seq_order(epoch=1)
    extern_data = ExternData()
    extern_data.init_from_dataset(dataset)

    config = Config()
    network = TFNetwork(extern_data=extern_data, train_flag=True)
    network.construct_from_dict({
      "layer1": {
        "class": "linear", "activation": "tanh", "n_out": 13,
        "updater_opts": {"optimizer": {"class": "Adam"}, "accum_grad_multiple_step": 2}},
      "layer2": {
        "class": "linear", "activation": "tanh", "n_out": 13, "from": ["layer1"],
        "updater_opts": {
          "optimizer": {"class": "Adagrad", "learning_rate_multiplier": 3},
          "gradient_noise": 0.1}},
      "output": {"class": "softmax", "loss": "ce", "target": "classes", "from": ["layer2"]}
    })
    network.initialize_params(session=session)

    updater = Updater(config=config, network=network)
    updater.set_learning_rate(1.0, session=session)
    updater.set_trainable_vars(network.get_trainable_params())
    updater.init_optimizer_vars(session=session)
    optim_op = updater.get_optim_op()
    assert isinstance(updater.optimizer, WrapOptimizer)
    # Three optimizers: Adam for layer1's params, Adagrad for layer2's,
    # plus the default optimizer for the remaining params.
    assert len(updater.optimizer.optimizers) == 3

    from TFDataPipeline import FeedDictDataProvider
    batches = dataset.generate_batches(
      recurrent_net=network.recurrent,
      batch_size=100,
      max_seqs=10,
      max_seq_length=sys.maxsize,
      used_data_keys=network.used_data_keys)
    data_provider = FeedDictDataProvider(
      tf_session=session, extern_data=extern_data,
      data_keys=network.used_data_keys,
      dataset=dataset, batches=batches)
    feed_dict, _ = data_provider.get_feed_dict(single_threaded=True)
    session.run(optim_op, feed_dict=feed_dict)
def set_config_num_inputs_outputs_from_dataset(config, dataset):
  """
  :param Config.Config config:
  :param Dataset dataset:
  """
  from Util import BackendEngine
  if BackendEngine.is_tensorflow_selected():
    # TF supports more fine-grained specification,
    # however the dataset does not store that in num_outputs.
    from TFNetwork import ExternData
    config.set("extern_data", {
      key: ExternData.data_kwargs_from_dataset_key(dataset=dataset, key=key)
      for key in dataset.get_data_keys()})
  else:
    config.set("num_inputs", dataset.num_inputs)
    config.set("num_outputs", dataset.num_outputs)
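# Usage sketch for set_config_num_inputs_outputs_from_dataset(), assuming the
# Task12AXDataset from the tests above. With the TF backend selected, the
# per-key data kwargs end up under the "extern_data" config key.
def _demo_set_config_from_dataset():
  from Config import Config
  from GeneratingDataset import Task12AXDataset
  config = Config()
  dataset = Task12AXDataset()
  set_config_num_inputs_outputs_from_dataset(config=config, dataset=dataset)
  print(config.typed_value("extern_data"))  # e.g. kwargs for "data" and "classes"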
def __init__(self, data_queues, batch_size, max_seqs, capacity=10):
  """
  :param TFDataQueues data_queues:
  :param int batch_size:
  :param int max_seqs:
  :param int capacity:
  """
  assert not data_queues.with_batch
  self.data_queues = data_queues
  self.batch_size = batch_size
  self.max_seqs = max_seqs
  self.shapes = {key: data.batch_shape for (key, data) in data_queues.extern_data.data.items()}
  for key, data in data_queues.extern_data.data.items():
    assert data.batch_dim_axis == 0, "batch-dim currently is always added at axis 0"
    for axis in data.get_axes_with_size():
      self.shapes["%s/size%i" % (key, axis)] = (None,)  # (batch,)
  self._tf_out_queue = tf.PaddingFIFOQueue(
    capacity=capacity, name="TFBatchingQueue",
    names=data_queues.names, dtypes=data_queues.dtypes,
    shapes=[self.data_queues.shapes[key] for key in data_queues.names])
  self._tf_batch_nums = tf.FIFOQueue(
    capacity=capacity, dtypes=[tf.int32], shapes=[()])
  self._cur_batch_num = tf.Variable(initial_value=0, dtype=tf.int32, trainable=False, name="batch_num")
  self._cur_max_seq_len = tf.Variable(initial_value=0, dtype=tf.int32, trainable=False, name="max_seq_len")
  self._tf_enqueue_loop_op = self._make_enqueue_loop_op()
  from TFUtil import Data
  self.output_as_extern_data = ExternData(
    default_input=data_queues.extern_data.default_input,
    default_target=data_queues.extern_data.default_target,
    data={key: Data(**data.get_kwargs()) for (key, data) in data_queues.extern_data.data.items()})
  self.batch_queue_size = self._tf_batch_nums.size()
  batch_dequeue_op = self._tf_out_queue.dequeue_up_to(n=self._tf_batch_nums.dequeue())
  self.last_seq_idx = tf.Variable(initial_value=0, dtype=tf.int32, trainable=False, name="last_seq_idx")
  self.seq_counter = tf.Variable(initial_value=0, dtype=tf.int32, trainable=False, name="seq_counter")
  self.batch_counter = tf.Variable(initial_value=0, dtype=tf.int32, trainable=False, name="batch_counter")
  default_input_key = self.output_as_extern_data.default_input
  default_input_data = self.output_as_extern_data.data[default_input_key]
  # Note: use the local batch_dequeue_op here; self.batch_dequeue_op is only
  # assigned below, after the update ops have been attached.
  last_batch_size = tf.shape(batch_dequeue_op[default_input_key])[default_input_data.batch_dim_axis]
  updates = [
    tf.assign(self.last_seq_idx, tf.maximum(self.last_seq_idx, tf.reduce_max(batch_dequeue_op["seq_idx"]))),
    tf.assign_add(self.seq_counter, last_batch_size),
    tf.assign_add(self.batch_counter, 1)]
  self._init_op = tf.variables_initializer([
    self._cur_batch_num, self._cur_max_seq_len,
    self.last_seq_idx, self.seq_counter, self.batch_counter])
  from TFUtil import post_control_dependencies
  self.batch_dequeue_op = post_control_dependencies(batch_dequeue_op, updates=updates)
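# Minimal illustration of the batching mechanism above: a PaddingFIFOQueue pads
# variable-length elements, and dequeue_up_to() prepends the batch dim, just as
# _tf_out_queue / batch_dequeue_op do (here with a single int32 component).
def _padding_fifo_queue_demo():
  with tf.Graph().as_default(), tf.Session() as session:
    queue = tf.PaddingFIFOQueue(capacity=10, dtypes=[tf.int32], shapes=[(None,)])
    session.run([queue.enqueue(([1, 2, 3],)), queue.enqueue(([4, 5],))])
    print(session.run(queue.dequeue_up_to(2)))  # -> [[1 2 3], [4 5 0]], zero-padded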
def test_Updater_GradientDescent():
  with make_scope() as session:
    from TFNetwork import TFNetwork, ExternData
    from Config import Config

    config = Config()
    network = TFNetwork(extern_data=ExternData(), train_flag=True)
    network.add_layer(name="output", layer_class=DummyLayer, initial_value=5.0, loss_value_factor=3.0)
    network.initialize_params(session=session)

    updater = Updater(config=config, network=network)
    updater.set_learning_rate(1.0, session=session)
    updater.set_trainable_vars(network.get_trainable_params())
    updater.init_optimizer_vars(session=session)
    session.run(updater.get_optim_op())
    # One gradient descent step from 3.0 * x: gradient is 3, thus 5 - 3 = 2.
    assert_almost_equal(
      session.run(network.get_default_output_layer().output.placeholder), 2.0)