def make_Kuf(k, X, a, b, ms):
    omegas = 2. * np.pi * ms / (b - a)
    if float_type is tf.float32:
        omegas = omegas.astype(np.float32)
    Kuf_cos = tf.transpose(tf.cos(omegas * (X - a)))
    omegas_sin = omegas[omegas != 0]  # don't compute zero freq.
    Kuf_sin = tf.transpose(tf.sin(omegas_sin * (X - a)))

    # correct Kfu outside [a, b]
    lt_a_sin = tf.tile(tf.transpose(X) < a, [len(ms) - 1, 1])
    gt_b_sin = tf.tile(tf.transpose(X) > b, [len(ms) - 1, 1])
    lt_a_cos = tf.tile(tf.transpose(X) < a, [len(ms), 1])
    gt_b_cos = tf.tile(tf.transpose(X) > b, [len(ms), 1])

    if isinstance(k, GPflow.kernels.Matern12):
        # Kuf_sin[:, np.logical_or(X.flatten() < a, X.flatten() > b)] = 0
        Kuf_sin = tf.where(tf.logical_or(lt_a_sin, gt_b_sin),
                           tf.zeros(tf.shape(Kuf_sin), float_type), Kuf_sin)
        Kuf_cos = tf.where(
            lt_a_cos,
            tf.tile(tf.exp(-tf.abs(tf.transpose(X - a)) / k.lengthscales), [len(ms), 1]),
            Kuf_cos)
        Kuf_cos = tf.where(
            gt_b_cos,
            tf.tile(tf.exp(-tf.abs(tf.transpose(X - b)) / k.lengthscales), [len(ms), 1]),
            Kuf_cos)

    elif isinstance(k, GPflow.kernels.Matern32):
        arg = np.sqrt(3) * tf.abs(tf.transpose(X) - a) / k.lengthscales
        edge = tf.tile((1 + arg) * tf.exp(-arg), [len(ms), 1])
        Kuf_cos = tf.where(lt_a_cos, edge, Kuf_cos)

        arg = np.sqrt(3) * tf.abs(tf.transpose(X) - b) / k.lengthscales
        edge = tf.tile((1 + arg) * tf.exp(-arg), [len(ms), 1])
        Kuf_cos = tf.where(gt_b_cos, edge, Kuf_cos)

        arg = np.sqrt(3) * tf.abs(tf.transpose(X) - a) / k.lengthscales
        edge = (tf.transpose(X) - a) * tf.exp(-arg) * omegas_sin[:, None]
        Kuf_sin = tf.where(lt_a_sin, edge, Kuf_sin)

        arg = np.sqrt(3) * tf.abs(tf.transpose(X) - b) / k.lengthscales
        edge = (tf.transpose(X) - b) * tf.exp(-arg) * omegas_sin[:, None]
        Kuf_sin = tf.where(gt_b_sin, edge, Kuf_sin)

    elif isinstance(k, GPflow.kernels.Matern52):
        # edges not implemented yet
        Kuf_cos = tf.with_dependencies(
            [tf.assert_greater_equal(X, a)], Kuf_cos,
            message='Edges not implemented for Matern52',
            name='assert_left_edge')
        Kuf_sin = tf.with_dependencies(
            [tf.assert_less_equal(X, b)], Kuf_sin,
            message='Edges not implemented for Matern52',
            name='assert_right_edge')

    else:
        raise NotImplementedError

    return tf.concat([Kuf_cos, Kuf_sin], axis=0)
def diagonal_lstm(inputs, conf, scope='diagonal_lstm'):
    with tf.variable_scope(scope):
        skewed_inputs = skew(inputs, scope="skewed_i")

        # input-to-state (K_is * x_i) : 1x1 convolution. generate 4h x n x n tensor.
        input_to_state = conv2d(skewed_inputs, conf.hidden_dims * 4, [1, 1], "B", scope="i_to_s")
        column_wise_inputs = tf.transpose(
            input_to_state, [2, 0, 1, 3])  # [width, batch, height, hidden_dims * 4]

        if conf.log_level == 'DEBUG':
            logger.warning("[assert] check equal of skew and unskew")
            unskewed_inputs = unskew(skewed_inputs, scope="skewed_i")
            skew_assert_op = tf.Assert(tf.equal(inputs, unskewed_inputs, 'skew_check'),
                                       [unskewed_inputs])
            input_to_state = tf.with_dependencies([skew_assert_op], input_to_state)

        width, batch, height, channel = get_shape(column_wise_inputs)
        rnn_inputs = tf.reshape(
            column_wise_inputs, [-1, width, height * channel])  # [batch, max_time, height * hidden_dims * 4]
        rnn_input_list = [tf.squeeze(rnn_input, squeeze_dims=[1])
                          for rnn_input in tf.split(split_dim=1, num_split=width, value=rnn_inputs)]

        cell = DiagonalLSTMCell(conf.hidden_dims, height, channel)

        if conf.use_dynamic_rnn:
            outputs, states = tf.nn.dynamic_rnn(
                cell, inputs=rnn_inputs, dtype=tf.float32)  # [batch, width, height * hidden_dims]
        else:
            output_list, state_list = tf.nn.rnn(
                cell, inputs=rnn_input_list, dtype=tf.float32)  # width * [batch, height * hidden_dims]

            packed_outputs = tf.pack(output_list, 1)  # [batch, width, height * hidden_dims]
            width_first_outputs = tf.reshape(
                packed_outputs, [-1, width, height, conf.hidden_dims])  # [batch, width, height, hidden_dims]

            skewed_outputs = tf.transpose(width_first_outputs, [0, 2, 1, 3])
            outputs = unskew(skewed_outputs)

        return outputs
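A side note on the debug check above: `tf.Assert` takes a boolean condition (a scalar in practice), while `tf.equal` compares elementwise. A self-contained sketch of the same consistency check with an explicit reduction, using placeholder tensors in place of the function's `inputs`/`unskewed_inputs`, might look like:

import tensorflow as tf

# Placeholders standing in for `inputs` and `unskewed_inputs` above (shapes illustrative).
inputs = tf.placeholder(tf.float32, [None, 8, 8, 3], name='inputs')
unskewed_inputs = tf.placeholder(tf.float32, [None, 8, 8, 3], name='unskewed_inputs')

# Reduce the elementwise comparison to a single boolean before asserting.
skew_assert_op = tf.Assert(
    tf.reduce_all(tf.equal(inputs, unskewed_inputs)),
    [unskewed_inputs], name='skew_check')

# Gating a tensor on the assert makes the check run whenever that tensor is evaluated.
with tf.control_dependencies([skew_assert_op]):
    checked_inputs = tf.identity(inputs)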
def create_train_op(self,
                    total_loss,
                    optimizer,
                    global_step=None,
                    update_ops=None,
                    variables_to_train=None,
                    clip_by_global_norm=False,
                    gradient_noise_scale=None,
                    gradient_multipliers=None,
                    gate_gradients=tf.train.Optimizer.GATE_OP,
                    aggregation_method=None,
                    colocate_gradients_with_ops=False):
    """Creates an `Operation` that evaluates the gradients and returns the loss.

    Args:
      total_loss: A `Tensor` representing the total loss.
      optimizer: A tf.Optimizer to use for computing the gradients.
      global_step: A `Tensor` representing the global step variable. If None, a
        new `global_step` variable is created.
      update_ops: An optional list of updates to execute. If `update_ops` is
        `None`, then the update ops are set to the contents of the
        `tf.GraphKeys.UPDATE_OPS` collection. If `update_ops` is not `None`,
        but it doesn't contain all of the update ops in
        `tf.GraphKeys.UPDATE_OPS`, a warning will be displayed.
      variables_to_train: An optional list of variables to train. If None, it
        defaults to all tf.trainable_variables().
      clip_by_global_norm: A bool. If True, gradients are clipped by their
        global norm; otherwise each gradient is clipped by its own norm.
      gradient_noise_scale: If not None, add noise to the gradients.
      gradient_multipliers: If not None, a dict of multipliers to apply to the
        gradients of the given variables.
      gate_gradients: How to gate the computation of gradients. See
        tf.Optimizer.
      aggregation_method: Specifies the method used to combine gradient terms.
        Valid values are defined in the class `AggregationMethod`.
      colocate_gradients_with_ops: Whether or not to try colocating the
        gradients with the ops that generated them.

    Returns:
      A `Tensor` that, when evaluated, computes the gradients and returns the
      total loss value.
    """
    if global_step is None:
        global_step = tf.get_variable('global_step',
                                      shape=[],
                                      dtype=tf.int64,
                                      initializer=tf.zeros_initializer,
                                      trainable=False)

    # Update ops use GraphKeys.UPDATE_OPS collection if update_ops is None.
    global_update_ops = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
    if update_ops is None:
        update_ops = global_update_ops
    else:
        update_ops = set(update_ops)
    if not global_update_ops.issubset(update_ops):
        log.warn('update_ops in create_train_op does not contain all the '
                 'update_ops in GraphKeys.UPDATE_OPS')

    # Make sure update_ops are computed before total_loss.
    if update_ops:
        with tf.control_dependencies(update_ops):
            barrier = tf.no_op(name='update_barrier')
        total_loss = tf.with_dependencies([barrier], total_loss)

    if variables_to_train is None:
        variables_to_train = tf.trainable_variables()
    else:
        for v in variables_to_train:
            assert v in tf.trainable_variables()
    assert variables_to_train

    if clip_by_global_norm:
        grads_and_vars = self._clip_grad_global_norms(
            variables_to_train,
            total_loss,
            optimizer,
            global_norm=8,
            gate_gradients=gate_gradients,
            gradient_noise_scale=gradient_noise_scale,
            GATE_GRAPH=2,
            grad_loss=None,
            agre_method=aggregation_method,
            col_grad_ops=colocate_gradients_with_ops)
    else:
        grads_and_vars = optimizer.compute_gradients(
            total_loss,
            variables_to_train,
            gate_gradients=gate_gradients,
            aggregation_method=aggregation_method,
            colocate_gradients_with_ops=colocate_gradients_with_ops)
        grads_and_vars = self._clip_grad_norms(grads_and_vars, max_norm=8)

    if gradient_multipliers is not None:
        grads_and_vars = self._multiply_gradients(grads_and_vars,
                                                  gradient_multipliers)

    grad_updates = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

    with tf.name_scope('train_op'):
        # Make sure total_loss is valid.
        total_loss = tf.check_numerics(total_loss, 'LossTensor is inf or nan')
        # Ensure the train_tensor computes grad_updates.
        with tf.control_dependencies([grad_updates]):
            total_loss = tf.identity(total_loss)

    return total_loss
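`_clip_grad_global_norms` and `_clip_grad_norms` above are project-specific helpers that are not shown here. As a point of reference only, a minimal sketch of the global-norm variant built on stock `tf.clip_by_global_norm` (the function name and the `max_norm` default are assumptions, not the project's API) could look like:

import tensorflow as tf

def clip_grads_by_global_norm(grads_and_vars, max_norm=8.0):
    # Sketch: jointly rescale all gradients so their global norm does not
    # exceed max_norm, keeping each (gradient, variable) pairing intact.
    grads, variables = zip(*grads_and_vars)
    clipped_grads, _ = tf.clip_by_global_norm(list(grads), max_norm)
    return list(zip(clipped_grads, variables))

Such a helper would be applied to the output of optimizer.compute_gradients() before apply_gradients(), mirroring the local-norm clipping in the else-branch above.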
def build_graph(reader,
                model,
                train_data_pattern,
                label_loss_fn=losses.CrossEntropyLoss(),
                batch_size=1000,
                base_learning_rate=0.01,
                optimizer_class=tf.train.AdamOptimizer,
                regularization_penalty=1e-3,
                num_readers=1,
                num_epochs=None):
    """Creates the Tensorflow graph.

    This will only be called once in the life of a training model, because after
    the graph is created the model will be restored from a meta graph file
    rather than being recreated.

    Args:
      reader: The data file reader. It should inherit from BaseReader.
      model: The core model (e.g. logistic or neural net). It should inherit
        from BaseModel.
      train_data_pattern: glob path to the training data files.
      label_loss_fn: What kind of loss to apply to the model. It should inherit
        from BaseLoss.
      batch_size: How many examples to process at a time.
      base_learning_rate: What learning rate to initialize the optimizer with.
      optimizer_class: Which optimization algorithm to use.
      regularization_penalty: How much weight to give the regularization loss
        compared to the label loss.
      num_readers: How many threads to use for I/O operations.
      num_epochs: How many passes to make over the data. 'None' means an
        unlimited number of passes.
    """
    with tf.device(
            tf.train.replica_device_setter(FLAGS.ps_tasks, merge_devices=True)):
        global_step = tf.Variable(0, trainable=False, name="global_step")
        optimizer = optimizer_class(base_learning_rate)
        unused_video_id, model_input_raw, labels_batch, num_frames = (
            get_input_data_tensors(reader,
                                   train_data_pattern,
                                   batch_size=batch_size,
                                   num_readers=num_readers,
                                   num_epochs=num_epochs))
        tf.summary.histogram("model/input_raw", model_input_raw)

        feature_dim = len(model_input_raw.get_shape()) - 1
        model_input = tf.nn.l2_normalize(model_input_raw, feature_dim)

        with tf.name_scope("model"):
            result = model.create_model(model_input,
                                        num_frames=num_frames,
                                        vocab_size=reader.num_classes,
                                        labels=labels_batch)

            for variable in slim.get_model_variables():
                tf.summary.histogram(variable.op.name, variable)

            predictions = result["predictions"]
            tf.summary.histogram("model_activations", predictions)

            if "loss" in result.keys():
                label_loss = result["loss"]
            else:
                label_loss = label_loss_fn.calculate_loss(predictions, labels_batch)
            tf.summary.scalar("label_loss", label_loss)

            if "regularization_loss" in result.keys():
                reg_loss = result["regularization_loss"]
            else:
                reg_loss = tf.constant(0.0)
            if regularization_penalty != 0:
                tf.summary.scalar("reg_loss", reg_loss)

            # Adds update_ops (e.g., moving average updates in batch normalization)
            # as a dependency to the train_op.
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            if "update_ops" in result.keys():
                update_ops += result["update_ops"]
            if update_ops:
                with tf.control_dependencies(update_ops):
                    barrier = tf.no_op(name="gradient_barrier")
                    label_loss = tf.with_dependencies([barrier], label_loss)

            # Incorporate the L2 weight penalties etc.
            final_loss = regularization_penalty * reg_loss + label_loss
            train_op = optimizer.minimize(final_loss, global_step=global_step)

        tf.add_to_collection("global_step", global_step)
        tf.add_to_collection("loss", label_loss)
        tf.add_to_collection("predictions", predictions)
        tf.add_to_collection("input_batch_raw", model_input_raw)
        tf.add_to_collection("input_batch", model_input)
        tf.add_to_collection("num_frames", num_frames)
        tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32))
        tf.add_to_collection("train_op", train_op)
def main(_):
    ps_hosts = FLAGS.ps_hosts.split(",")
    worker_hosts = FLAGS.worker_hosts.split(",")

    # Create a cluster from the parameter server and worker hosts.
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})

    # Create and start a server for the local task.
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index)

    if FLAGS.job_name == "ps":
        server.join()
    elif FLAGS.job_name == "worker":
        # Assigns ops to the local worker by default.
        with tf.device(
                tf.train.replica_device_setter(
                    worker_device="/job:worker/task:%d" % FLAGS.task_index,
                    cluster=cluster)):

            # set Keras learning phase to train
            keras.backend.set_learning_phase(1)
            # do not initialize variables on the fly
            keras.backend.manual_variable_initialization(True)

            # Build Keras model
            model = ...

            # keras model predictions
            preds = model.output
            # placeholder for training targets
            targets = tf.placeholder(...)
            # our categorical crossentropy loss
            xent_loss = tf.reduce_mean(
                keras.objectives.categorical_crossentropy(targets, preds))

            # we create a global_step tensor for distributed training
            # (a counter of iterations)
            global_step = tf.Variable(0, name='global_step', trainable=False)

            # apply regularizers if any
            if model.regularizers:
                total_loss = xent_loss * 1.  # copy tensor
                for regularizer in model.regularizers:
                    total_loss = regularizer(total_loss)
            else:
                total_loss = xent_loss

            # set up TF optimizer
            optimizer = tf.train.RMSPropOptimizer(learning_rate,
                                                  decay=0.9,
                                                  momentum=FLAGS.momentum,
                                                  epsilon=1e-8)

            # Set up model update ops (batch norm ops).
            # The gradients should only be computed after updating the moving average
            # of the batch normalization parameters, in order to prevent a data race
            # between the parameter updates and moving average computations.
            with tf.control_dependencies(model.updates):
                barrier = tf.no_op(name='update_barrier')

            # define gradient updates
            with tf.control_dependencies([barrier]):
                grads = optimizer.compute_gradients(
                    total_loss,
                    model.trainable_weights,
                    gate_gradients=tf.train.Optimizer.GATE_OP,
                    aggregation_method=None,
                    colocate_gradients_with_ops=False)
                grad_updates = optimizer.apply_gradients(grads,
                                                         global_step=global_step)

            # define train tensor
            train_tensor = tf.with_dependencies([grad_updates],
                                                total_loss,
                                                name='train')

            # saver, summaries and variable initialization
            saver = tf.train.Saver()
            summary_op = tf.merge_all_summaries()
            init_op = tf.initialize_all_variables()

        # Create a "supervisor", which oversees the training process.
        sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0),
                                 logdir="/tmp/train_logs",
                                 init_op=init_op,
                                 summary_op=summary_op,
                                 saver=saver,
                                 global_step=global_step,
                                 save_model_secs=600)

        # The supervisor takes care of session initialization, restoring from
        # a checkpoint, and closing when done or an error occurs.
        with sv.managed_session(server.target) as sess:
            # Loop until the supervisor shuts down or 1000000 steps have completed.
            step = 0
            while not sv.should_stop() and step < 1000000:
                # Run a training step asynchronously.
                # See `tf.train.SyncReplicasOptimizer` for additional details on how to
                # perform *synchronous* training.
                # feed_dict must contain the model inputs (the tensors listed in
                # model.inputs) and the "targets" placeholder we created earlier;
                # it's a dictionary mapping tensors to batches of Numpy data, like:
                # feed_dict={model.inputs[0]: np_train_data_batch,
                #            targets: np_train_labels_batch}
                loss_value, step = sess.run([train_tensor, global_step],
                                            feed_dict={...})

        # Ask for all the services to stop.
        sv.stop()
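All of the snippets above use the same "update barrier" idiom: build a `tf.no_op` under `tf.control_dependencies(update_ops)` and then gate the loss on that barrier, so that evaluating the loss also runs the update ops. The snippets call `tf.with_dependencies`; in stock TensorFlow 1.x the same helper lives at `tensorflow.python.ops.control_flow_ops.with_dependencies`, and an equivalent effect can be had with `tf.control_dependencies` plus `tf.identity`. A minimal runnable sketch of the idiom (the variable and op names are illustrative only):

import tensorflow as tf

x = tf.placeholder(tf.float32, [None], name='x')
counter = tf.Variable(0, trainable=False, name='update_counter')
update_op = tf.assign_add(counter, 1)  # stands in for e.g. batch-norm moving-average updates
loss = tf.reduce_mean(tf.square(x))

# Barrier: a no-op that depends on all update ops.
with tf.control_dependencies([update_op]):
    barrier = tf.no_op(name='update_barrier')

# Gate the loss on the barrier; equivalent to with_dependencies([barrier], loss).
with tf.control_dependencies([barrier]):
    loss = tf.identity(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(loss, feed_dict={x: [1., 2., 3.]})
    print(sess.run(counter))  # 1 -- the update ran as a side effect of evaluating the loss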