Example no. 1
def make_Kuf(k, X, a, b, ms):
    omegas = 2. * np.pi * ms / (b - a)
    if float_type is tf.float32:
        omegas = omegas.astype(np.float32)
    Kuf_cos = tf.transpose(tf.cos(omegas * (X - a)))
    omegas_sin = omegas[omegas != 0]  # don't compute zeros freq.
    Kuf_sin = tf.transpose(tf.sin(omegas_sin * (X - a)))

    # correct Kfu outside [a, b]
    lt_a_sin = tf.tile(tf.transpose(X) < a, [len(ms) - 1, 1])
    gt_b_sin = tf.tile(tf.transpose(X) > b, [len(ms) - 1, 1])
    lt_a_cos = tf.tile(tf.transpose(X) < a, [len(ms), 1])
    gt_b_cos = tf.tile(tf.transpose(X) > b, [len(ms), 1])
    if isinstance(k, GPflow.kernels.Matern12):
        # Kuf_sin[:, np.logical_or(X.flatten() < a, X.flatten() > b)] = 0
        Kuf_sin = tf.where(tf.logical_or(lt_a_sin, gt_b_sin),
                           tf.zeros(tf.shape(Kuf_sin), float_type), Kuf_sin)
        Kuf_cos = tf.where(
            lt_a_cos,
            tf.tile(tf.exp(-tf.abs(tf.transpose(X - a)) / k.lengthscales),
                    [len(ms), 1]), Kuf_cos)
        Kuf_cos = tf.where(
            gt_b_cos,
            tf.tile(tf.exp(-tf.abs(tf.transpose(X - b)) / k.lengthscales),
                    [len(ms), 1]), Kuf_cos)
    elif isinstance(k, GPflow.kernels.Matern32):
        arg = np.sqrt(3) * tf.abs(tf.transpose(X) - a) / k.lengthscales
        edge = tf.tile((1 + arg) * tf.exp(-arg), [len(ms), 1])
        Kuf_cos = tf.where(lt_a_cos, edge, Kuf_cos)
        arg = np.sqrt(3) * tf.abs(tf.transpose(X) - b) / k.lengthscales
        edge = tf.tile((1 + arg) * tf.exp(-arg), [len(ms), 1])
        Kuf_cos = tf.where(gt_b_cos, edge, Kuf_cos)

        arg = np.sqrt(3) * tf.abs(tf.transpose(X) - a) / k.lengthscales
        edge = (tf.transpose(X) - a) * tf.exp(-arg) * omegas_sin[:, None]
        Kuf_sin = tf.where(lt_a_sin, edge, Kuf_sin)
        arg = np.sqrt(3) * tf.abs(tf.transpose(X) - b) / k.lengthscales
        edge = (tf.transpose(X) - b) * tf.exp(-arg) * omegas_sin[:, None]
        Kuf_sin = tf.where(gt_b_sin, edge, Kuf_sin)
    elif isinstance(k, GPflow.kernels.Matern52):
        # edges not implemented yet
        Kuf_cos = tf.with_dependencies(
            [tf.assert_greater_equal(
                X, a, message='Edges not implemented for Matern52')],
            Kuf_cos,
            name='assert_left_edge')
        Kuf_sin = tf.with_dependencies(
            [tf.assert_less_equal(
                X, b, message='Edges not implemented for Matern52')],
            Kuf_sin,
            name='assert_right_edge')
    else:
        raise NotImplementedError
    return tf.concat([Kuf_cos, Kuf_sin], axis=0)
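
The Matern52 branch above shows the common pattern in these examples: attach a runtime assertion to a tensor so the check fires before the tensor is consumed. Below is a minimal sketch of that pattern under the TF 1.x graph API; `with_dependencies` is defined in `tensorflow.python.ops.control_flow_ops` (the snippets call it as `tf.with_dependencies`, which presumes such an alias), and the placeholder and interval bounds are illustrative, not taken from the original code.

import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

# Illustrative input locations and interval bounds.
X = tf.placeholder(tf.float32, shape=[None, 1], name='X')
a, b = 0.0, 1.0

# The asserts only execute when the guarded tensor is evaluated.
left_check = tf.assert_greater_equal(X, a, message='X must lie in [a, b]')
right_check = tf.assert_less_equal(X, b, message='X must lie in [a, b]')
Kuf_cos = control_flow_ops.with_dependencies([left_check, right_check],
                                             tf.transpose(tf.cos(X - a)))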
Example no. 2
def diagonal_lstm(inputs, conf, scope='diagonal_lstm'):
  with tf.variable_scope(scope):
    skewed_inputs = skew(inputs, scope="skewed_i")

    # input-to-state (K_is * x_i) : 1x1 convolution. generate 4h x n x n tensor.
    input_to_state = conv2d(skewed_inputs, conf.hidden_dims * 4, [1, 1], "B", scope="i_to_s")
    column_wise_inputs = tf.transpose(
        input_to_state, [2, 0, 1, 3]) # [width, batch, height, hidden_dims * 4]

    if conf.log_level == 'DEBUG':
      logger.warning("[assert] check equal of skew and unskew")

      unskewed_inputs = unskew(skewed_inputs, scope="skewed_i")
      skew_assert_op = tf.Assert(tf.reduce_all(tf.equal(inputs, unskewed_inputs)),
                                 [unskewed_inputs], name='skew_check')
      input_to_state = tf.with_dependencies([skew_assert_op], input_to_state)

    width, batch, height, channel = get_shape(column_wise_inputs)
    rnn_inputs = tf.reshape(column_wise_inputs,
        [-1, width, height * channel]) # [batch, max_time, height * hidden_dims * 4]

    rnn_input_list = [tf.squeeze(rnn_input, squeeze_dims=[1]) 
        for rnn_input in tf.split(split_dim=1, num_split=width, value=rnn_inputs)]

    cell = DiagonalLSTMCell(conf.hidden_dims, height, channel)

    if conf.use_dynamic_rnn:
      outputs, states = tf.nn.dynamic_rnn(cell,
          inputs=rnn_inputs, dtype=tf.float32) # [batch, width, height * hidden_dims]
    else:
      output_list, state_list = tf.nn.rnn(cell,
          inputs=rnn_input_list, dtype=tf.float32) # width * [batch, height * hidden_dims]

      packed_outputs = tf.pack(output_list, 1) # [batch, width, height * hidden_dims]
      width_first_outputs = tf.reshape(packed_outputs,
          [-1, width, height, conf.hidden_dims]) # [batch, width, height, hidden_dims]

      skewed_outputs = tf.transpose(width_first_outputs, [0, 2, 1, 3])
      outputs = unskew(skewed_outputs)

    return outputs
Example no. 3
    def create_train_op(self,
                        total_loss,
                        optimizer,
                        global_step=None,
                        update_ops=None,
                        variables_to_train=None,
                        clip_by_global_norm=False,
                        gradient_noise_scale=None,
                        gradient_multipliers=None,
                        gate_gradients=tf.train.Optimizer.GATE_OP,
                        aggregation_method=None,
                        colocate_gradients_with_ops=False):
        """Creates an `Operation` that evaluates the gradients and returns the loss.
        Args:
            total_loss: A `Tensor` representing the total loss.
            optimizer: A tf.Optimizer to use for computing the gradients.
            global_step: A `Tensor` representing the global step variable. If
                `None`, a new global step variable is created.
            update_ops: An optional list of updates to execute. If `update_ops` is
                `None`, then the update ops are set to the contents of the
                `tf.GraphKeys.UPDATE_OPS` collection. If `update_ops` is not `None`, but
                it doesn't contain all of the update ops in `tf.GraphKeys.UPDATE_OPS`,
                a warning will be displayed.
            variables_to_train: an optional list of variables to train. If None, it will
                default to all tf.trainable_variables().
            clip_by_global_norm: A bool. If True, clip gradients by their global
                norm; otherwise clip each gradient by its own norm.
            gradient_noise_scale: if not None, add noise to the gradients.
            gradient_multipliers: if not None, a dict mapping variables (or
                variable names) to gradient multipliers.
            gate_gradients: How to gate the computation of gradients. See tf.Optimizer.
            aggregation_method: Specifies the method used to combine gradient terms.
                Valid values are defined in the class `AggregationMethod`.
            colocate_gradients_with_ops: Whether or not to try colocating the gradients
                with the ops that generated them.
        Returns:
            A `Tensor` that when evaluated, computes the gradients and returns the total
                loss value.
        """
        if global_step is None:
            global_step = tf.get_variable('global_step',
                                          shape=[],
                                          dtype=tf.int64,
                                          initializer=tf.zeros_initializer,
                                          trainable=False)

        # Update ops use GraphKeys.UPDATE_OPS collection if update_ops is None.
        global_update_ops = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
        if update_ops is None:
            update_ops = global_update_ops
        else:
            update_ops = set(update_ops)
        if not global_update_ops.issubset(update_ops):
            log.warn(
                'update_ops in create_train_op does not contain all the update_ops in GraphKeys.UPDATE_OPS'
            )

        # Make sure update_ops are computed before total_loss.
        if update_ops:
            with tf.control_dependencies(update_ops):
                barrier = tf.no_op(name='update_barrier')
                total_loss = tf.with_dependencies([barrier], total_loss)

        if variables_to_train is None:
            variables_to_train = tf.trainable_variables()
        else:
            for v in variables_to_train:
                assert v in tf.trainable_variables()

        assert variables_to_train
        if clip_by_global_norm:
            grads_and_vars = self._clip_grad_global_norms(
                variables_to_train,
                total_loss,
                optimizer,
                global_norm=8,
                gate_gradients=gate_gradients,
                gradient_noise_scale=gradient_noise_scale,
                GATE_GRAPH=2,
                grad_loss=None,
                agre_method=aggregation_method,
                col_grad_ops=colocate_gradients_with_ops)
        else:
            grads_and_vars = optimizer.compute_gradients(
                total_loss,
                variables_to_train,
                gate_gradients=gate_gradients,
                aggregation_method=aggregation_method,
                colocate_gradients_with_ops=colocate_gradients_with_ops)
            grads_and_vars = self._clip_grad_norms(grads_and_vars, max_norm=8)

        if gradient_multipliers is not None:
            grads_and_vars = self._multiply_gradients(grads_and_vars,
                                                      gradient_multipliers)

        grad_updates = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

        with tf.name_scope('train_op'):
            # Make sure total_loss is valid.
            total_loss = tf.check_numerics(total_loss,
                                           'LossTensor is inf or nan')

        # Ensure the train_tensor computes grad_updates.
        with tf.control_dependencies([grad_updates]):
            total_loss = tf.identity(total_loss)
        return total_loss
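
The update-ops barrier built near the top of this method is, in effect, an identity on the loss placed behind the pending update ops. The sketch below illustrates that behaviour in a self-contained way, using a stand-in update op of my own rather than real batch-norm updates.

import tensorflow as tf

counter = tf.Variable(0, trainable=False, name='update_counter')
# Register a stand-in update op, the way batch-norm layers register their
# moving-average updates.
tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, tf.assign_add(counter, 1))

total_loss = tf.reduce_mean(tf.square(tf.random_normal([32, 10])))

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    barrier = tf.no_op(name='update_barrier')
# Mirrors tf.with_dependencies([barrier], total_loss): the loss value is
# unchanged, but fetching it now forces the update ops to run first.
with tf.control_dependencies([barrier]):
    total_loss = tf.identity(total_loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(total_loss)
    print(sess.run(counter))  # 1: the stand-in update ran before the loss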
Example no. 4
def build_graph(reader,
                model,
                train_data_pattern,
                label_loss_fn=losses.CrossEntropyLoss(),
                batch_size=1000,
                base_learning_rate=0.01,
                optimizer_class=tf.train.AdamOptimizer,
                regularization_penalty=1e-3,
                num_readers=1,
                num_epochs=None):
    """Creates the Tensorflow graph.

    This will only be called once in the life of
    a training model, because after the graph is created the model will be
    restored from a meta graph file rather than being recreated.

    Args:
      reader: The data file reader. It should inherit from BaseReader.
      model: The core model (e.g. logistic or neural net). It should inherit
             from BaseModel.
      train_data_pattern: glob path to the training data files.
      label_loss_fn: What kind of loss to apply to the model. It should inherit
                     from BaseLoss.
      batch_size: How many examples to process at a time.
      base_learning_rate: What learning rate to initialize the optimizer with.
      optimizer_class: Which optimization algorithm to use.
      regularization_penalty: How much weight to give the regularization loss
                              compared to the label loss.
      num_readers: How many threads to use for I/O operations.
      num_epochs: How many passes to make over the data. 'None' means an
                  unlimited number of passes.
    """
    with tf.device(
            tf.train.replica_device_setter(FLAGS.ps_tasks,
                                           merge_devices=True)):
        global_step = tf.Variable(0, trainable=False, name="global_step")
        optimizer = optimizer_class(base_learning_rate)
        unused_video_id, model_input_raw, labels_batch, num_frames = (
            get_input_data_tensors(reader,
                                   train_data_pattern,
                                   batch_size=batch_size,
                                   num_readers=num_readers,
                                   num_epochs=num_epochs))
        tf.summary.histogram("model/input_raw", model_input_raw)

        feature_dim = len(model_input_raw.get_shape()) - 1

        model_input = tf.nn.l2_normalize(model_input_raw, feature_dim)

        with tf.name_scope("model"):
            result = model.create_model(model_input,
                                        num_frames=num_frames,
                                        vocab_size=reader.num_classes,
                                        labels=labels_batch)

            for variable in slim.get_model_variables():
                tf.summary.histogram(variable.op.name, variable)

            predictions = result["predictions"]
            tf.summary.histogram("model_activations", predictions)
            if "loss" in result.keys():
                label_loss = result["loss"]
            else:
                label_loss = label_loss_fn.calculate_loss(
                    predictions, labels_batch)
            tf.summary.scalar("label_loss", label_loss)

            if "regularization_loss" in result.keys():
                reg_loss = result["regularization_loss"]
            else:
                reg_loss = tf.constant(0.0)
            if regularization_penalty != 0:
                tf.summary.scalar("reg_loss", reg_loss)

        # Adds update_ops (e.g., moving average updates in batch normalization) as
        # a dependency to the train_op.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if "update_ops" in result.keys():
            update_ops += result["update_ops"]
        if update_ops:
            with tf.control_dependencies(update_ops):
                barrier = tf.no_op(name="gradient_barrier")
                label_loss = tf.with_dependencies([barrier], label_loss)

        # Incorporate the L2 weight penalties etc.
        final_loss = regularization_penalty * reg_loss + label_loss
        train_op = optimizer.minimize(final_loss, global_step=global_step)

        tf.add_to_collection("global_step", global_step)
        tf.add_to_collection("loss", label_loss)
        tf.add_to_collection("predictions", predictions)
        tf.add_to_collection("input_batch_raw", model_input_raw)
        tf.add_to_collection("input_batch", model_input)
        tf.add_to_collection("num_frames", num_frames)
        tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32))
        tf.add_to_collection("train_op", train_op)
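
Because build_graph registers its key tensors in named graph collections, a later training loop can recover them without holding Python references. The following sketch of such a loop is an assumption on my part: it presumes build_graph has already been called on the default graph and that the reader uses the queue-based input pipeline implied by num_readers and num_epochs.

import tensorflow as tf

# Assumes build_graph(...) has already populated the default graph.
train_op = tf.get_collection("train_op")[0]
label_loss = tf.get_collection("loss")[0]
global_step = tf.get_collection("global_step")[0]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())  # epoch counters of the input queues
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while not coord.should_stop():
            _, loss_val, step = sess.run([train_op, label_loss, global_step])
    except tf.errors.OutOfRangeError:
        pass  # input queues exhausted after num_epochs passes
    finally:
        coord.request_stop()
        coord.join(threads)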
Example no. 5
def main(_):
    ps_hosts = FLAGS.ps_hosts.split(",")
    worker_hosts = FLAGS.worker_hosts.split(",")

    # Create a cluster from the parameter server and worker hosts.
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})

    # Create and start a server for the local task.
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index)

    if FLAGS.job_name == "ps":
        server.join()
    elif FLAGS.job_name == "worker":
        # Assigns ops to the local worker by default.
        with tf.device(
                tf.train.replica_device_setter(
                    worker_device="/job:worker/task:%d" % FLAGS.task_index,
                    cluster=cluster)):

            # set Keras learning phase to train
            keras.backend.set_learning_phase(1)
            # do not initialize variables on the fly
            keras.backend.manual_variable_initialization(True)

            # Build Keras model
            model = ...

            # keras model predictions
            preds = model.output
            # placeholder for training targets
            targets = tf.placeholder(...)
            # our categorical crossentropy loss
            xent_loss = tf.reduce_mean(
                keras.objectives.categorical_crossentropy(targets, preds))

            # we create a global_step tensor for distributed training
            # (a counter of iterations)
            global_step = tf.Variable(0, name='global_step', trainable=False)

            # apply regularizers if any
            if model.regularizers:
                total_loss = xent_loss * 1.  # copy tensor
                for regularizer in model.regularizers:
                    total_loss = regularizer(total_loss)
            else:
                total_loss = xent_loss

            # set up TF optimizer
            optimizer = tf.train.RMSPropOptimizer(FLAGS.learning_rate,
                                                  decay=0.9,
                                                  momentum=FLAGS.momentum,
                                                  epsilon=1e-8)

            # Set up model update ops (batch norm ops).
            # The gradients should only be computed after updating the moving average
            # of the batch normalization parameters, in order to prevent a data race
            # between the parameter updates and moving average computations.
            with tf.control_dependencies(model.updates):
                barrier = tf.no_op(name='update_barrier')

            # define gradient updates
            with tf.control_dependencies([barrier]):
                grads = optimizer.compute_gradients(
                    total_loss,
                    model.trainable_weights,
                    gate_gradients=tf.train.Optimizer.GATE_OP,
                    aggregation_method=None,
                    colocate_gradients_with_ops=False)
                # apply the gradients; this also increments global_step
                grad_updates = optimizer.apply_gradients(
                    grads, global_step=global_step)

            # define train tensor
            train_tensor = tf.with_dependencies([grad_updates],
                                                total_loss,
                                                name='train')

            # saving, summaries and variable initialization
            saver = tf.train.Saver()
            summary_op = tf.merge_all_summaries()
            init_op = tf.initialize_all_variables()

            # Create a "supervisor", which oversees the training process.
            sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0),
                                     logdir="/tmp/train_logs",
                                     init_op=init_op,
                                     summary_op=summary_op,
                                     saver=saver,
                                     global_step=global_step,
                                     save_model_secs=600)

            # The supervisor takes care of session initialization, restoring from
            # a checkpoint, and closing when done or an error occurs.
            with sv.managed_session(server.target) as sess:
                # Loop until the supervisor shuts down or 1000000 steps have completed.
                step = 0
                while not sv.should_stop() and step < 1000000:
                    # Run a training step asynchronously.
                    # See `tf.train.SyncReplicasOptimizer` for additional details on how to
                    # perform *synchronous* training.

                    # feed_dict must contain the model inputs (the tensors listed in model.inputs)
                    # and the "targets" placeholder we created earlier.
                    # It is a dictionary mapping tensors to batches of Numpy data,
                    # like: feed_dict={model.inputs[0]: np_train_data_batch, targets: np_train_labels_batch}
                    loss_value, step = sess.run([train_tensor, global_step],
                                                feed_dict={...})

            # Ask for all the services to stop.
            sv.stop()