Example No. 1
def build_graph(reader,
                model,
                input_data_pattern,
                label_loss_fn=losses.CrossEntropyLoss(),
                batch_size=1000,
                distill_reader=None,
                transformer_class=feature_transform.DefaultTransformer):

    video_id, model_input_raw, labels_batch, num_frames = (
        get_input_data_tensors(reader,
                               input_data_pattern,
                               batch_size=batch_size))

    feature_transformer = transformer_class()
    model_input, num_frames = feature_transformer.transform(
        model_input_raw, num_frames=num_frames)

    with tf.name_scope("model"):
        if FLAGS.noise_level > 0:
            noise_level_tensor = tf.placeholder_with_default(
                0.0, shape=[], name="noise_level")
        else:
            noise_level_tensor = None

        if FLAGS.dropout:
            keep_prob_tensor = tf.placeholder_with_default(1.0,
                                                           shape=[],
                                                           name="keep_prob")
            result = model.create_model(model_input,
                                        num_frames=num_frames,
                                        vocab_size=reader.num_classes,
                                        labels=labels_batch,
                                        dropout=FLAGS.dropout,
                                        keep_prob=keep_prob_tensor,
                                        noise_level=noise_level_tensor,
                                        is_training=False)
        else:
            result = model.create_model(model_input,
                                        num_frames=num_frames,
                                        vocab_size=reader.num_classes,
                                        labels=labels_batch,
                                        noise_level=noise_level_tensor,
                                        is_training=False)

        print "result", result
        predictions = result["predictions"]

        tf.add_to_collection("predictions", predictions)
        tf.add_to_collection("video_id_batch", video_id)
        tf.add_to_collection("input_batch_raw", model_input_raw)
        tf.add_to_collection("input_batch", model_input)
        tf.add_to_collection("num_frames", num_frames)
        tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32))
        if FLAGS.dropout:
            tf.add_to_collection("keep_prob", keep_prob_tensor)
        if FLAGS.noise_level > 0:
            tf.add_to_collection("noise_level", noise_level_tensor)
Example No. 2
    def create_model(self,
                     model_input,
                     vocab_size,
                     num_frames,
                     labels,
                     scope='default',
                     is_training=True,
                     **unused_params):

        lstm_size = FLAGS.lstm_cells  # assumed: same flag as the other LSTM examples (not defined in this snippet)
        # reuse must be passed by keyword; positionally it would be taken as default_name
        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):

            with tf.variable_scope('lstm1', reuse=tf.AUTO_REUSE):
                lstm1 = tf.contrib.rnn.MultiRNNCell(
                    [tf.contrib.rnn.BasicLSTMCell(lstm_size, forget_bias=1.0)])

                outputs1, _ = tf.nn.dynamic_rnn(lstm1,
                                                model_input,
                                                sequence_length=num_frames,
                                                dtype=tf.float32,
                                                swap_memory=True)
            with tf.variable_scope('lstm2', reuse=tf.AUTO_REUSE):
                lstm2 = tf.contrib.rnn.MultiRNNCell(
                    [tf.contrib.rnn.BasicLSTMCell(lstm_size, forget_bias=1.0)])

                outputs2, _ = tf.nn.dynamic_rnn(lstm2,
                                                outputs1,
                                                sequence_length=num_frames,
                                                dtype=tf.float32,
                                                swap_memory=True)
            with tf.variable_scope('lstm3', reuse=tf.AUTO_REUSE):
                lstm3 = tf.contrib.rnn.MultiRNNCell(
                    [tf.contrib.rnn.BasicLSTMCell(lstm_size, forget_bias=1.0)])

                outputs, state = tf.nn.dynamic_rnn(lstm3,
                                                   outputs2 + outputs1,
                                                   sequence_length=num_frames,
                                                   dtype=tf.float32,
                                                   swap_memory=True)

            if FLAGS.lstm_pooling_method == 'last':
                inp = state[-1].h
            else:
                inp = utils.FramePooling(outputs, FLAGS.lstm_pooling_method)

            aggregated_model = getattr(video_level_models,
                                       FLAGS.video_level_classifier_model)

            results = aggregated_model().create_model(model_input=inp,
                                                      vocab_size=vocab_size,
                                                      is_training=is_training,
                                                      **unused_params)
            results['features'] = inp
            if labels is not None:
                results['loss'] = losses.CrossEntropyLoss().calculate_loss(
                    results['predictions'], labels)
        return results
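This example and the LSTM variants that follow aggregate the frame outputs with a FramePooling helper that is not shown here. A plausible sketch, assuming it only needs 'average' and 'max' (the 'last' case is handled separately via the LSTM state):

def FramePooling(frames, method):
    """Pools a [batch, max_frames, features] tensor over the frame axis.
    Sketch only; the real utils.FramePooling may support more methods."""
    if method == "average":
        return tf.reduce_mean(frames, 1)
    if method == "max":
        return tf.reduce_max(frames, 1)
    raise ValueError("Unknown pooling method: %s" % method)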
Example No. 3
    def create_model(self,
                     model_input,
                     vocab_size,
                     num_frames,
                     labels,
                     scope='default',
                     is_training=True,
                     **unused_params):
        """Creates a model which uses a stack of LSTMs to represent the video.

    Args:
      model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
                   input features.
      vocab_size: The number of classes in the dataset.
      num_frames: A vector of length 'batch' which indicates the number of
           frames for each video (before padding).

    Returns:
      A dictionary with a tensor containing the probability predictions of the
      model in the 'predictions' key. The dimensions of the tensor are
      'batch_size' x 'num_classes'.
    """
        lstm_size = FLAGS.lstm_cells
        number_of_layers = FLAGS.lstm_layers

        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            stacked_lstm = tf.contrib.rnn.MultiRNNCell([
                tf.contrib.rnn.BasicLSTMCell(lstm_size, forget_bias=1.0)
                for _ in range(number_of_layers)
            ])

            outputs, state = tf.nn.dynamic_rnn(stacked_lstm,
                                               model_input,
                                               sequence_length=num_frames,
                                               dtype=tf.float32,
                                               swap_memory=True)

            aggregated_model = getattr(video_level_models,
                                       FLAGS.video_level_classifier_model)
            if FLAGS.lstm_pooling_method == 'last':
                inp = state[-1].h
            else:
                inp = utils.FramePooling(outputs, FLAGS.lstm_pooling_method)
            results = aggregated_model().create_model(model_input=inp,
                                                      vocab_size=vocab_size,
                                                      is_training=is_training,
                                                      **unused_params)
            results['features'] = inp
            if labels is not None:
                results['loss'] = losses.CrossEntropyLoss().calculate_loss(
                    results['predictions'], labels)
        return results
Example No. 4
    def create_model(self,
                     model_input,
                     vocab_size,
                     num_frames,
                     labels,
                     scope='default',
                     is_training=True,
                     **unused_params):
        lstm_size = FLAGS.lstm_cells
        number_of_layers = FLAGS.lstm_layers
        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            stacked_lstm_fw = tf.contrib.rnn.MultiRNNCell([
                tf.contrib.rnn.BasicLSTMCell(lstm_size, forget_bias=1.0)
                for _ in range(number_of_layers)
            ])

            stacked_lstm_bw = tf.contrib.rnn.MultiRNNCell([
                tf.contrib.rnn.BasicLSTMCell(lstm_size, forget_bias=1.0)
                for _ in range(number_of_layers)
            ])

            outputs, state = tf.nn.bidirectional_dynamic_rnn(
                stacked_lstm_fw,
                stacked_lstm_bw,
                model_input,
                sequence_length=num_frames,
                dtype=tf.float32,
                swap_memory=True)

            if FLAGS.lstm_pooling_method == 'last':
                l = [state[i][-1].h for i in range(2)]
            else:
                l = [
                    utils.FramePooling(outputs[0], FLAGS.lstm_pooling_method),
                    utils.FramePooling(outputs[1], FLAGS.lstm_pooling_method)
                ]

            output = tf.concat(l, 1)

            aggregated_model = getattr(video_level_models,
                                       FLAGS.video_level_classifier_model)

            results = aggregated_model().create_model(model_input=output,
                                                      vocab_size=vocab_size,
                                                      is_training=is_training,
                                                      **unused_params)
            results['features'] = output
            if labels is not None:
                results['loss'] = losses.CrossEntropyLoss().calculate_loss(
                    results['predictions'], labels)
        return results
Example No. 5
    def create_model(self,
                     model_input,
                     vocab_size,
                     labels,
                     scope='default',
                     is_training=True,
                     **unused_params):
        X = FLAGS.residualcnn_x
        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            fc = slim.fully_connected(
                model_input,
                X,
                weights_regularizer=tf.contrib.layers.l2_regularizer(0.01))
            reshaped_input = tf.expand_dims(fc, -1)
            reshaped_input = tf.expand_dims(reshaped_input, -1)

            conv1 = slim.convolution(reshaped_input, 64, [49, 1])
            conv1_norm = slim.batch_norm(conv1, is_training=is_training)

            module1 = self.residual_module([128, 192, 64], conv1_norm,
                                           'module1')
            module1_norm = slim.batch_norm(module1, is_training=is_training)

            conv2 = slim.convolution(module1_norm, 128, 1)
            conv2_norm = slim.batch_norm(conv2, is_training=is_training)

            module2 = self.residual_module([256, 512, 128], conv2_norm,
                                           'module2')
            module2_norm = slim.batch_norm(module2, is_training=is_training)

            conv3 = slim.convolution(module2_norm, 256, 1)
            conv3_norm = slim.batch_norm(conv3, is_training=is_training)

            module3 = self.residual_module([512, 256], conv3_norm, 'module3')
            module3_norm = slim.batch_norm(module3, is_training=is_training)

            conv4 = slim.convolution(module3_norm, X, 1)
            conv4_norm = slim.batch_norm(conv4, is_training=is_training)

            module4 = self.residual_module([512, X], conv4_norm, 'module4')

            features = tf.squeeze(module4, [2])
            features = model_utils.FramePooling(features,
                                                FLAGS.residualcnn_pooling) + fc
            results = MoeModel().create_model(features, vocab_size)
            results['features'] = features
            if labels is not None:
                results['loss'] = losses.CrossEntropyLoss().calculate_loss(
                    results['predictions'], labels)
            return results
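The residual_module helper is not included in this example. A hypothetical sketch, inferred from the call sites (the last entry of each size list always matches the input depth, so an additive shortcut is possible; the [9, 1] kernel is an assumption):

    def residual_module(self, filter_sizes, inputs, scope):
        # Sketch: a stack of 1-D convolutions whose final depth equals the
        # input depth, added back to the input as a residual shortcut.
        with tf.variable_scope(scope):
            net = inputs
            for i, num_filters in enumerate(filter_sizes):
                net = slim.convolution(net, num_filters, [9, 1],
                                       scope='conv%d' % i)
            return net + inputs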
Example No. 6
    def create_model(self,
                     model_input,
                     vocab_size,
                     num_frames,
                     labels,
                     scope='default',
                     **unused_params):

        lstm_size = FLAGS.lstm_cells
        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            cells = tf.contrib.rnn.MultiRNNCell(
                [tf.contrib.rnn.BasicLSTMCell(lstm_size, forget_bias=1.0)])

            outputs1, _ = tf.nn.dynamic_rnn(cells,
                                            model_input,
                                            sequence_length=num_frames,
                                            dtype=tf.float32,
                                            swap_memory=True,
                                            scope='first')
            cells1 = tf.contrib.rnn.MultiRNNCell(
                [tf.contrib.rnn.BasicLSTMCell(lstm_size, forget_bias=1.0)])

            outputs2, state2 = tf.nn.dynamic_rnn(cells1,
                                                 outputs1[:, 0:300:2, :],
                                                 sequence_length=num_frames // 2,
                                                 dtype=tf.float32,
                                                 swap_memory=True,
                                                 scope='second')

            aggregated_model = getattr(video_level_models,
                                       FLAGS.video_level_classifier_model)

            if FLAGS.lstm_pooling_method == 'last':
                output = state2[-1].h
            else:
                output = utils.FramePooling(outputs2,
                                            FLAGS.lstm_pooling_method)
            results = aggregated_model().create_model(model_input=output,
                                                      vocab_size=vocab_size,
                                                      **unused_params)
            results['features'] = output
            if labels is not None:
                results['loss'] = losses.CrossEntropyLoss().calculate_loss(
                    results['predictions'], labels)
        return results
Example No. 7
    def create_model(self,
                     model_input,
                     vocab_size,
                     num_frames,
                     labels,
                     scope='default',
                     is_training=True,
                     **unused_params):
        results = {}
        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            rgb_input = tf.slice(model_input, [0, 0, 0], [-1, -1, 1024])
            audio_input = tf.slice(model_input, [0, 0, 1024], [-1, -1, 128])

            rgb_model = globals()[FLAGS.rgb_frame_level_model]
            audio_model = globals()[FLAGS.audio_frame_level_model]

            rgb_results = rgb_model().create_model(model_input=rgb_input,
                                                   vocab_size=vocab_size,
                                                   num_frames=num_frames,
                                                   labels=labels,
                                                   scope='rgb',
                                                   is_training=is_training,
                                                   **unused_params)

            audio_results = audio_model().create_model(
                model_input=audio_input,
                vocab_size=vocab_size,
                num_frames=num_frames,
                labels=labels,
                scope='audio',
                is_training=is_training,
                **unused_params)
            if labels is not None:
                results['loss'] = rgb_results['loss'] + audio_results['loss']
            aggregated_model = getattr(video_level_models,
                                       FLAGS.video_level_classifier_model)
            features = rgb_results['features'] + audio_results['features']
            output = aggregated_model().create_model(model_input=features,
                                                     vocab_size=vocab_size,
                                                     is_training=is_training,
                                                     **unused_params)

            if labels is not None:
                results['loss'] += 6 * losses.CrossEntropyLoss(
                ).calculate_loss(output['predictions'], labels)
            results['predictions'] = output['predictions']
        return results
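The two tf.slice calls split the standard YouTube-8M frame-level features, which concatenate 1024 visual dimensions with 128 audio dimensions. An equivalent formulation using plain slicing (assuming that 1152-dimensional layout):

rgb_input = model_input[:, :, :1024]        # same as tf.slice(..., [0, 0, 0], [-1, -1, 1024])
audio_input = model_input[:, :, 1024:1152]  # same as tf.slice(..., [0, 0, 1024], [-1, -1, 128])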
Example No. 8
    def create_model(self,
                     model_input,
                     vocab_size,
                     num_frames,
                     labels,
                     scope='default',
                     **unused_params):
        """Creates a model which uses a logistic classifier over the average of the
    frame-level features.

    This class is intended to be an example for implementors of frame level
    models. If you want to train a model over averaged features it is more
    efficient to average them beforehand rather than on the fly.

    Args:
      model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
                   input features.
      vocab_size: The number of classes in the dataset.
      num_frames: A vector of length 'batch' which indicates the number of
           frames for each video (before padding).

    Returns:
      A dictionary with a tensor containing the probability predictions of the
      model in the 'predictions' key. The dimensions of the tensor are
      'batch_size' x 'num_classes'.
    """
        num_frames = tf.cast(tf.expand_dims(num_frames, 1), tf.float32)
        feature_size = model_input.get_shape().as_list()[2]

        denominators = tf.reshape(tf.tile(num_frames, [1, feature_size]),
                                  [-1, feature_size])
        avg_pooled = tf.reduce_sum(model_input, axis=[1]) / denominators

        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            output = slim.fully_connected(
                avg_pooled,
                vocab_size,
                activation_fn=tf.nn.sigmoid,
                weights_regularizer=slim.l2_regularizer(1e-8))

        return {
            "predictions": output,
            "features": avg_pooled,
            "loss": losses.CrossEntropyLoss().calculate_loss(output, labels)
        }
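Because padded frames are zero vectors, summing over the frame axis and dividing by the true frame count yields a mean over valid frames only. The tile/reshape is not strictly needed; broadcasting a [batch, 1] denominator gives the same result (a sketch, not the repository's code):

num_frames_f = tf.cast(tf.expand_dims(num_frames, 1), tf.float32)   # [batch, 1]
avg_pooled = tf.reduce_sum(model_input, axis=1) / num_frames_f      # broadcasts across features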
Example No. 9
    def build_model(self, model, reader):
        """Find the model and build the graph."""

        label_loss_fn = losses.CrossEntropyLoss()
        optimizer_class = tf.train.AdamOptimizer

        build_graph(
            reader=reader,
            model=model,
            optimizer_class=optimizer_class,
            clip_gradient_norm=FLAGS.clip_gradient_norm,
            train_data_pattern=FLAGS.train_data_pattern,
            label_loss_fn=label_loss_fn,
            base_learning_rate=FLAGS.base_learning_rate,
            learning_rate_decay=FLAGS.learning_rate_decay,
            learning_rate_decay_examples=FLAGS.learning_rate_decay_examples,
            regularization_penalty=FLAGS.regularization_penalty,
            num_readers=FLAGS.num_readers,
            batch_size=FLAGS.batch_size,
            num_epochs=FLAGS.num_epochs)

        return tf.train.Saver(max_to_keep=0,
                              keep_checkpoint_every_n_hours=0.25)
Example No. 10
def build_graph(model,
                reader,
                train_data_pattern,
                test_data_pattern,
                label_loss_fn=losses.CrossEntropyLoss(),
                base_learning_rate=0.01,
                learning_rate_decay_steps=10000,
                learning_rate_decay=0.95,
                optimizer_class=tf.train.AdamOptimizer,
                clip_gradient_norm=1.0,
                regularization_penalty=1,
                num_readers=1,
                num_epochs=None):
    """
    Creates the Tensorflow graph.
    :param model: The core model (e.g. logistic or neural net). It should inherit from BaseModel.
    :param reader: The data file reader. It should inherit from BaseReader.
    :param train_data_pattern: path to the train data files.
    :param test_data_pattern: path to the test data files.
    :param label_loss_fn: loss to apply to the model. It should inherit from BaseLoss.
    :param base_learning_rate: learning rate to initialize the optimizer with.
    :param learning_rate_decay_steps:
    :param learning_rate_decay:
    :param optimizer_class: Which optimization algorithm to use.
    :param clip_gradient_norm: Magnitude of the gradient to clip to.
    :param regularization_penalty: How much weight to give the regularization loss compared to the label loss.
    :param num_readers:
    :param num_epochs:
    :return:
    """

    global_step = tf.Variable(0, trainable=False, name="global_step")

    learning_rate = tf.train.exponential_decay(
        base_learning_rate,
        global_step,
        learning_rate_decay_steps,
        learning_rate_decay,
        staircase=True)

    optimizer = optimizer_class(learning_rate)

    _, train_stk_input, train_stk_label = get_input_train_tensors(
        reader, train_data_pattern, batch_size=FLAGS.train_batch_size, num_readers=num_readers, num_epochs=num_epochs)

    _, test_stk_input, test_stk_label = get_input_test_tensors(
        reader, test_data_pattern, batch_size=FLAGS.test_batch_size, num_readers=1)

    train_stk_feature_dim = len(train_stk_input.get_shape()) - 1
    test_stk_feature_dim = len(test_stk_input.get_shape()) - 1

    assert train_stk_feature_dim == test_stk_feature_dim

    train_stk_model_input = tf.nn.l2_normalize(train_stk_input, train_stk_feature_dim)
    test_stk_model_input = tf.nn.l2_normalize(test_stk_input, test_stk_feature_dim)

    with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
        train_result = model.create_model(train_stk_model_input)
        test_result = model.create_model(test_stk_model_input)

        train_predictions = train_result["predictions"]
        test_predictions = test_result["predictions"]
        stk_embedding = test_result["stk_embedding"]

        if "loss" in train_result.keys():
            train_loss = train_result["loss"]
        else:
            train_loss = label_loss_fn.calculate_loss(train_predictions, train_stk_label)

        train_aux_loss = tf.constant(0.0)
        if "aux_predictions" in train_result.keys():
            for pred in train_result["aux_predictions"]:
                train_aux_loss += label_loss_fn.calculate_loss(pred, train_stk_label)

        if "regularization_loss" in train_result.keys():
            train_reg_loss = train_result["regularization_loss"]
        else:
            train_reg_loss = tf.constant(0.0)

        train_reg_losses = tf.losses.get_regularization_losses()
        if train_reg_losses:
            train_reg_loss += tf.add_n(train_reg_losses)

        if "loss" in test_result.keys():
            test_loss = test_result["loss"]
        else:
            test_loss = label_loss_fn.calculate_loss(test_predictions, test_stk_label)

        # A dependency to the train_op.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if "update_ops" in train_result.keys():
            update_ops += train_result["update_ops"]
        if update_ops:
            with tf.control_dependencies(update_ops):
                barrier = tf.no_op(name="gradient_barrier")
                with tf.control_dependencies([barrier]):
                    train_loss = tf.identity(train_loss)
                    train_aux_loss = tf.identity(train_aux_loss)

        # Incorporate the L2 weight penalties etc.
        train_final_loss = regularization_penalty * train_reg_loss + train_loss + train_aux_loss

        train_op = slim.learning.create_train_op(
            train_final_loss,
            optimizer,
            global_step=global_step,
            clip_gradient_norm=clip_gradient_norm)

        tf.add_to_collection("global_step", global_step)
        tf.add_to_collection("train_loss", train_loss)
        tf.add_to_collection("test_top_loss", test_loss)
        tf.add_to_collection("train_predictions", train_predictions)
        tf.add_to_collection("test_predictions", test_predictions)
        tf.add_to_collection("train_stk_input", train_stk_input)
        tf.add_to_collection("train_stk_model_input", train_stk_model_input)
        tf.add_to_collection("test_stk_input", test_stk_input)
        tf.add_to_collection("test_stk_model_input", test_stk_model_input)
        tf.add_to_collection("train_stk_label", tf.cast(train_stk_label, tf.float32))
        tf.add_to_collection("test_stk_label", tf.cast(test_stk_label, tf.float32))
        tf.add_to_collection("stk_embedding", stk_embedding)
        tf.add_to_collection("train_op", train_op)
    def build_model(self):
        """Builds the model.

    Inputs:
      self.image_embeddings
      self.target_seqs (training and eval only)
      self.input_mask (training and eval only)

    Outputs:
      self.total_loss (training and eval only)
      self.target_cross_entropy_losses (training and eval only)
      self.target_cross_entropy_loss_weights (training and eval only)
    """

        caption_model_fn = find_class_by_name(FLAGS.model, [im2txt_models])
        caption_model = caption_model_fn()

        # model
        outputs = caption_model.create_model(
            input_seqs=self.input_seqs,
            image_model_output=self.image_model_output,
            initializer=self.initializer,
            mode=self.mode,
            target_seqs=self.target_seqs,
            global_step=self.global_step,
            input_mask=self.input_mask,
            target_lengths=self.target_lengths)

        # loss
        if self.mode == "inference":
            if "logits" in outputs:
                tf.nn.softmax(outputs["logits"], name="softmax")
            elif "bs_results" in outputs:
                self.predicted_ids = outputs["bs_results"].predicted_ids
                self.scores = outputs[
                    "bs_results"].beam_search_decoder_output.scores
                if "bs_results_lengths" in outputs:
                    self.predicted_ids_lengths = outputs["bs_results_lengths"]
            if "top_n_attributes" in outputs:
                self.top_n_attributes = outputs["top_n_attributes"]
        else:
            if "mle_caption_logits" in outputs:
                logits = tf.reshape(outputs["mle_caption_logits"],
                                    [-1, FLAGS.vocab_size])
                targets = tf.reshape(self.target_seqs, [-1])
                weights = tf.to_float(tf.reshape(self.input_mask, [-1]))

                # Compute losses.
                mle_loss_fn = losses.SparseSoftmaxCrossEntropyLoss()
                mle_loss = mle_loss_fn.calculate_loss(logits, targets, weights)

                # Logging losses.
                tf.summary.scalar("losses/mle_loss", mle_loss)
                tf.losses.add_loss(mle_loss)

            # caption loss
            if FLAGS.rl_training == True:
                # rl loss
                # load greed caption and sample caption to calculate reward
                target_caption_words = self.target_seqs
                target_caption_lengths = self.target_lengths
                greedy_caption_words = outputs["greedy_caption_words"]
                greedy_caption_lengths = outputs["greedy_caption_lengths"]
                sample_caption_logits = outputs["sample_caption_logits"]
                sample_caption_words = outputs["sample_caption_words"]
                sample_caption_lengths = outputs["sample_caption_lengths"]

                if get_rank(target_caption_words) == 2:
                    target_caption_words = tf.expand_dims(
                        target_caption_words, 1)
                if get_rank(target_caption_lengths) == 1:
                    target_caption_lengths = tf.expand_dims(
                        target_caption_lengths, 1)

                if get_shape_as_list(target_caption_words)[-1] is None:
                    target_caption_words, target_caption_lengths = \
                        pad_or_truncate(target_caption_words, target_caption_lengths,
                                        axis = -1, max_length = FLAGS.max_ref_length)
                if get_shape_as_list(greedy_caption_words)[-1] is None:
                    greedy_caption_words, greedy_caption_lengths = \
                        pad_or_truncate(greedy_caption_words, greedy_caption_lengths,
                                        axis = -1, max_length = FLAGS.max_caption_length)
                if get_shape_as_list(sample_caption_logits)[1] is None:
                    sample_caption_logits, _ = \
                        pad_or_truncate(sample_caption_logits, sample_caption_lengths,
                                        axis = 1, max_length = FLAGS.max_caption_length)
                if get_shape_as_list(sample_caption_words)[-1] is None:
                    sample_caption_words, sample_caption_lengths = \
                        pad_or_truncate(sample_caption_words, sample_caption_lengths,
                                        axis = -1, max_length = FLAGS.max_caption_length)

                if FLAGS.rl_beam_search_approximation:
                    target_caption_words = tf.contrib.seq2seq.tile_batch(
                        target_caption_words, multiplier=FLAGS.beam_width)
                    target_caption_lengths = tf.contrib.seq2seq.tile_batch(
                        target_caption_lengths, multiplier=FLAGS.beam_width)
                rl_loss_cls = find_class_by_name(FLAGS.rl_training_loss,
                                                 [losses])
                rl_loss_fn = rl_loss_cls()
                rl_loss = rl_loss_fn.calculate_loss(
                    target_caption_words=target_caption_words,
                    target_caption_lengths=target_caption_lengths,
                    greedy_caption_words=greedy_caption_words,
                    greedy_caption_lengths=greedy_caption_lengths,
                    sample_caption_words=sample_caption_words,
                    sample_caption_lengths=sample_caption_lengths,
                    sample_caption_logits=sample_caption_logits)

                tf.losses.add_loss(rl_loss)

            else:
                if "logits" in outputs:
                    # prepare logits, targets and weight
                    logits = outputs["logits"]
                    logits = tf.reshape(
                        logits, [FLAGS.batch_size, -1, FLAGS.vocab_size])
                    logits, _ = pad_or_truncate(
                        logits, None, axis=1, max_length=FLAGS.max_ref_length)
                    logits = tf.reshape(logits, [-1, FLAGS.vocab_size])
                    targets = tf.reshape(self.target_seqs, [-1])
                    weights = tf.to_float(tf.reshape(self.input_mask, [-1]))

                    # Compute losses.
                    loss_fn = losses.SparseSoftmaxCrossEntropyLoss()
                    batch_loss = loss_fn.calculate_loss(
                        logits, targets, weights)

                    # Logging losses.
                    tf.summary.scalar("losses/batch_loss", batch_loss)
                    tf.losses.add_loss(batch_loss)

                    self.target_cross_entropy_losses = batch_loss  # Used in evaluation.
                    self.target_cross_entropy_loss_weights = weights  # Used in evaluation.

                # multi-label-loss
                if "attributes_logits" in outputs and "attributes_mask" in outputs:
                    attributes_logits = outputs["attributes_logits"]
                    attributes_targets = get_attributes_target(
                        self.target_seqs, outputs["attributes_mask"])
                    if FLAGS.use_idf_weighted_attribute_loss:
                        attributes_mask = outputs["idf_weighted_mask"]
                    else:
                        attributes_mask = outputs["attributes_mask"]

                    attributes_loss_fn = losses.CrossEntropyLoss()
                    attributes_loss = attributes_loss_fn.calculate_loss(
                        attributes_logits, attributes_targets, attributes_mask)

                    tf.losses.add_loss(attributes_loss)
                    tf.summary.scalar("losses/attributes_loss",
                                      attributes_loss)
                    self.attributes_loss = attributes_loss

                # discriminative loss
                # should be multi-label margin loss, but the loss below is a little different
                if "discriminative_logits" in outputs:
                    word_labels = caption_to_multi_labels(self.target_seqs)
                    discriminative_loss = tf.losses.hinge_loss(
                        labels=word_labels,
                        logits=outputs["discriminative_logits"],
                        weights=FLAGS.discriminative_loss_weights)
                    tf.summary.scalar("losses/discriminative_loss",
                                      discriminative_loss)
                    self.discriminative_loss = discriminative_loss

                # word weighted cross entropy loss
                if "word_predictions" in outputs:
                    word_loss_fn = losses.CrossEntropyLoss()
                    word_loss = word_loss_fn.calculate_loss(
                        outputs["word_predictions"],
                        caption_to_multi_labels(self.target_seqs))
                    tf.summary.scalar("losses/word_loss", word_loss)
                    tf.losses.add_loss(word_loss)
                    self.word_loss = word_loss

            total_loss = tf.losses.get_total_loss()

            # Add summaries.
            tf.summary.scalar("losses/total_loss", total_loss)
            for var in tf.trainable_variables():
                tf.summary.histogram("parameters/" + var.op.name, var)

            self.total_loss = total_loss
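For the maximum-likelihood branch above, the logits are flattened to [batch*time, vocab_size], the targets to [batch*time], and the input mask becomes per-token weights. A sketch of what SparseSoftmaxCrossEntropyLoss.calculate_loss plausibly computes with those arguments (the repository's own implementation may differ):

def sparse_softmax_xent(logits, targets, weights):
    per_token = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=targets, logits=logits)
    # mask out padded positions and normalize by the number of real tokens
    return tf.reduce_sum(per_token * weights) / tf.maximum(
        tf.reduce_sum(weights), 1.0)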
Example No. 12
    def create_model(self,
                     model_input,
                     vocab_size,
                     labels,
                     scope='default',
                     is_training=True,
                     **unused_params):

        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            reshaped_input = tf.expand_dims(model_input, -1)
            reshaped_input = tf.expand_dims(reshaped_input, -1)

            conv1 = slim.convolution(reshaped_input,
                                     64, [49, 1],
                                     stride=(4, 1))
            max_pool1 = slim.max_pool2d(conv1, (9, 1), (2, 1), padding='SAME')
            norm1 = tf.nn.local_response_normalization(max_pool1)

            conv2 = slim.convolution(norm1, 64, 1, 1)
            conv3 = slim.convolution(conv2, 192, (9, 1), 1)
            norm2 = tf.nn.local_response_normalization(conv3)
            max_pool2 = slim.max_pool2d(norm2, (9, 1), (2, 1), padding='SAME')

            inception3a = self.inception_module(max_pool2,
                                                [64, 96, 128, 16, 32, 32],
                                                '3a')
            inception3b = self.inception_module(inception3a,
                                                [128, 128, 192, 32, 96, 64],
                                                '3b')

            max_pool3 = slim.max_pool2d(inception3b, (9, 1), (2, 1),
                                        padding='SAME')

            inception4a = self.inception_module(max_pool3,
                                                [192, 96, 208, 16, 48, 64],
                                                '4a')
            inception4b = self.inception_module(inception4a,
                                                [160, 112, 224, 24, 64, 64],
                                                '4b')
            inception4c = self.inception_module(inception4b,
                                                [128, 128, 256, 24, 64, 64],
                                                '4c')
            inception4d = self.inception_module(inception4c,
                                                [112, 144, 288, 32, 64, 64],
                                                '4d')
            inception4e = self.inception_module(inception4d,
                                                [256, 160, 320, 32, 128, 128],
                                                '4e')

            max_pool4 = slim.max_pool2d(inception4e, (9, 1), (2, 1),
                                        padding='SAME')

            inception5a = self.inception_module(max_pool4,
                                                [256, 160, 320, 32, 128, 128],
                                                '5a')
            inception5b = self.inception_module(inception5a,
                                                [384, 192, 384, 48, 128, 128],
                                                '5b')

            inter1 = tf.squeeze(inception4a, axis=[2])
            inter2 = tf.squeeze(inception4d, axis=[2])
            output = tf.squeeze(inception5b, axis=[2])
            inter1 = model_utils.FramePooling(inter1, FLAGS.googlenet_pooling)
            inter2 = model_utils.FramePooling(inter2, FLAGS.googlenet_pooling)
            output = model_utils.FramePooling(output, FLAGS.googlenet_pooling)

            inter_results1 = MoeModel().create_model(inter1, vocab_size,
                                                     'inter1')
            inter_results2 = MoeModel().create_model(inter2, vocab_size,
                                                     'inter2')
            results = MoeModel().create_model(output, vocab_size, 'final')
            results['features'] = output
            if labels is not None:
                results['loss'] = losses.CrossEntropyLoss().calculate_loss(
                    results['predictions'], labels)
                results['loss'] += losses.CrossEntropyLoss().calculate_loss(
                    inter_results1['predictions'], labels)
                results['loss'] += losses.CrossEntropyLoss().calculate_loss(
                    inter_results2['predictions'], labels)

            return results
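The inception_module helper is not shown. A hypothetical sketch following the usual GoogLeNet branch layout ([1x1, 3x3-reduce, 3x3, 5x5-reduce, 5x5, pool-proj]); since the feature maps here are [batch, frames, 1, channels], the "spatial" kernels are tall and narrow, and the exact kernel heights below are assumptions:

    def inception_module(self, inputs, sizes, scope):
        with tf.variable_scope(scope):
            branch1 = slim.convolution(inputs, sizes[0], 1)
            branch2 = slim.convolution(inputs, sizes[1], 1)
            branch2 = slim.convolution(branch2, sizes[2], [9, 1])
            branch3 = slim.convolution(inputs, sizes[3], 1)
            branch3 = slim.convolution(branch3, sizes[4], [25, 1])
            branch4 = slim.max_pool2d(inputs, (9, 1), 1, padding='SAME')
            branch4 = slim.convolution(branch4, sizes[5], 1)
            # concatenate the four branches along the channel axis
            return tf.concat([branch1, branch2, branch3, branch4], axis=3)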
Example No. 13
def build_graph(reader,
                model,
                train_data_pattern,
                label_loss_fn=losses.CrossEntropyLoss(),
                batch_size=1000,
                base_learning_rate=0.01,
                learning_rate_decay_examples=1000000,
                learning_rate_decay=0.95,
                optimizer_class=tf.train.AdamOptimizer,
                clip_gradient_norm=1.0,
                regularization_penalty=1,
                num_readers=1,
                num_epochs=None):

    global_step = tf.Variable(0, trainable=False, name="global_step")

    learning_rate = tf.train.exponential_decay(base_learning_rate,
                                               global_step * batch_size,
                                               learning_rate_decay_examples,
                                               learning_rate_decay,
                                               staircase=True)
    tf.summary.scalar('learning_rate', learning_rate)

    optimizer = optimizer_class(learning_rate)
    unused_video_id, model_input_raw, labels_batch, num_frames = (
        get_input_data_tensors(reader,
                               train_data_pattern,
                               batch_size=batch_size,
                               num_readers=num_readers,
                               num_epochs=num_epochs))
    tf.summary.histogram("model/input_raw", model_input_raw)

    feature_dim = len(model_input_raw.get_shape()) - 1

    model_input = tf.nn.l2_normalize(model_input_raw, feature_dim)

    with tf.name_scope("model"):
        result = model.create_model(model_input,
                                    num_frames=num_frames,
                                    vocab_size=reader.num_classes,
                                    labels=labels_batch)

        for variable in slim.get_model_variables():
            tf.summary.histogram(variable.op.name, variable)

        predictions = result["predictions"]
        if "loss" in result.keys():
            label_loss = result["loss"]
        else:
            label_loss = label_loss_fn.calculate_loss(predictions,
                                                      labels_batch)
        tf.summary.scalar("label_loss", label_loss)

        if "regularization_loss" in result.keys():
            reg_loss = result["regularization_loss"]
        else:
            reg_loss = tf.constant(0.0)

        reg_losses = tf.losses.get_regularization_losses()
        if reg_losses:
            reg_loss += tf.add_n(reg_losses)

        if regularization_penalty != 0:
            tf.summary.scalar("reg_loss", reg_loss)

        # Adds update_ops (e.g., moving average updates in batch normalization) as
        # a dependency to the train_op.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if "update_ops" in result.keys():
            update_ops += result["update_ops"]
        if update_ops:
            with tf.control_dependencies(update_ops):
                barrier = tf.no_op(name="gradient_barrier")
                with tf.control_dependencies([barrier]):
                    label_loss = tf.identity(label_loss)

        # Incorporate the L2 weight penalties etc.
        final_loss = regularization_penalty * reg_loss + label_loss
        train_op = slim.learning.create_train_op(
            final_loss,
            optimizer,
            global_step=global_step,
            clip_gradient_norm=clip_gradient_norm)

        tf.add_to_collection("global_step", global_step)
        tf.add_to_collection("loss", label_loss)
        tf.add_to_collection("predictions", predictions)
        tf.add_to_collection("input_batch_raw", model_input_raw)
        tf.add_to_collection("input_batch", model_input)
        tf.add_to_collection("num_frames", num_frames)
        tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32))
        tf.add_to_collection("train_op", train_op)
Example No. 14
    def create_model(self,
                     model_input,
                     vocab_size,
                     labels=None,
                     scope='default',
                     l2_penalty=1e-8,
                     is_training=True,
                     **unused_params):

        reshaped_input = tf.expand_dims(model_input, -1)
        reshaped_input = tf.expand_dims(reshaped_input, -1)

        out1 = tf.layers.conv2d(
            reshaped_input,
            128, (32, 1),
            activation=tf.nn.relu,
            kernel_initializer=tf.contrib.layers.xavier_initializer(),
            padding='same')
        out1_norm = tf.layers.batch_normalization(out1, training=is_training)
        out1_pool = tf.layers.max_pooling2d(out1_norm, (8, 1),
                                            2,
                                            padding='same')

        out2 = tf.layers.conv2d(
            out1_pool,
            256, (32, 1),
            activation=tf.nn.relu,
            kernel_initializer=tf.contrib.layers.xavier_initializer(),
            padding='same')
        out2_norm = tf.layers.batch_normalization(out2, training=is_training)
        out2_pool = tf.layers.max_pooling2d(out2_norm, (8, 1),
                                            2,
                                            padding='same')

        out3 = tf.layers.conv2d(
            out2_pool,
            256, (32, 1),
            activation=tf.nn.relu,
            kernel_initializer=tf.contrib.layers.xavier_initializer(),
            padding='same')
        out3_norm = tf.layers.batch_normalization(out3, training=is_training)
        out3_pool = tf.layers.max_pooling2d(out3_norm, (8, 1),
                                            2,
                                            padding='same')

        encoded = tf.reduce_max(out3_pool, axis=[2, 3])

        decode = tf.expand_dims(encoded, -1)
        decode = tf.expand_dims(decode, -1)
        decode1 = tf.layers.conv2d(
            decode,
            64, (4, 1),
            activation=tf.nn.relu,
            kernel_initializer=tf.contrib.layers.xavier_initializer(),
            padding='same')
        decode1_batch = tf.layers.batch_normalization(decode1,
                                                      training=is_training)
        decode1_upsample = tf.layers.conv2d_transpose(
            decode1_batch,
            256, (8, 1),
            strides=(2, 1),
            padding='same',
            activation=tf.nn.relu,
            kernel_initializer=tf.contrib.layers.xavier_initializer())

        decode2 = tf.layers.conv2d(
            decode1_upsample,
            64,
            1,
            activation=tf.nn.relu,
            kernel_initializer=tf.contrib.layers.xavier_initializer())
        decode2 = tf.layers.conv2d(
            decode2,
            64, (4, 1),
            activation=tf.nn.relu,
            kernel_initializer=tf.contrib.layers.xavier_initializer(),
            padding='same')
        decode2_batch = tf.layers.batch_normalization(decode2,
                                                      training=is_training)
        decode2_upsample = tf.layers.conv2d_transpose(
            decode2_batch,
            256, (8, 1),
            strides=(2, 1),
            padding='same',
            activation=tf.nn.relu,
            kernel_initializer=tf.contrib.layers.xavier_initializer())

        decode3 = tf.layers.conv2d(
            decode2_upsample,
            64,
            1,
            activation=tf.nn.relu,
            kernel_initializer=tf.contrib.layers.xavier_initializer())
        decode3 = tf.layers.conv2d(
            decode3,
            64, (4, 1),
            activation=tf.nn.relu,
            kernel_initializer=tf.contrib.layers.xavier_initializer(),
            padding='same')
        decode3_batch = tf.layers.batch_normalization(decode3,
                                                      training=is_training)
        decode3_upsample = tf.layers.conv2d_transpose(
            decode3_batch,
            256, (8, 1),
            strides=(2, 1),
            padding='same',
            activation=tf.nn.relu,
            kernel_initializer=tf.contrib.layers.xavier_initializer())

        decoded = tf.reduce_max(decode3_upsample, axis=[2, 3])

        results = {}
        results['loss'] = 500 * tf.losses.mean_squared_error(
            model_input, decoded)

        output = MoeModel().create_model(encoded, vocab_size)
        results['predictions'] = output['predictions']
        if labels is not None:
            results['loss'] += losses.CrossEntropyLoss().calculate_loss(
                results['predictions'], labels)

        return results
Example No. 15
    def create_model(self,
                     model_input,
                     vocab_size,
                     labels,
                     scope='default',
                     l2_penalty=1e-8,
                     is_training=True,
                     **unused_params):

        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):

            out1 = slim.fully_connected(
                model_input,
                768,
                weights_regularizer=slim.l2_regularizer(l2_penalty))
            out1_drop = slim.dropout(out1, is_training=is_training)

            out2 = slim.fully_connected(
                out1_drop,
                512,
                weights_regularizer=slim.l2_regularizer(l2_penalty))
            out2_drop = slim.dropout(out2, is_training=is_training)

            encoded = slim.fully_connected(
                out2_drop,
                384,
                weights_regularizer=slim.l2_regularizer(l2_penalty))

            out4 = slim.fully_connected(
                encoded,
                640,
                weights_regularizer=slim.l2_regularizer(l2_penalty))
            out4_drop = slim.dropout(out4, is_training=is_training)

            out5 = slim.fully_connected(
                out4_drop,
                768,
                weights_regularizer=slim.l2_regularizer(l2_penalty))
            decoded = slim.fully_connected(
                out5,
                int(model_input.shape[1]),
                weights_regularizer=slim.l2_regularizer(l2_penalty))

            results = {}
            encoder_loss = 500 * tf.losses.mean_squared_error(
                model_input, decoded)
            tf.summary.scalar("encoder_loss", encoder_loss)
            results['loss'] = encoder_loss

            output = MoeModel().create_model(encoded,
                                             vocab_size,
                                             scope="final_layer")
            output1 = MoeModel().create_model(out4,
                                              vocab_size,
                                              scope="intermediate_layer")

            results['predictions'] = (output['predictions'] +
                                      output1['predictions']) / 2
            if labels is not None:
                prediction_loss = losses.CrossEntropyLoss().calculate_loss(
                    results['predictions'], labels)
                tf.summary.scalar("prediction_loss", prediction_loss)
                results['loss'] += prediction_loss

            return results
Example No. 16
def build_graph(reader,
                model,
                train_data_pattern,
                label_loss_fn=losses.CrossEntropyLoss(),
                batch_size=1000,
                base_learning_rate=0.01,
                learning_rate_decay_examples=1000000,
                learning_rate_decay=0.95,
                optimizer_class=tf.train.AdamOptimizer,
                clip_gradient_norm=1.0,
                regularization_penalty=1,
                num_readers=1,
                num_epochs=None):
    """Creates the Tensorflow graph.

  This will only be called once in the life of
  a training model, because after the graph is created the model will be
  restored from a meta graph file rather than being recreated.

  Args:
    reader: The data file reader. It should inherit from BaseReader.
    model: The core model (e.g. logistic or neural net). It should inherit
           from BaseModel.
    train_data_pattern: glob path to the training data files.
    label_loss_fn: What kind of loss to apply to the model. It should inherit
                from BaseLoss.
    batch_size: How many examples to process at a time.
    base_learning_rate: What learning rate to initialize the optimizer with.
    optimizer_class: Which optimization algorithm to use.
    clip_gradient_norm: Magnitude of the gradient to clip to.
    regularization_penalty: How much weight to give the regularization loss
                            compared to the label loss.
    num_readers: How many threads to use for I/O operations.
    num_epochs: How many passes to make over the data. 'None' means an
                unlimited number of passes.
  """

    global_step = tf.Variable(0, trainable=False, name="global_step")

    learning_rate = tf.train.exponential_decay(base_learning_rate,
                                               global_step * batch_size,
                                               learning_rate_decay_examples,
                                               learning_rate_decay,
                                               staircase=True)
    tf.summary.scalar('learning_rate', learning_rate)

    optimizer = optimizer_class(learning_rate)
    unused_video_id, model_input_raw, labels_batch, num_frames = (
        get_input_data_tensors(reader,
                               train_data_pattern,
                               batch_size=batch_size,
                               num_readers=num_readers,
                               num_epochs=num_epochs))
    tf.summary.histogram("model/input_raw", model_input_raw)

    feature_dim = len(model_input_raw.get_shape()) - 1

    model_input = tf.nn.l2_normalize(model_input_raw, feature_dim)

    with tf.name_scope("model"):
        result = model.create_model(model_input,
                                    num_frames=num_frames,
                                    vocab_size=reader.num_classes,
                                    labels=labels_batch)

        for variable in slim.get_model_variables():
            tf.summary.histogram(variable.op.name, variable)

        predictions = result["predictions"]
        if "loss" in result.keys():
            label_loss = result["loss"]
        else:
            label_loss = label_loss_fn.calculate_loss(predictions,
                                                      labels_batch)
        tf.summary.scalar("label_loss", label_loss)

        if "regularization_loss" in result.keys():
            reg_loss = result["regularization_loss"]
        else:
            reg_loss = tf.constant(0.0)

        reg_losses = tf.losses.get_regularization_losses()
        if reg_losses:
            reg_loss += tf.add_n(reg_losses)

        if regularization_penalty != 0:
            tf.summary.scalar("reg_loss", reg_loss)

        # Adds update_ops (e.g., moving average updates in batch normalization) as
        # a dependency to the train_op.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if "update_ops" in result.keys():
            update_ops += result["update_ops"]
        if update_ops:
            with tf.control_dependencies(update_ops):
                barrier = tf.no_op(name="gradient_barrier")
                with tf.control_dependencies([barrier]):
                    label_loss = tf.identity(label_loss)

        # Incorporate the L2 weight penalties etc.
        final_loss = regularization_penalty * reg_loss + label_loss
        train_op = slim.learning.create_train_op(
            final_loss,
            optimizer,
            global_step=global_step,
            clip_gradient_norm=clip_gradient_norm)

        tf.add_to_collection("global_step", global_step)
        tf.add_to_collection("loss", label_loss)
        tf.add_to_collection("predictions", predictions)
        tf.add_to_collection("input_batch_raw", model_input_raw)
        tf.add_to_collection("input_batch", model_input)
        tf.add_to_collection("num_frames", num_frames)
        tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32))
        tf.add_to_collection("train_op", train_op)
Example No. 17
#data_pattern = ['/Users/super/yt8m_videofeature/train*.tfrecord']

data_pattern = ['I:\\yt8m_video\\train*.tfrecord']

num_epochs = 5

reader_batch_size = 1024

num_readers = 1

num_classes = 4716

mini_batch_size = 1024

label_loss_fn = losses.CrossEntropyLoss()

regularization_penalty = 1

base_learning_rate = 0.01

learning_rate_decay_examples = 4000000

learning_rate_decay = 0.95

optimizer_class = tf.train.AdamOptimizer

clip_gradient_norm = 1.0

regularization_penalty = 1
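These settings line up with the build_graph signatures shown earlier. A sketch of how they might be wired together; the reader and model constructors are illustrative assumptions (whether train_data_pattern expects a glob string or a list depends on the local get_input_data_tensors), and reader_batch_size / num_classes would be consumed by the input pipeline:

reader = readers.YT8MAggregatedFeatureReader(num_classes=num_classes)  # assumed reader class
model = video_level_models.LogisticModel()                             # assumed model class

build_graph(reader=reader,
            model=model,
            train_data_pattern=data_pattern[0],
            label_loss_fn=label_loss_fn,
            batch_size=mini_batch_size,
            base_learning_rate=base_learning_rate,
            learning_rate_decay_examples=learning_rate_decay_examples,
            learning_rate_decay=learning_rate_decay,
            optimizer_class=optimizer_class,
            clip_gradient_norm=clip_gradient_norm,
            regularization_penalty=regularization_penalty,
            num_readers=num_readers,
            num_epochs=num_epochs)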
Example No. 18
def build_graph(reader,
                model,
                train_data_pattern,
                label_loss_fn=losses.CrossEntropyLoss(),
                batch_size=1000,
                base_learning_rate=0.01,
                learning_rate_decay_examples=1000000,
                learning_rate_decay=0.95,
                optimizer_class=tf.train.AdamOptimizer,
                transformer_class=feature_transform.DefaultTransformer,
                augmenter_class=data_augmentation.DefaultAugmenter,
                clip_gradient_norm=1.0,
                regularization_penalty=1,
                num_readers=1,
                num_epochs=None):
    """Creates the Tensorflow graph.

  This will only be called once in the life of
  a training model, because after the graph is created the model will be
  restored from a meta graph file rather than being recreated.

  Args:
    reader: The data file reader. It should inherit from BaseReader.
    model: The core model (e.g. logistic or neural net). It should inherit
           from BaseModel.
    train_data_pattern: glob path to the training data files.
    label_loss_fn: What kind of loss to apply to the model. It should inherit
                from BaseLoss.
    batch_size: How many examples to process at a time.
    base_learning_rate: What learning rate to initialize the optimizer with.
    optimizer_class: Which optimization algorithm to use.
    clip_gradient_norm: Magnitude of the gradient to clip to.
    regularization_penalty: How much weight to give the regularization loss
                            compared to the label loss.
    num_readers: How many threads to use for I/O operations.
    num_epochs: How many passes to make over the data. 'None' means an
                unlimited number of passes.
  """

    global_step = tf.Variable(0, trainable=False, name="global_step")

    learning_rate = tf.train.exponential_decay(base_learning_rate,
                                               global_step * batch_size,
                                               learning_rate_decay_examples,
                                               learning_rate_decay,
                                               staircase=True)
    tf.summary.scalar('learning_rate', learning_rate)

    optimizer = optimizer_class(learning_rate)
    if FLAGS.distillation_features:
        video_id, model_input_raw, labels_batch, num_frames, distill_labels_batch = (
            get_input_data_tensors(reader,
                                   train_data_pattern,
                                   batch_size=batch_size,
                                   num_readers=num_readers,
                                   num_epochs=num_epochs))
        if FLAGS.distillation_features and FLAGS.distillation_type == 2:
            p = FLAGS.distillation_percent
            print "distillation_percent =", p, "reforming labels"
            float_labels = tf.cast(labels_batch, dtype=tf.float32)
            sum_float_labels = tf.reduce_sum(float_labels,
                                             axis=1,
                                             keep_dims=True)
            sum_distill_labels = tf.reduce_sum(
                distill_labels_batch, axis=1, keep_dims=True) + 1e-6
            distill_labels_batch = float_labels + distill_labels_batch * (
                sum_float_labels / sum_distill_labels * p)
            distill_labels_batch = tf.clip_by_value(distill_labels_batch,
                                                    clip_value_min=0.0,
                                                    clip_value_max=1.0)
    else:
        video_id, model_input_raw, labels_batch, num_frames = (
            get_input_data_tensors(reader,
                                   train_data_pattern,
                                   batch_size=batch_size,
                                   num_readers=num_readers,
                                   num_epochs=num_epochs))

    # Data augmentation; this step is not part of the inference graph.
    data_augmenter = augmenter_class()
    model_input_raw, labels_batch, num_frames = data_augmenter.augment(
        model_input_raw, num_frames=num_frames, labels_batch=labels_batch)

    tf.summary.histogram("model/input_raw", model_input_raw)

    feature_transformer = transformer_class()
    model_input, num_frames = feature_transformer.transform(
        model_input_raw, num_frames=num_frames)

    tf.summary.histogram("model/input", model_input)

    with tf.name_scope("model"):
        if FLAGS.noise_level > 0:
            noise_level_tensor = tf.placeholder_with_default(
                0.0, shape=[], name="noise_level")
        else:
            noise_level_tensor = None

        if FLAGS.distillation_as_input:
            distillation_predictions = distill_labels_batch
        else:
            distillation_predictions = None

        if FLAGS.dropout:
            keep_prob_tensor = tf.placeholder_with_default(1.0,
                                                           shape=[],
                                                           name="keep_prob")
            result = model.create_model(
                model_input,
                num_frames=num_frames,
                vocab_size=reader.num_classes,
                labels=labels_batch,
                dropout=FLAGS.dropout,
                keep_prob=keep_prob_tensor,
                distillation_predictions=distillation_predictions,
                noise_level=noise_level_tensor)
        else:
            result = model.create_model(
                model_input,
                num_frames=num_frames,
                vocab_size=reader.num_classes,
                labels=labels_batch,
                distillation_predictions=distillation_predictions,
                noise_level=noise_level_tensor)

        for variable in slim.get_model_variables():
            tf.summary.histogram(variable.op.name, variable)

        print "result", result
        predictions = result["predictions"]
        if "loss" in result.keys():
            label_loss = result["loss"]
        else:
            video_weights_batch = None
            if FLAGS.reweight:
                video_weights_batch = get_video_weights(video_id)

            if FLAGS.distillation_as_boosting:
                video_weights_batch = get_weights_by_predictions(
                    labels_batch, distillation_predictions)

            if FLAGS.multitask:
                support_predictions = result["support_predictions"]
                tf.summary.histogram("model/support_predictions",
                                     support_predictions)
                print "support_predictions", support_predictions
                if FLAGS.distillation_features and FLAGS.distillation_type == 1:
                    p = FLAGS.distillation_percent
                    print "distillation_percent =", p
                    if p <= 0:
                        label_loss = label_loss_fn.calculate_loss(
                            predictions,
                            support_predictions,
                            labels_batch,
                            weights=video_weights_batch)
                    elif p >= 1:
                        label_loss = label_loss_fn.calculate_loss(
                            predictions,
                            support_predictions,
                            distill_labels_batch,
                            weights=video_weights_batch)
                    else:
                        label_loss = label_loss_fn.calculate_loss(predictions, support_predictions, labels_batch, weights=video_weights_batch) * (1.0 - p) \
                                    + label_loss_fn.calculate_loss(predictions, support_predictions, distill_labels_batch, weights=video_weights_batch) * p
                elif FLAGS.distillation_features and FLAGS.distillation_type == 2:
                    print "using pure distillation loss"
                    label_loss = label_loss_fn.calculate_loss(
                        predictions,
                        support_predictions,
                        distill_labels_batch,
                        weights=video_weights_batch)
                else:
                    print "using original loss"
                    label_loss = label_loss_fn.calculate_loss(
                        predictions,
                        support_predictions,
                        labels_batch,
                        weights=video_weights_batch)
            else:
                if FLAGS.distillation_features and FLAGS.distillation_type == 1:
                    p = FLAGS.distillation_percent
                    print "distillation_percent =", p
                    if p <= 0:
                        label_loss = label_loss_fn.calculate_loss(
                            predictions,
                            labels_batch,
                            weights=video_weights_batch)
                    elif p >= 1:
                        label_loss = label_loss_fn.calculate_loss(
                            predictions,
                            distill_labels_batch,
                            weights=video_weights_batch)
                    else:
                        label_loss = label_loss_fn.calculate_loss(predictions, labels_batch, weights=video_weights_batch) * (1.0 - p) \
                                     + label_loss_fn.calculate_loss(predictions, distill_labels_batch, weights=video_weights_batch) * p
                elif FLAGS.distillation_features and FLAGS.distillation_type == 2:
                    print "using pure distillation loss"
                    label_loss = label_loss_fn.calculate_loss(
                        predictions,
                        distill_labels_batch,
                        weights=video_weights_batch)
                else:
                    print "using original loss"
                    label_loss = label_loss_fn.calculate_loss(
                        predictions, labels_batch, weights=video_weights_batch)

        tf.summary.histogram("model/predictions", predictions)
        tf.summary.scalar("label_loss", label_loss)

        if "regularization_loss" in result.keys():
            reg_loss = result["regularization_loss"]
        else:
            reg_loss = tf.constant(0.0)

        reg_losses = tf.losses.get_regularization_losses()
        if reg_losses:
            reg_loss += tf.add_n(reg_losses)

        if regularization_penalty != 0:
            tf.summary.scalar("reg_loss", reg_loss)

        # Adds update_ops (e.g., moving average updates in batch normalization) as
        # a dependency to the train_op.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if "update_ops" in result.keys():
            update_ops += result["update_ops"]
        if update_ops:
            with tf.control_dependencies(update_ops):
                barrier = tf.no_op(name="gradient_barrier")
                with tf.control_dependencies([barrier]):
                    label_loss = tf.identity(label_loss)

        # Incorporate the L2 weight penalties etc.
        final_loss = regularization_penalty * reg_loss + label_loss

        gradients = optimizer.compute_gradients(
            final_loss, colocate_gradients_with_ops=False)
        if clip_gradient_norm > 0:
            with tf.name_scope('clip_grads'):
                gradients = utils.clip_gradient_norms(gradients,
                                                      clip_gradient_norm)
        train_op = optimizer.apply_gradients(gradients,
                                             global_step=global_step)

        tf.add_to_collection("global_step", global_step)
        tf.add_to_collection("loss", label_loss)
        tf.add_to_collection("predictions", predictions)
        tf.add_to_collection("input_batch_raw", model_input_raw)
        tf.add_to_collection("input_batch", model_input)
        tf.add_to_collection("num_frames", num_frames)
        tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32))
        tf.add_to_collection("train_op", train_op)
        if FLAGS.dropout:
            tf.add_to_collection("keep_prob", keep_prob_tensor)
        if FLAGS.noise_level > 0:
            tf.add_to_collection("noise_level", noise_level_tensor)
Example no. 19
0
def build_graph(reader,
                model,
                train_data_pattern,
                label_loss_fn=losses.CrossEntropyLoss(),
                batch_size=1000,
                base_learning_rate=0.01,
                learning_rate_decay_examples=1000000,
                learning_rate_decay=0.95,
                optimizer_class=tf.compat.v1.train.AdamOptimizer,
                clip_gradient_norm=1.0,
                regularization_penalty=1,
                num_readers=1,
                num_epochs=None):
    """Creates the Tensorflow graph.

  This will only be called once in the life of
  a training model, because after the graph is created the model will be
  restored from a meta graph file rather than being recreated.

  Args:
    reader: The data file reader. It should inherit from BaseReader.
    model: The core model (e.g. logistic or neural net). It should inherit from
      BaseModel.
    train_data_pattern: glob path to the training data files.
    label_loss_fn: What kind of loss to apply to the model. It should inherit
      from BaseLoss.
    batch_size: How many examples to process at a time.
    base_learning_rate: What learning rate to initialize the optimizer with.
    optimizer_class: Which optimization algorithm to use.
    clip_gradient_norm: Magnitude of the gradient to clip to.
    regularization_penalty: How much weight to give the regularization loss
      compared to the label loss.
    num_readers: How many threads to use for I/O operations.
    num_epochs: How many passes to make over the data. 'None' means an unlimited
      number of passes.
  """

    global_step = tf.Variable(0, trainable=False, name="global_step")

    local_device_protos = device_lib.list_local_devices()
    gpus = [x.name for x in local_device_protos if x.device_type == "GPU"]
    gpus = gpus[:FLAGS.num_gpu]
    num_gpus = len(gpus)

    if num_gpus > 0:
        logging.info("Using the following GPUs to train: " + str(gpus))
        num_towers = num_gpus
        device_string = "/gpu:%d"
    else:
        logging.info("No GPUs found. Training on CPU.")
        num_towers = 1
        device_string = "/cpu:%d"

    learning_rate = tf.train.exponential_decay(base_learning_rate,
                                               global_step * batch_size *
                                               num_towers,
                                               learning_rate_decay_examples,
                                               learning_rate_decay,
                                               staircase=True)
    tf.summary.scalar("learning_rate", learning_rate)

    optimizer = optimizer_class(learning_rate)
    input_data_dict = (get_input_data_tensors(reader,
                                              train_data_pattern,
                                              batch_size=batch_size *
                                              num_towers,
                                              num_readers=num_readers,
                                              num_epochs=num_epochs))
    print('input_data_dict', input_data_dict)
    model_input_raw = input_data_dict["video_matrix"]
    labels_batch = input_data_dict["labels"]
    num_frames = input_data_dict["num_frames"]
    print("model_input_shape, ", model_input_raw.shape)
    print("labels_batch, ", labels_batch)

    import csv
    import numpy as np
    whitelisted_cls_mask = np.zeros((3862,), dtype=np.float32)
    # Build the mask of whitelisted segment classes from the local csv file.
    with open('segment_label_ids.csv') as csv_file:
        for line in csv.reader(csv_file):
            try:
                cls_id = int(line[0])
                whitelisted_cls_mask[cls_id] = 1.
            except ValueError:
                # Simply skip the non-integer (header) line.
                continue

    # url2 = 'http://storage.googleapis.com/youtube8m-lijun-mlengine/classCount.csv'
    # response2 = urllib2.urlopen(url2)
    # fobj2 = csv.reader(response2)
    # for line in fobj2:
    #   try:
    #     cls_id = int(line[0])
    #     whitelisted_cls_mask[cls_id] = (15-np.log(int(line[1])))**2
    #   except ValueError:
    #       # Simply skip the non-integer line.
    #     continue
    # response2.close()
    # select=tf.matmul(tf.cast(labels_batch, tf.float32),tf.reshape(whitelisted_cls_mask,[3862,1]))>0
    # select=tf.squeeze(select)
    # model_input_raw = model_input_raw[select,:,:]
    # labels_batch = labels_batch[select,:]
    # num_frames = num_frames[select]

    tf.summary.histogram("model/input_raw", model_input_raw)
    feature_dim = len(model_input_raw.get_shape()) - 1
    model_input = tf.nn.l2_normalize(model_input_raw, feature_dim)

    tower_inputs = tf.split(model_input, num_towers)
    tower_labels = tf.split(labels_batch, num_towers)
    tower_num_frames = tf.split(num_frames, num_towers)
    tower_gradients = []
    tower_predictions = []
    tower_label_losses = []
    tower_reg_losses = []

    # import csv
    # import urllib2
    # import numpy as np
    # whitelisted_cls_mask = np.zeros((3862,),
    #                               dtype=np.float32)
    # url = 'http://storage.googleapis.com/youtube8m-lijun-mlengine/segment_label_ids.csv'
    # response = urllib2.urlopen(url)
    # fobj = csv.reader(response)
    # for line in fobj:
    #   try:
    #     cls_id = int(line[0])
    #     whitelisted_cls_mask[cls_id] = 1.
    #   except ValueError:
    #       # Simply skip the non-integer line.
    #     continue
    # response.close()
    # whitelisted_cls_mask=whitelisted_cls_mask+np.ones((3862,),dtype=np.float32)
    whitelisted_cls_mask = whitelisted_cls_mask * 4 + np.ones(
        (3862, ), dtype=np.float32)
    # whitelisted_cls_mask=0.05*(whitelisted_cls_mask*99+np.ones((3862,),dtype=np.float32))
    # print('whitelisted_cls_mask',np.amin(whitelisted_cls_mask))
    for i in range(num_towers):
        # For some reason these 'with' statements can't be combined onto the same
        # line. They have to be nested.
        with tf.device(device_string % i):
            with (tf.variable_scope(("tower"), reuse=True if i > 0 else
                                    None)):  # reuse=True if i > 0 else None
                with (slim.arg_scope(
                    [slim.model_variable, slim.variable],
                        device="/cpu:0" if num_gpus != 1 else "/gpu:0")):
                    result = model.create_model(tower_inputs[i],
                                                num_frames=tower_num_frames[i],
                                                vocab_size=reader.num_classes,
                                                labels=tower_labels[i])
                    for variable in slim.get_model_variables():
                        tf.summary.histogram(variable.op.name, variable)
                    # print('result predictions',result["predictions"])
                    predictions = result["predictions"]
                    tower_predictions.append(predictions)

                    if "loss" in result.keys():
                        label_loss = result["loss"]
                    else:
                        label_loss = label_loss_fn.calculate_loss(
                            predictions,
                            tower_labels[i],
                            label_weights=whitelisted_cls_mask)
                        if "aux_predictions" in result.keys():
                            for pred in result["aux_predictions"]:
                                label_loss += label_loss_fn.calculate_loss(
                                    pred,
                                    tower_labels[i],
                                    label_weights=whitelisted_cls_mask)
                    # print('label_loss',label_loss)
                    if "regularization_loss" in result.keys():
                        reg_loss = result["regularization_loss"]
                    else:
                        reg_loss = tf.constant(0.0)

                    reg_losses = tf.losses.get_regularization_losses()
                    if reg_losses:
                        reg_loss += tf.add_n(reg_losses)

                    tower_reg_losses.append(reg_loss)

                    # Adds update_ops (e.g., moving average updates in batch normalization) as
                    # a dependency to the train_op.
                    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                    if "update_ops" in result.keys():
                        update_ops += result["update_ops"]
                    if update_ops:
                        with tf.control_dependencies(update_ops):
                            barrier = tf.no_op(name="gradient_barrier")
                            with tf.control_dependencies([barrier]):
                                label_loss = tf.identity(label_loss)

                    tower_label_losses.append(label_loss)

                    # Incorporate the L2 weight penalties etc.
                    final_loss = regularization_penalty * reg_loss + label_loss
                    gradients = optimizer.compute_gradients(
                        final_loss, colocate_gradients_with_ops=False)
                    tower_gradients.append(gradients)
    label_loss = tf.reduce_mean(tf.stack(tower_label_losses))
    tf.summary.scalar("label_loss", label_loss)
    if regularization_penalty != 0:
        reg_loss = tf.reduce_mean(tf.stack(tower_reg_losses))
        tf.summary.scalar("reg_loss", reg_loss)
    merged_gradients = utils.combine_gradients(tower_gradients)

    if clip_gradient_norm > 0:
        with tf.name_scope("clip_grads"):
            merged_gradients = utils.clip_gradient_norms(
                merged_gradients, clip_gradient_norm)

    train_op = optimizer.apply_gradients(merged_gradients,
                                         global_step=global_step)

    tf.add_to_collection("global_step", global_step)
    tf.add_to_collection("loss", label_loss)
    tf.add_to_collection("predictions", tf.concat(tower_predictions, 0))
    tf.add_to_collection("input_batch_raw", model_input_raw)
    tf.add_to_collection("input_batch", model_input)
    tf.add_to_collection("num_frames", num_frames)
    tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32))
    tf.add_to_collection("train_op", train_op)
Example no. 20
0
def build_graph(reader,
                model,
                train_data_pattern,
                train_data_pattern2,
                train_data_pattern3,
                eval_data_pattern,
                label_loss_fn=losses.CrossEntropyLoss(),
                batch_size=1000,
                base_learning_rate=0.01,
                learning_rate_decay_examples=1000000,
                learning_rate_decay=0.95,
                optimizer_class=tf.train.AdamOptimizer,
                clip_gradient_norm=1.0,
                regularization_penalty=1,
                num_readers=1,
                num_epochs=None,
                l2_penalty=1e-8,
                gpu_only=1):
    """Creates the Tensorflow graph.

  This will only be called once in the life of
  a training model, because after the graph is created the model will be
  restored from a meta graph file rather than being recreated.

  Args:
    reader: The data file reader. It should inherit from BaseReader.
    model: The core model (e.g. logistic or neural net). It should inherit
           from BaseModel.
    train_data_pattern: glob path to the training data files.
    label_loss_fn: What kind of loss to apply to the model. It should inherit
                from BaseLoss.
    batch_size: How many examples to process at a time.
    base_learning_rate: What learning rate to initialize the optimizer with.
    optimizer_class: Which optimization algorithm to use.
    clip_gradient_norm: Magnitude of the gradient to clip to.
    regularization_penalty: How much weight to give the regularization loss
                            compared to the label loss.
    num_readers: How many threads to use for I/O operations.
    num_epochs: How many passes to make over the data. 'None' means an
                unlimited number of passes.
  """
    # data files
    files1 = gfile.Glob(train_data_pattern)
    files2 = gfile.Glob(train_data_pattern2)
    files3 = gfile.Glob(train_data_pattern3)
    files = files1 + files2 + files3
    if not files:
        raise IOError("Unable to find training files. data_pattern='" +
                      train_data_pattern + "'.")
    logging.info("Total number of training files: %s + %s + %s =  %s.",
                 str(len(files1)), str(len(files2)), str(len(files3)),
                 str(len(files)))

    files4 = gfile.Glob(eval_data_pattern)
    logging.info("Total number of eval files: %s.", str(len(files4)))

    if FLAGS.fold == -1:
        validate_files = files4
        train_files = files
    else:
        validate_files = files[FLAGS.fold::5]
        train_files = [x for x in files if x not in validate_files]

    logging.info("train files: {}, first is: {}.".format(
        len(train_files), train_files[0].split('/')[-1]))
    logging.info("eval files: {}, first is: {}.".format(
        len(validate_files), validate_files[0].split('/')[-1]))

    # label weights for loss function. ugly hard coded for now.
    wgts_np = np.ones(FLAGS.truncated_num_classes)
    over_weight_labels = False
    if over_weight_labels:
        labels_to_overwgt = [
            38, 47, 49, 55, 72, 76, 86, 89, 93, 94, 95, 98, 99, 101, 102, 110,
            111, 113, 114, 115, 120, 121
        ]
        wgts_np[labels_to_overwgt] = 2.0
    wgts_4_lossfn = tf.constant(wgts_np, dtype=tf.float32)

    global_step = tf.Variable(0, trainable=False, name="global_step")
    restart_learning_rate = tf.Variable(base_learning_rate,
                                        trainable=False,
                                        name="restart_learning_rate")

    local_device_protos = device_lib.list_local_devices()
    gpus = [x.name for x in local_device_protos if x.device_type == 'GPU']
    num_gpus = len(gpus)

    if num_gpus > 0:
        logging.info("Using the following GPUs to train: " + str(gpus))
        num_towers = num_gpus
        device_string = '/gpu:%d'
    else:
        logging.info("No GPUs found. Training on CPU.")
        num_towers = 1
        device_string = '/cpu:%d'

    learning_rate = tf.train.exponential_decay(restart_learning_rate,
                                               global_step * batch_size *
                                               num_towers,
                                               learning_rate_decay_examples,
                                               learning_rate_decay,
                                               staircase=True)
    tf.summary.scalar('learning_rate', learning_rate)

    optimizer = optimizer_class(learning_rate)
    unused_video_id, model_input_raw, labels_batch, num_frames = (
        get_input_data_tensors(reader,
                               train_files,
                               batch_size=batch_size * num_towers,
                               num_readers=num_readers,
                               num_epochs=num_epochs))
    tf.summary.histogram("model/input_raw", model_input_raw)

    # Model params: per-layer keep probabilities for dropout, assuming at most
    # 10 layers; the values below are the defaults.
    with tf.variable_scope("tower", reuse=True) as scope:
        layers_keep_probs = tf.Variable(
            [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            trainable=False,
            name="layers_keep_probs")
    model_input = model_input_raw
    if FLAGS.apply_global_normalization:
        g_mean, g_std = model_utils.load_global_moments()
        g_inv_std = 1.0 / g_std
        global_mean = tf.constant(g_mean, dtype=tf.float32)
        # expand global mean to match new dimension and fill rest with zeros
        new_dim = tf.cast(model_input_raw.shape[1], tf.int32)
        zero_padding = tf.zeros(new_dim - tf.shape(global_mean), tf.float32)
        global_mean_padded = tf.concat([global_mean, zero_padding], 0)
        # expand global inv std to match new dimension and fill rest with ones
        global_inv_std = tf.constant(g_inv_std, dtype=tf.float32)
        one_padding = tf.ones(new_dim - tf.shape(global_inv_std), tf.float32)
        global_inv_std_padded = tf.concat([global_inv_std, one_padding], 0)
        # Apply the requested normalizations (both may be applied).
        # Global standardization: subtract the global mean, scale by the inverse std.
        model_input = tf.multiply(tf.subtract(model_input, global_mean_padded),
                                  global_inv_std_padded)
    # regular L2 normalization
    if FLAGS.apply_batch_l2_normalization:
        feature_dim = len(model_input.get_shape()) - 1
        model_input = tf.nn.l2_normalize(model_input, feature_dim)

    tower_inputs = tf.split(model_input, num_towers)
    tower_labels = tf.split(labels_batch, num_towers)
    tower_num_frames = tf.split(num_frames, num_towers)
    tower_gradients = []
    tower_predictions = []
    tower_label_losses = []
    tower_reg_losses = []

    # eval graph - to monitor performance out of sample during training
    e_video_id, e_input_raw, e_labels_batch, e_num_frames = (
        get_input_data_tensors(reader,
                               validate_files,
                               batch_size=batch_size * num_towers,
                               num_readers=num_readers,
                               num_epochs=2 * num_epochs))
    e_input = e_input_raw
    if FLAGS.apply_global_normalization:
        e_input = tf.multiply(tf.subtract(e_input, global_mean_padded),
                              global_inv_std_padded)
    if FLAGS.apply_batch_l2_normalization:
        feature_dim = len(model_input.get_shape()) - 1
        e_input = tf.nn.l2_normalize(e_input, feature_dim)

    e_tower_inputs = tf.split(e_input, num_towers)
    e_tower_labels = tf.split(e_labels_batch, num_towers)
    e_tower_num_frames = tf.split(e_num_frames, num_towers)
    e_tower_predictions = []
    e_tower_layers_keep_probs = tf.Variable(
        [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        trainable=False,
        name="layers_keep_probs")
    logging.info(e_tower_inputs)
    # end eval
    for i in range(num_towers):
        # For some reason these 'with' statements can't be combined onto the same
        # line. They have to be nested.
        logging.info('For tower: ' + str(i))
        with tf.device(device_string % i):
            with (tf.variable_scope(("tower"), reuse=True if i > 0 else None)):
                with (slim.arg_scope(
                    [slim.model_variable, slim.variable],
                        device="/cpu:0" if num_gpus != 1 else "/gpu:0")):
                    logging.info(layers_keep_probs)
                    result = model.create_model(
                        tower_inputs[i],
                        num_frames=tower_num_frames[i],
                        vocab_size=reader.num_classes,
                        labels=tower_labels[i],
                        layers_keep_probs=layers_keep_probs,
                        l2_penalty=l2_penalty,
                        is_training=True)
                    for variable in slim.get_model_variables():
                        logging.info(variable)
                        tf.summary.histogram(variable.op.name, variable)

                    # create shadow moving average model variables
                    if FLAGS.use_ema:
                        model_vars = [x for x in slim.get_model_variables()]
                        ema = tf.train.ExponentialMovingAverage(
                            decay=1.0 - 1.0 / FLAGS.ema_halflife)
                        ema_op = ema.apply(model_vars)
                        logging.info("model_vars:")
                        logging.info(" || ".join([str(x) for x in model_vars]))
                        ema_vars = [ema.average(x) for x in model_vars]
                        ema_vars_pair_dict = {
                            ema.average_name(x): x.op.name
                            for x in model_vars
                        }
                        logging.info("ema_vars_pair_dict:")
                        for x, y in ema_vars_pair_dict.items():
                            logging.info(x + ': ' + y)
                        for v in ema_vars:
                            tf.summary.histogram(v.op.name, v)
                        tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, ema_op)
                        tf.add_to_collection("ema_op", ema_op)

                    predictions = result["predictions"]
                    tower_predictions.append(predictions)

                    if "loss" in result.keys():
                        label_loss = result["loss"]
                    else:
                        label_loss = label_loss_fn.calculate_loss(
                            predictions, tower_labels[i], FLAGS.loss_epsilon)

                    if "regularization_loss" in result.keys():
                        reg_loss = result["regularization_loss"]
                    else:
                        reg_loss = tf.constant(0.0)

                    reg_losses = tf.losses.get_regularization_losses()
                    if reg_losses:
                        reg_loss += tf.add_n(reg_losses)

                    tower_reg_losses.append(reg_loss)

                    # Adds update_ops (e.g., moving average updates in batch normalization) as
                    # a dependency to the train_op.
                    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                    if "update_ops" in result.keys():
                        update_ops += result["update_ops"]
                    if update_ops:
                        with tf.control_dependencies(update_ops):
                            barrier = tf.no_op(name="gradient_barrier")
                            with tf.control_dependencies([barrier]):
                                label_loss = tf.identity(label_loss)

                    tower_label_losses.append(label_loss)

                    # Incorporate the L2 weight penalties etc.
                    final_loss = regularization_penalty * reg_loss + label_loss
                    gradients = optimizer.compute_gradients(
                        final_loss, colocate_gradients_with_ops=False)
                    tower_gradients.append(gradients)

                    # eval ops
                    logging.info("eval ops")
                    e_result = model.create_model(
                        e_tower_inputs[i],
                        num_frames=e_tower_num_frames[i],
                        vocab_size=reader.num_classes,
                        labels=e_tower_labels[i],
                        layers_keep_probs=
                        e_tower_layers_keep_probs,  #tf.Variable([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], tf.float32, name="layers_keep_probs")
                        l2_penalty=l2_penalty,
                        is_training=False)

                    e_predictions = e_result["predictions"]
                    e_tower_predictions.append(e_predictions)
                    # end eval ops

    label_loss = tf.reduce_mean(tf.stack(tower_label_losses))
    tf.summary.scalar("label_loss", label_loss)
    if regularization_penalty != 0:
        reg_loss = tf.reduce_mean(tf.stack(tower_reg_losses))
        tf.summary.scalar("reg_loss", reg_loss)
    merged_gradients = utils.combine_gradients(tower_gradients)

    if clip_gradient_norm > 0:
        with tf.name_scope('clip_grads'):
            merged_gradients = utils.clip_gradient_norms(
                merged_gradients, clip_gradient_norm)

    train_op = optimizer.apply_gradients(merged_gradients,
                                         global_step=global_step)

    tf.add_to_collection("global_step", global_step)
    tf.add_to_collection("restart_learning_rate", restart_learning_rate)
    tf.add_to_collection("layers_keep_probs", layers_keep_probs)
    tf.add_to_collection("loss", label_loss)
    tf.add_to_collection("predictions", tf.concat(tower_predictions, 0))
    tf.add_to_collection("input_batch_raw", model_input_raw)
    tf.add_to_collection("input_batch", model_input)
    tf.add_to_collection("num_frames", num_frames)
    tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32))
    tf.add_to_collection("train_op", train_op)
    #tf.add_to_collection("ema_op", ema_op)

    # add eval graph
    e_label_loss = label_loss_fn.calculate_loss(
        tf.concat(e_tower_predictions, 0), e_labels_batch, FLAGS.loss_epsilon)
    tf.summary.scalar("e_label_loss", e_label_loss)

    tf.add_to_collection("e_predictions", tf.concat(e_tower_predictions, 0))
    tf.add_to_collection("e_labels", tf.cast(e_labels_batch, tf.float32))
    tf.add_to_collection("e_loss", e_label_loss)
Example no. 21
0
def build_graph(reader,
                model,
                train_data_pattern,
                label_loss_fn=losses.CrossEntropyLoss(),
                batch_size=1000,
                base_learning_rate=0.01,
                learning_rate_decay_examples=1000000,
                learning_rate_decay=0.95,
                optimizer_class=tf.train.AdamOptimizer,
                clip_gradient_norm=1.0,
                regularization_penalty=1,
                num_readers=1,
                num_epochs=None):
    """Creates the Tensorflow graph.

    This will only be called once in the life of
    a training model, because after the graph is created the model will be
    restored from a meta graph file rather than being recreated.

    Args:
      reader: The data file reader. It should inherit from BaseReader.
      model: The core model (e.g. logistic or neural net). It should inherit from
        BaseModel.
      train_data_pattern: glob path to the training data files.
      label_loss_fn: What kind of loss to apply to the model. It should inherit
        from BaseLoss.
      batch_size: How many examples to process at a time.
      base_learning_rate: What learning rate to initialize the optimizer with.
      optimizer_class: Which optimization algorithm to use.
      clip_gradient_norm: Magnitude of the gradient to clip to.
      regularization_penalty: How much weight to give the regularization loss
        compared to the label loss.
      num_readers: How many threads to use for I/O operations.
      num_epochs: How many passes to make over the data. 'None' means an unlimited
        number of passes.
    """

    global_step = tf.Variable(0, trainable=False, name="global_step")

    local_device_protos = device_lib.list_local_devices()
    gpus = [x.name for x in local_device_protos if x.device_type == "GPU"]
    print(gpus)
    gpus = gpus[:FLAGS.num_gpu]
    num_gpus = len(gpus)

    if num_gpus > 0:
        logging.info("Using the following GPUs to train: " + str(gpus))
        num_towers = num_gpus
        device_string = "/gpu:%d"
    else:
        logging.info("No GPUs found. Training on CPU.")
        num_towers = 1
        device_string = "/cpu:%d"

    learning_rate = tf.train.exponential_decay(base_learning_rate,
                                               global_step * batch_size,
                                               learning_rate_decay_examples,
                                               learning_rate_decay,
                                               staircase=True)
    tf.summary.scalar("learning_rate", learning_rate)

    optimizer = optimizer_class(learning_rate)
    input_data_dict = (get_input_data_tensors(reader,
                                              train_data_pattern,
                                              batch_size=batch_size,
                                              num_readers=num_readers,
                                              num_epochs=num_epochs))
    model_input_raw = input_data_dict["video_matrix"]
    labels_batch = input_data_dict["labels"]
    num_frames = input_data_dict["num_frames"]

    print("model_input_shape, ", model_input_raw.shape)
    tf.summary.histogram("model/input_raw", model_input_raw)

    feature_dim = len(model_input_raw.get_shape()) - 1

    offset = np.array([4. / 512] * 1024 + [0] * 128)
    offset = tf.constant(offset, dtype=tf.float32)

    eigen_val = tf.constant(np.sqrt(
        np.load("yt8m_pca/eigenvals.npy")[:1024, 0]),
                            dtype=tf.float32)

    model_input = tf.multiply(
        model_input_raw - offset,
        tf.pad(eigen_val + 1e-4, [[0, 128]], constant_values=1.))

    # model_input = tf.nn.l2_normalize(model_input_raw, feature_dim)

    if FLAGS.segment_labels:
        label_weights = input_data_dict["label_weights"]
    else:
        label_weights = None
    tower_logits = []
    tower_predictions = []
    tower_label_losses = []
    tower_reg_losses = []
    print("flag1!!!!", device_string)
    for i in range(num_towers):
        # For some reason these 'with' statements can't be combined onto the same
        # line. They have to be nested.
        with tf.device(device_string % i):
            with tf.variable_scope("tower_%d" % i, reuse=False):
                result = model.create_model(model_input,
                                            num_frames=num_frames,
                                            vocab_size=reader.num_classes,
                                            labels=labels_batch,
                                            is_training=True)
                for variable in slim.get_model_variables():
                    tf.summary.histogram(variable.op.name, variable)

                predictions = result["predictions"]
                tower_predictions.append(predictions)
                logits = result["logits"]
                tower_logits.append(logits)

                if "loss" in result.keys():
                    label_loss = result["loss"]
                else:
                    label_loss = label_loss_fn.calculate_loss(
                        predictions, labels_batch, label_weights=label_weights)
                    if "aux_predictions" in result.keys():
                        for pred in result["aux_predictions"]:
                            label_loss += label_loss_fn.calculate_loss(
                                pred,
                                labels_batch,
                                label_weights=label_weights)

                if "regularization_loss" in result.keys():
                    reg_loss = result["regularization_loss"]
                else:
                    reg_loss = tf.constant(0.0)

                reg_losses = tf.losses.get_regularization_losses()
                if reg_losses:
                    reg_loss += tf.add_n(reg_losses)

                tower_reg_losses.append(reg_loss)

                # Adds update_ops (e.g., moving average updates in batch normalization) as
                # a dependency to the train_op.
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                if "update_ops" in result.keys():
                    update_ops += result["update_ops"]
                if update_ops:
                    with tf.control_dependencies(update_ops):
                        barrier = tf.no_op(name="gradient_barrier")
                        with tf.control_dependencies([barrier]):
                            label_loss = tf.identity(label_loss)
                tower_label_losses.append(label_loss)

    with tf.device("/gpu:%d" % 0):
        with tf.variable_scope("ensemble"):
            ftr_mean = tf.reduce_mean(model_input, axis=1)
            print("ftr mean shape: ", ftr_mean.get_shape().as_list())
            ftr_mean = slim.batch_norm(ftr_mean,
                                       center=True,
                                       scale=True,
                                       fused=False,
                                       is_training=True,
                                       scope="mix_weights_bn")
            mix_weights = slim.fully_connected(
                ftr_mean,
                num_towers,
                activation_fn=None,
                weights_initializer=slim.variance_scaling_initializer(),
                scope="mix_weights")
            mix_weights = tf.nn.softmax(mix_weights, axis=-1)
            tf.summary.histogram("mix_weights", mix_weights)
            logits = tf.stack(tower_logits, axis=1)
            final_logit = tf.reduce_sum(tf.multiply(
                logits, tf.expand_dims(mix_weights, axis=-1)),
                                        axis=1,
                                        keepdims=False)
            final_predictions = tf.nn.sigmoid(final_logit)
        print("flag2!!!", FLAGS.final_temperature, FLAGS.final_lambda)
        rank_pred = tf.expand_dims(tf.nn.softmax(tf.div(
            final_logit, FLAGS.final_temperature),
                                                 axis=-1),
                                   axis=1)
        aux_rank_preds = tf.nn.softmax(tf.div(logits, FLAGS.final_temperature),
                                       axis=-1)
        epsilon = 1e-8
        kl_loss = tf.reduce_sum(
            rank_pred *
            (tf.log(rank_pred + epsilon) - tf.log(aux_rank_preds + epsilon)),
            axis=-1)
        regularization_loss = FLAGS.final_lambda * tf.reduce_mean(
            tf.reduce_sum(kl_loss, axis=-1), axis=-1)

        final_label_loss = label_loss_fn.calculate_loss(
            final_predictions, labels_batch, label_weights=label_weights)

        label_loss = tf.reduce_sum(
            tf.stack(tower_label_losses)) + final_label_loss
        tf.summary.scalar("label_loss", label_loss)
        reg_loss = tf.reduce_sum(
            tf.stack(tower_reg_losses)) + regularization_loss
        tf.summary.scalar("reg_loss", reg_loss)
        final_loss = label_loss + regularization_penalty * reg_loss

    gradients = optimizer.compute_gradients(final_loss,
                                            colocate_gradients_with_ops=True)
    if clip_gradient_norm > 0:
        gradients = utils.clip_gradient_norms(gradients, clip_gradient_norm)
    final_train_op = optimizer.apply_gradients(gradients,
                                               global_step=global_step)

    tf.add_to_collection("global_step", global_step)
    tf.add_to_collection("loss", label_loss)
    tf.add_to_collection("predictions", final_predictions)
    tf.add_to_collection("input_batch_raw", model_input_raw)
    tf.add_to_collection("input_batch", model_input)
    tf.add_to_collection("num_frames", num_frames)
    tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32))
    tf.add_to_collection("train_op", final_train_op)
Example no. 22
0
def build_graph(reader,
                input_data_pattern,
                model,
                distill_readers=None,
                distill_data_patterns=None,
                label_loss_fn=losses.CrossEntropyLoss(),
                batch_size=1000,
                transformer_class=feature_transform.DefaultTransformer):

    video_id, model_input_raw, labels_batch, num_frames = (
        get_input_data_tensors(reader,
                               input_data_pattern,
                               batch_size=batch_size))

    if distill_readers is not None:
        all_distill_labels = []
        for dreader, dpattern in zip(distill_readers, distill_data_patterns):
            distill_video_id, distill_labels_batch, unused_labels_batch, unused_num_frames = (
                get_input_data_tensors(dreader,
                                       dpattern,
                                       batch_size=batch_size))
            all_distill_labels.append(distill_labels_batch)
        all_distill_labels = tf.stack(all_distill_labels, axis=2)
        distill_weight_var = tf.get_variable("distill_weight",
                                             [len(distill_readers)])
        distill_weight = tf.nn.softmax(distill_weight_var)
        final_distill_labels = tf.einsum("ijk,k->ij", all_distill_labels,
                                         distill_weight)

    feature_transformer = transformer_class()
    model_input, num_frames = feature_transformer.transform(
        model_input_raw, num_frames=num_frames)

    with tf.name_scope("model"):
        if FLAGS.noise_level > 0:
            noise_level_tensor = tf.placeholder_with_default(
                0.0, shape=[], name="noise_level")
        else:
            noise_level_tensor = None

        if distill_readers is not None:
            distillation_predictions = final_distill_labels
        else:
            distillation_predictions = None

        if FLAGS.dropout:
            keep_prob_tensor = tf.placeholder_with_default(1.0,
                                                           shape=[],
                                                           name="keep_prob")
            result = model.create_model(
                model_input,
                num_frames=num_frames,
                vocab_size=reader.num_classes,
                labels=labels_batch,
                dropout=FLAGS.dropout,
                keep_prob=keep_prob_tensor,
                noise_level=noise_level_tensor,
                distillation_predictions=distillation_predictions,
                is_training=False)
        else:
            result = model.create_model(
                model_input,
                num_frames=num_frames,
                vocab_size=reader.num_classes,
                labels=labels_batch,
                noise_level=noise_level_tensor,
                distillation_predictions=distillation_predictions,
                is_training=False)

        print "result", result
        predictions = result["predictions"]

        tf.add_to_collection("predictions", predictions)
        tf.add_to_collection("video_id_batch", video_id)
        tf.add_to_collection("input_batch_raw", model_input_raw)
        tf.add_to_collection("input_batch", model_input)
        tf.add_to_collection("num_frames", num_frames)
        tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32))
        if FLAGS.dropout:
            tf.add_to_collection("keep_prob", keep_prob_tensor)
        if FLAGS.noise_level > 0:
            tf.add_to_collection("noise_level", noise_level_tensor)
Example no. 23
0
def build_graph(reader,
                model,
                train_data_pattern,
                label_loss_fn=losses.CrossEntropyLoss(),
                batch_size=1000,
                base_learning_rate=0.01,
                learning_rate_decay_examples=1000000,
                learning_rate_decay=0.95,
                optimizer_class=tf.train.AdamOptimizer,
                clip_gradient_norm=1.0,
                regularization_penalty=1,
                num_readers=1,
                num_epochs=None):
    """Creates the Tensorflow graph.

  This will only be called once in the life of
  a training model, because after the graph is created the model will be
  restored from a meta graph file rather than being recreated.

  Args:
    reader: The data file reader. It should inherit from BaseReader.
    model: The core model (e.g. logistic or neural net). It should inherit
           from BaseModel.
    train_data_pattern: glob path to the training data files.
    label_loss_fn: What kind of loss to apply to the model. It should inherit
                from BaseLoss.
    batch_size: How many examples to process at a time.
    base_learning_rate: What learning rate to initialize the optimizer with.
    optimizer_class: Which optimization algorithm to use.
    clip_gradient_norm: Magnitude of the gradient to clip to.
    regularization_penalty: How much weight to give the regularization loss
                            compared to the label loss.
    num_readers: How many threads to use for I/O operations.
    num_epochs: How many passes to make over the data. 'None' means an
                unlimited number of passes.
  """

    global_step = tf.Variable(0, trainable=False, name="global_step")

    learning_rate = tf.train.exponential_decay(base_learning_rate,
                                               global_step * batch_size,
                                               learning_rate_decay_examples,
                                               learning_rate_decay,
                                               staircase=True)
    tf.summary.scalar('learning_rate', learning_rate)
    if FLAGS.distillation_features:
        unused_video_id, model_input_raw, labels_batch, num_frames, distill_labels_batch = (
            get_input_data_tensors(reader,
                                   train_data_pattern,
                                   batch_size=batch_size,
                                   num_readers=num_readers,
                                   num_epochs=num_epochs))
        if FLAGS.distillation_features and FLAGS.distillation_type == 2:
            p = FLAGS.distillation_percent
            print("distillation_percent =", p, "reforming labels")
            float_labels = tf.cast(labels_batch, dtype=tf.float32)
            sum_float_labels = tf.reduce_sum(float_labels,
                                             axis=1,
                                             keep_dims=True)
            sum_distill_labels = tf.reduce_sum(
                distill_labels_batch, axis=1, keep_dims=True) + 1e-6
            distill_labels_batch = float_labels + distill_labels_batch * (
                sum_float_labels / sum_distill_labels * p)
            distill_labels_batch = tf.clip_by_value(distill_labels_batch,
                                                    clip_value_min=0.0,
                                                    clip_value_max=1.0)
    else:
        unused_video_id, model_input_raw, labels_batch, num_frames = (
            get_input_data_tensors(reader,
                                   train_data_pattern,
                                   batch_size=batch_size,
                                   num_readers=num_readers,
                                   num_epochs=num_epochs))
    tf.summary.histogram("model/input_raw", model_input_raw)

    feature_dim = len(model_input_raw.get_shape()) - 1
    if FLAGS.norm:
        model_input = tf.nn.l2_normalize(model_input_raw, feature_dim)
    else:
        model_input = model_input_raw

    with tf.name_scope("model"):
        if FLAGS.distillation_features and FLAGS.distillation_type == 0:
            result = model.create_model(model_input,
                                        num_frames=num_frames,
                                        vocab_size=reader.num_classes,
                                        labels=labels_batch,
                                        distill_labels=distill_labels_batch)
        else:
            result = model.create_model(model_input,
                                        num_frames=num_frames,
                                        vocab_size=reader.num_classes,
                                        labels=labels_batch)

        for variable in slim.get_model_variables():
            tf.summary.histogram(variable.op.name, variable)

        predictions = result["predictions"]
        if "predictions_negative" in result.keys():
            predictions_negative = result["predictions_negative"]
        else:
            predictions_negative = 1 - predictions
        if "predictions_positive" in result.keys():
            predictions_positive = result["predictions_positive"]
        else:
            predictions_positive = predictions
        if predictions.get_shape().ndims == 3:
            predictions = tf.reshape(
                predictions, [-1, predictions.get_shape().as_list()[2]])
            labels_batch = tf.reshape(
                labels_batch, [-1, labels_batch.get_shape().as_list()[2]])
        if "bottleneck" in result.keys():
            bottle_neck = result["bottleneck"]
        else:
            bottle_neck = tf.constant(0.0)
        if "predictions_class" in result.keys():
            predictions_class = result["predictions_class"]
        else:
            predictions_class = predictions
        if "predictions_encoder" in result.keys():
            predictions_encoder = result["predictions_encoder"]
        else:
            predictions_encoder = predictions
        if "predictions_experts" in result.keys():
            predictions_experts = result["predictions_experts"]
        else:
            predictions_experts = predictions
        if "predictions_postprocess" in result.keys():
            predictions_postprocess = result["predictions_postprocess"]
        else:
            predictions_postprocess = predictions

        if "loss" in result.keys():
            append_loss = result["loss"]
        else:
            append_loss = tf.constant(0.0)
        if "predictions_encoder" in result.keys():
            label_loss, float_encoders = label_loss_fn.calculate_loss_mix2(
                predictions, predictions_class, predictions_encoder,
                labels_batch)
            tf.summary.histogram("model/float_encoders", float_encoders)
        elif FLAGS.distillation_features and FLAGS.distillation_type == 1:
            label_loss = label_loss_fn.calculate_loss_distill_boost(
                predictions, distill_labels_batch, labels_batch)
        elif FLAGS.distillation_features and FLAGS.distillation_type == 2:
            label_loss = label_loss_fn.calculate_loss_distill(
                predictions, distill_labels_batch, labels_batch)
        elif FLAGS.distillation_features and FLAGS.distillation_type == 3:
            label_loss = label_loss_fn.calculate_loss_distill_relabel(
                predictions, distill_labels_batch, labels_batch)
        elif "predictions_class" in result.keys():
            label_loss = label_loss_fn.calculate_loss_mix(
                predictions, predictions_class, labels_batch)
        elif "predictions_experts" in result.keys():
            label_loss = label_loss_fn.calculate_loss_max(
                predictions, predictions_experts, labels_batch)
        elif "predictions_postprocess" in result.keys():
            label_loss = label_loss_fn.calculate_loss_postprocess(
                predictions_postprocess, labels_batch)
        elif "predictions_negative" in result.keys():
            label_loss = label_loss_fn.calculate_loss_negative(
                predictions_positive, predictions_negative, labels_batch)
        else:
            label_loss = label_loss_fn.calculate_loss(predictions,
                                                      labels_batch)

        if "prediction_frames" in result.keys():
            predictions_frames = result["prediction_frames"]
            labels_frames = tf.tile(
                tf.reshape(labels_batch, [-1, 1, reader.num_classes]),
                [1, FLAGS.moe_num_extend, 1])
            labels_frames = tf.cast(
                tf.reshape(labels_frames, [-1, reader.num_classes]),
                tf.float32)
            frame_loss = label_loss_fn.calculate_loss(predictions_frames,
                                                      labels_frames)
            if "prediction_prepare_frames" in result.keys():
                prediction_prepare_frames = result["prediction_prepare_frames"]
                prediction_prepare_video = result["prediction_prepare_video"]
                max_frames = model_input.get_shape().as_list()[1]
                frames_sum = tf.reduce_sum(tf.abs(model_input), axis=2)
                frames_true = tf.ones(tf.shape(frames_sum))
                frames_false = tf.zeros(tf.shape(frames_sum))
                frames_bool = tf.where(tf.greater(frames_sum, frames_false),
                                       frames_true, frames_false)
                frames_bool = tf.reshape(
                    frames_bool[:, 0:max_frames:FLAGS.stride_size], [-1, 1])
                labels_prepare_frames = tf.tile(
                    tf.reshape(labels_batch, [-1, 1, reader.num_classes]),
                    [1, max_frames // FLAGS.stride_size, 1])
                labels_prepare_frames = tf.cast(
                    tf.reshape(labels_prepare_frames,
                               [-1, reader.num_classes]),
                    tf.float32) * frames_bool
                prediction_prepare_frames = prediction_prepare_frames * frames_bool
                label_loss = (
                    0.1 * label_loss_fn.calculate_loss(
                        prediction_prepare_frames, labels_prepare_frames) +
                    0.1 * label_loss_fn.calculate_loss(
                        prediction_prepare_video, labels_batch))
            else:
                label_loss = label_loss * 0.0
        elif "prediction_minmax" in result.keys():
            predictions_minmax = result["prediction_minmax"]
            predictions_min = tf.reduce_min(predictions_minmax, axis=1)
            predictions_max = tf.reduce_max(predictions_minmax, axis=1)
            epsilon = 10e-6
            float_labels = tf.cast(labels_batch, tf.float32)
            cross_entropy_loss = float_labels * tf.log(
                predictions_min + epsilon) + (
                    1 - float_labels) * tf.log(1 - predictions_max + epsilon)
            frame_loss = tf.reduce_mean(
                tf.reduce_sum(tf.negative(cross_entropy_loss), 1))
            label_loss = label_loss * 0.0
        else:
            frame_loss = tf.constant(0.0)
        tf.summary.scalar("label_loss", label_loss)

        if "regularization_loss" in result.keys():
            reg_loss = result["regularization_loss"]
        else:
            reg_loss = tf.constant(0.0)

        reg_losses = tf.losses.get_regularization_losses()
        if reg_losses:
            reg_loss += tf.add_n(reg_losses)

        if regularization_penalty != 0:
            tf.summary.scalar("reg_loss", reg_loss)

        # Adds update_ops (e.g., moving average updates in batch normalization) as
        # a dependency to the train_op.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if "update_ops" in result.keys():
            update_ops += result["update_ops"]
        if update_ops:
            with tf.control_dependencies(update_ops):
                barrier = tf.no_op(name="gradient_barrier")
                with tf.control_dependencies([barrier]):
                    label_loss = tf.identity(label_loss)

        # Incorporate the L2 weight penalties etc.
        final_loss = regularization_penalty * reg_loss + label_loss + frame_loss + append_loss
        if FLAGS.gradient == "my":
            opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
            variables_to_train = tf_variables.trainable_variables()
            top_grads, top_vars = mygradients(final_loss,
                                              variables_to_train,
                                              global_step=global_step,
                                              name="mygradients_net")
            grads_and_vars = list(zip(top_grads, top_vars))
            train_op = opt.apply_gradients(grads_and_vars,
                                           global_step=global_step)
        else:
            optimizer = optimizer_class(learning_rate)
            train_op = slim.learning.create_train_op(
                final_loss,
                optimizer,
                global_step=global_step,
                clip_gradient_norm=clip_gradient_norm)

        tf.add_to_collection("global_step", global_step)
        tf.add_to_collection("loss", final_loss)
        tf.add_to_collection("reg_loss", reg_loss)
        tf.add_to_collection("bottleneck", bottle_neck)
        tf.add_to_collection("predictions", predictions)
        tf.add_to_collection("input_batch_raw", model_input_raw)
        tf.add_to_collection("input_batch", model_input)
        tf.add_to_collection("num_frames", num_frames)
        tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32))
        tf.add_to_collection("train_op", train_op)
Esempio n. 24
0
def main():

    batch_size = 256
    num_classes = 10
    """ load dataset """
    dataset = loaders.Cifar10Loader('./datasets/CIFAR-10').load()
    train_dataset, valid_dataset = dataset
    """ processor """
    train_processor = processors.Cifar10ClassificationProcessor(
        batch_size,
        num_classes=num_classes,
        enable_augmentation=True,
        image_size=(32, 32))
    valid_processor = processors.Cifar10ClassificationProcessor(
        batch_size,
        num_classes=num_classes,
        enable_augmentation=False,
        image_size=(32, 32))
    """ iterator """
    train_iterator = iterators.MultiprocessIterator(train_dataset,
                                                    train_processor,
                                                    num_workers=1)
    valid_iterator = iterators.MultiprocessIterator(valid_dataset,
                                                    valid_processor,
                                                    num_workers=1)
    """ device """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    """ model """
    model = models.RiriverceCifar10Net9(input_channels=3,
                                        num_classes=num_classes).to(device)
    """ loss """
    loss_function = losses.CrossEntropyLoss().to(device)
    """ optimizer """
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20)
    """ logger """
    logger = loggers.SimpleLogger()
    """ learning """
    for epoch in range(100):
        print(f"-" * 64)
        print(f"[epoch {epoch:>4d}]")
        phase = 'train'
        torch.set_grad_enabled(True)
        for batch_data in tqdm.tqdm(train_iterator, desc=phase):
            optimizer.zero_grad()
            batch_image = torch.from_numpy(batch_data['image']).to(device)
            batch_target = torch.from_numpy(batch_data['target']).to(device)
            batch_output = model(batch_image)
            batch_loss = loss_function(batch_output, batch_target)
            batch_loss.sum().backward()
            optimizer.step()
            batch_loss = batch_loss.data.cpu().numpy()
            batch_label = np.argmax(batch_target.data.cpu().numpy(),
                                    axis=-1).flatten()
            batch_pred = np.argmax(batch_output.data.cpu().numpy(),
                                   axis=-1).flatten()
            logger.add_batch_loss(batch_loss, phase=phase)
            logger.add_batch_pred(batch_pred, phase=phase)
            logger.add_batch_label(batch_label, phase=phase)
        loss = logger.get_loss(phase)
        accuracy = logger.get_accuracy(phase)
        print(f"loss : {loss}")
        print(f"accuracy : {accuracy}")
        phase = 'valid'
        torch.set_grad_enabled(False)
        for batch_data in tqdm.tqdm(valid_iterator, desc=phase):
            optimizer.zero_grad()
            batch_image = torch.from_numpy(batch_data['image']).to(device)
            batch_target = torch.from_numpy(batch_data['target']).to(device)
            batch_output = model(batch_image)
            batch_loss = loss_function(batch_output, batch_target)
            batch_loss = batch_loss.data.cpu().numpy()
            batch_label = np.argmax(batch_target.data.cpu().numpy(),
                                    axis=-1).flatten()
            batch_pred = np.argmax(batch_output.data.cpu().numpy(),
                                   axis=-1).flatten()
            logger.add_batch_loss(batch_loss, phase=phase)
            logger.add_batch_pred(batch_pred, phase=phase)
            logger.add_batch_label(batch_label, phase=phase)
        loss = logger.get_loss(phase)
        accuracy = logger.get_accuracy(phase)
        print(f"loss : {loss:.4f}")
        print(f"accuracy : {accuracy:.4f}")
        scheduler.step()  # advance the cosine-annealing schedule once per epoch
        logger.step()
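loggers.SimpleLogger is not shown in this example; its interface is only implied by the calls above (add_batch_loss, add_batch_pred, add_batch_label, get_loss, get_accuracy, step). A hypothetical minimal logger satisfying that interface could look like the following sketch; the real implementation may differ.

# Hypothetical stand-in for loggers.SimpleLogger, matching only the interface
# used in main(): it accumulates per-phase losses/predictions/labels for one
# epoch and clears them on step().
import numpy as np

class MinimalLogger:
    def __init__(self):
        self._data = {}

    def _phase(self, phase):
        return self._data.setdefault(phase,
                                     {'loss': [], 'pred': [], 'label': []})

    def add_batch_loss(self, batch_loss, phase):
        self._phase(phase)['loss'].append(float(np.mean(batch_loss)))

    def add_batch_pred(self, batch_pred, phase):
        self._phase(phase)['pred'].extend(np.asarray(batch_pred).tolist())

    def add_batch_label(self, batch_label, phase):
        self._phase(phase)['label'].extend(np.asarray(batch_label).tolist())

    def get_loss(self, phase):
        return float(np.mean(self._phase(phase)['loss']))

    def get_accuracy(self, phase):
        p = self._phase(phase)
        return float(np.mean(np.asarray(p['pred']) == np.asarray(p['label'])))

    def step(self):
        self._data = {}  # reset accumulators for the next epoch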
Esempio n. 25
0
def evaluate():
    tf.set_random_seed(0)  # for reproducibility

    # Write json of flags
    model_flags_path = os.path.join(FLAGS.train_dir, "model_flags.json")
    if not file_io.file_exists(model_flags_path):
        raise IOError(("Cannot find file %s. Did you run train.py on the same "
                       "--train_dir?") % model_flags_path)
    flags_dict = json.loads(file_io.FileIO(model_flags_path, "r").read())

    with tf.Graph().as_default():
        # convert feature_names and feature_sizes to lists of values
        feature_names, feature_sizes = utils.GetListOfFeatureNamesAndSizes(
            flags_dict["feature_names"], flags_dict["feature_sizes"])

        if flags_dict["frame_features"]:
            reader = readers.YT8MFrameFeatureReader(
                feature_names=feature_names, feature_sizes=feature_sizes)
        else:
            reader = readers.YT8MAggregatedFeatureReader(
                feature_names=feature_names, feature_sizes=feature_sizes)

        model = frame_level_models.NetVLADModelLF()
        model_lst = [model]
        if FLAGS.ensemble_num > 1:
            for ensemble_idx in range(1, FLAGS.ensemble_num):
                model2 = frame_level_models.NetVLADModelLF()
                model_lst.append(model2)

        label_loss_fn = losses.CrossEntropyLoss()

        if FLAGS.eval_data_pattern == "":
            raise IOError("'eval_data_pattern' was not specified. " +
                          "Nothing to evaluate.")

        video_id_batch = build_graph(reader=reader,
                                     model=model_lst,
                                     eval_data_pattern=FLAGS.eval_data_pattern,
                                     label_loss_fn=label_loss_fn,
                                     num_readers=FLAGS.num_readers,
                                     batch_size=FLAGS.batch_size)
        logging.info("built evaluation graph")
        if not FLAGS.force_output_model_name:
            video_id_batch = tf.get_collection("video_id_batch")[0]
        prediction_batch = tf.get_collection("predictions")[0]
        label_batch = tf.get_collection("labels")[0]
        loss = tf.get_collection("loss")[0]
        summary_op = tf.get_collection("summary_op")[0]

        saver = tf.train.Saver(tf.global_variables())

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir,
                                               graph=tf.get_default_graph())

        evl_metrics = eval_util.EvaluationMetrics(reader.num_classes,
                                                  FLAGS.top_k)

        last_global_step_val = -1
        #with tf.device("/gpu:0"):
        while True:
            last_global_step_val = evaluation_loop(video_id_batch,
                                                   prediction_batch,
                                                   label_batch, loss,
                                                   summary_op, saver,
                                                   summary_writer, evl_metrics,
                                                   last_global_step_val)
            if FLAGS.run_once:
                break
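evaluation_loop is defined elsewhere in this file; the core of what it does each pass is restore the latest checkpoint from FLAGS.train_dir and run the fetched tensors over the evaluation set. A rough, hedged sketch of the restore step (the helper name and return value are assumptions, not the actual implementation):

# Hedged sketch of the checkpoint-restore step an evaluation_loop typically
# performs before computing metrics; only FLAGS.train_dir comes from the
# original code, the rest is illustrative.
def restore_latest_checkpoint(sess, saver, train_dir):
    latest_checkpoint = tf.train.latest_checkpoint(train_dir)
    if latest_checkpoint is None:
        logging.info("No checkpoint found in %s yet.", train_dir)
        return None
    logging.info("Loading checkpoint for eval: %s", latest_checkpoint)
    saver.restore(sess, latest_checkpoint)
    # The global step is encoded in the file name, e.g. model.ckpt-1234.
    return int(latest_checkpoint.split("-")[-1])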
Esempio n. 26
0
def build_graph(reader,
                model,
                train_data_pattern,
                label_loss_fn=losses.CrossEntropyLoss(),
                batch_size=1000,
                base_learning_rate=0.01,
                learning_rate_decay_examples=1000000,
                learning_rate_decay=0.95,
                optimizer_class=tf.train.AdamOptimizer,
                clip_gradient_norm=1.0,
                regularization_penalty=1,
                num_readers=1,
                num_epochs=None):

    global_step = tf.Variable(0, trainable=False, name="global_step")

    local_device_protos = device_lib.list_local_devices()
    gpus = [x.name for x in local_device_protos if x.device_type == 'GPU']
    num_gpus = len(gpus)

    if num_gpus > 0:
        logging.info("Using the following GPUs to train: " + str(gpus))
        num_towers = num_gpus
        device_string = '/gpu:%d'
    else:
        logging.info("No GPUs found. Training on CPU.")
        num_towers = 1
        device_string = '/cpu:%d'

    learning_rate = tf.train.exponential_decay(base_learning_rate,
                                               global_step * batch_size *
                                               num_towers,
                                               learning_rate_decay_examples,
                                               learning_rate_decay,
                                               staircase=True)
    tf.summary.scalar('learning_rate', learning_rate)

    optimizer = optimizer_class(learning_rate)
    unused_video_id, model_input_raw, labels_batch, num_frames = (
        get_input_data_tensors(reader,
                               train_data_pattern,
                               batch_size=batch_size * num_towers,
                               num_readers=num_readers,
                               num_epochs=num_epochs))
    tf.summary.histogram("model/input_raw", model_input_raw)

    feature_dim = len(model_input_raw.get_shape()) - 1

    model_input = tf.nn.l2_normalize(model_input_raw, feature_dim)

    tower_inputs = tf.split(model_input, num_towers)
    tower_labels = tf.split(labels_batch, num_towers)
    tower_num_frames = tf.split(num_frames, num_towers)
    tower_gradients = []
    tower_predictions = []
    tower_label_losses = []
    tower_reg_losses = []
    for i in range(num_towers):
        # For some reason these 'with' statements can't be combined onto the same
        # line. They have to be nested.
        with tf.device(device_string % i):
            with (tf.variable_scope(("tower"), reuse=True if i > 0 else None)):
                with (slim.arg_scope(
                    [slim.model_variable, slim.variable],
                        device="/cpu:0" if num_gpus != 1 else "/gpu:0")):
                    result = model.create_model(tower_inputs[i],
                                                num_frames=tower_num_frames[i],
                                                vocab_size=reader.num_classes,
                                                labels=tower_labels[i])
                    for variable in slim.get_model_variables():
                        tf.summary.histogram(variable.op.name, variable)

                    predictions = result["predictions"]
                    tower_predictions.append(predictions)

                    if "loss" in result.keys():
                        label_loss = result["loss"]
                    else:
                        label_loss = label_loss_fn.calculate_loss(
                            predictions, tower_labels[i])

                    if "regularization_loss" in result.keys():
                        reg_loss = result["regularization_loss"]
                    else:
                        reg_loss = tf.constant(0.0)

                    reg_losses = tf.losses.get_regularization_losses()
                    if reg_losses:
                        reg_loss += tf.add_n(reg_losses)

                    tower_reg_losses.append(reg_loss)

                    # Adds update_ops (e.g., moving average updates in batch normalization) as
                    # a dependency to the train_op.
                    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                    if "update_ops" in result.keys():
                        update_ops += result["update_ops"]
                    if update_ops:
                        with tf.control_dependencies(update_ops):
                            barrier = tf.no_op(name="gradient_barrier")
                            with tf.control_dependencies([barrier]):
                                label_loss = tf.identity(label_loss)

                    tower_label_losses.append(label_loss)

                    # Incorporate the L2 weight penalties etc.
                    final_loss = regularization_penalty * reg_loss + label_loss
                    gradients = optimizer.compute_gradients(
                        final_loss, colocate_gradients_with_ops=False)
                    tower_gradients.append(gradients)
    label_loss = tf.reduce_mean(tf.stack(tower_label_losses))
    tf.summary.scalar("label_loss", label_loss)
    if regularization_penalty != 0:
        reg_loss = tf.reduce_mean(tf.stack(tower_reg_losses))
        tf.summary.scalar("reg_loss", reg_loss)
    merged_gradients = utils.combine_gradients(tower_gradients)

    if clip_gradient_norm > 0:
        with tf.name_scope('clip_grads'):
            merged_gradients = utils.clip_gradient_norms(
                merged_gradients, clip_gradient_norm)

    train_op = optimizer.apply_gradients(merged_gradients,
                                         global_step=global_step)

    tf.add_to_collection("global_step", global_step)
    tf.add_to_collection("loss", label_loss)
    tf.add_to_collection("predictions", tf.concat(tower_predictions, 0))
    tf.add_to_collection("input_batch_raw", model_input_raw)
    tf.add_to_collection("input_batch", model_input)
    tf.add_to_collection("num_frames", num_frames)
    tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32))
    tf.add_to_collection("train_op", train_op)
Esempio n. 27
0
def build_graph(all_readers,
                all_train_data_patterns,
                input_reader,
                input_data_pattern,
                model,
                label_loss_fn=losses.CrossEntropyLoss(),
                batch_size=256,
                base_learning_rate=0.01,
                learning_rate_decay_examples=1000000,
                learning_rate_decay=0.95,
                optimizer_class=tf.train.AdamOptimizer,
                clip_gradient_norm=1.0,
                regularization_penalty=1,
                num_epochs=None):
    """Creates the Tensorflow graph.

  This will only be called once in the life of
  a training model, because after the graph is created the model will be
  restored from a meta graph file rather than being recreated.

  Args:
    all_readers: The data file readers. Every element in it should inherit from BaseReader.
    model: The core model (e.g. logistic or neural net). It should inherit
           from BaseModel.
    train_data_patterns: glob paths to the training data files.
    label_loss_fn: What kind of loss to apply to the model. It should inherit
                from BaseLoss.
    batch_size: How many examples to process at a time.
    base_learning_rate: What learning rate to initialize the optimizer with.
    optimizer_class: Which optimization algorithm to use.
    clip_gradient_norm: Magnitude of the gradient to clip to.
    regularization_penalty: How much weight to give the regularization loss
                            compared to the label loss.
    num_epochs: How many passes to make over the data. 'None' means an
                unlimited number of passes.
  """

    global_step = tf.Variable(0, trainable=False, name="global_step")

    learning_rate = tf.train.exponential_decay(base_learning_rate,
                                               global_step * batch_size,
                                               learning_rate_decay_examples,
                                               learning_rate_decay,
                                               staircase=True)
    tf.summary.scalar('learning_rate', learning_rate)

    original_input = None
    if input_data_pattern is not None:
        original_video_id, original_input, unused_labels_batch, unused_num_frames = (
            get_input_data_tensors(input_reader,
                                   input_data_pattern,
                                   batch_size=batch_size,
                                   num_epochs=num_epochs))

    optimizer = optimizer_class(learning_rate)
    model_input_raw_tensors = []
    labels_batch_tensor = None
    for reader, data_pattern in zip(all_readers, all_train_data_patterns):
        video_id, model_input_raw, labels_batch, unused_num_frames = (
            get_input_data_tensors(reader,
                                   data_pattern,
                                   batch_size=batch_size,
                                   num_epochs=num_epochs))
        if labels_batch_tensor is None:
            labels_batch_tensor = labels_batch
        model_input_raw_tensors.append(tf.expand_dims(model_input_raw, axis=2))

        if original_input is not None:
            id_match = tf.ones_like(original_video_id, dtype=tf.float32)
            id_match = id_match * tf.cast(
                tf.equal(original_video_id, video_id), dtype=tf.float32)
            tf.summary.scalar("model/id_match", tf.reduce_mean(id_match))

    model_input = tf.concat(model_input_raw_tensors, axis=2)
    labels_batch = labels_batch_tensor
    tf.summary.histogram("model/input", model_input)

    with tf.name_scope("model"):
        if FLAGS.noise_level > 0:
            noise_level_tensor = tf.placeholder_with_default(
                0.0, shape=[], name="noise_level")
        else:
            noise_level_tensor = None

        if FLAGS.dropout:
            keep_prob_tensor = tf.placeholder_with_default(1.0,
                                                           shape=[],
                                                           name="keep_prob")
            result = model.create_model(model_input,
                                        labels=labels_batch,
                                        vocab_size=reader.num_classes,
                                        original_input=original_input,
                                        dropout=FLAGS.dropout,
                                        keep_prob=keep_prob_tensor,
                                        noise_level=noise_level_tensor)
        else:
            result = model.create_model(model_input,
                                        labels=labels_batch,
                                        vocab_size=reader.num_classes,
                                        original_input=original_input,
                                        noise_level=noise_level_tensor)

        for variable in slim.get_model_variables():
            tf.summary.histogram(variable.op.name, variable)

        predictions = result["predictions"]
        if "loss" in result.keys():
            label_loss = result["loss"]
        else:
            video_weights_batch = None
            if FLAGS.reweight:
                video_weights_batch = get_video_weights(video_id)

            if FLAGS.multitask:
                print "using multitask loss"
                support_predictions = result["support_predictions"]
                tf.summary.histogram("model/support_predictions",
                                     support_predictions)
                print "support_predictions", support_predictions
                label_loss = label_loss_fn.calculate_loss(
                    predictions,
                    support_predictions,
                    labels_batch,
                    weights=video_weights_batch)
            else:
                print "using original loss"
                label_loss = label_loss_fn.calculate_loss(
                    predictions, labels_batch, weights=video_weights_batch)

        tf.summary.histogram("model/predictions", predictions)
        tf.summary.scalar("label_loss", label_loss)

        if "regularization_loss" in result.keys():
            reg_loss = result["regularization_loss"]
        else:
            reg_loss = tf.constant(0.0)

        reg_losses = tf.losses.get_regularization_losses()
        if reg_losses:
            reg_loss += tf.add_n(reg_losses)

        if regularization_penalty != 0:
            tf.summary.scalar("reg_loss", reg_loss)

        # Adds update_ops (e.g., moving average updates in batch normalization) as
        # a dependency to the train_op.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if "update_ops" in result.keys():
            update_ops += result["update_ops"]
        if update_ops:
            with tf.control_dependencies(update_ops):
                barrier = tf.no_op(name="gradient_barrier")
                with tf.control_dependencies([barrier]):
                    label_loss = tf.identity(label_loss)

        # Incorporate the L2 weight penalties etc.
        final_loss = regularization_penalty * reg_loss + label_loss

        if FLAGS.training:
            gradients = optimizer.compute_gradients(
                final_loss, colocate_gradients_with_ops=False)
            if clip_gradient_norm > 0:
                with tf.name_scope('clip_grads'):
                    gradients = utils.clip_gradient_norms(
                        gradients, clip_gradient_norm)
            train_op = optimizer.apply_gradients(gradients,
                                                 global_step=global_step)
        else:
            train_op = tf.no_op()

        tf.add_to_collection("global_step", global_step)
        tf.add_to_collection("loss", label_loss)
        tf.add_to_collection("predictions", predictions)
        tf.add_to_collection("input_batch_raw", model_input)
        tf.add_to_collection("input_batch", model_input)
        tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32))
        tf.add_to_collection("train_op", train_op)
        if FLAGS.dropout:
            tf.add_to_collection("keep_prob", keep_prob_tensor)
        if FLAGS.noise_level > 0:
            tf.add_to_collection("noise_level", noise_level_tensor)
Esempio n. 28
0
def build_graph(refiner_model,
                discriminator_model,
                train_data_reader,
                train_data_pattern,
                true_label_reader,
                true_label_pattern,
                refiner_loss_fn=losses.CrossEntropyLoss(),
                similarity_loss_fn=losses.MeanSquareErrorLoss(),
                discriminator_loss_fn=losses.CrossEntropyLoss(),
                batch_size=4,
                base_learning_rate=0.01,
                learning_rate_decay_examples=4000,
                learning_rate_decay=0.99,
                optimizer_class=tf.train.AdamOptimizer,
                clip_gradient_norm=1.0,
                prediction_threshold=0.5,
                regularization_penalty=1,
                num_readers=2,
                num_epochs=None):
  """Creates the Tensorflow graph.

  This will only be called once in the life of
  a training model, because after the graph is created the model will be
  restored from a meta graph file rather than being recreated.

  Args:
    reader: The data file reader. It should inherit from BaseReader.
    model: The core model (e.g. logistic or neural net). It should inherit
           from BaseModel.
    train_data_pattern: glob path to the training data files.
    label_loss_fn: What kind of loss to apply to the model. It should inherit
                from BaseLoss.
    batch_size: How many examples to process at a time.
    base_learning_rate: What learning rate to initialize the optimizer with.
    optimizer_class: Which optimization algorithm to use.
    clip_gradient_norm: Magnitude of the gradient to clip to.
    regularization_penalty: How much weight to give the regularization loss
                            compared to the label loss.
    num_readers: How many threads to use for I/O operations.
    num_epochs: How many passes to make over the data. 'None' means an
                unlimited number of passes.
  """
  
  global_step = tf.Variable(0, trainable=False, name="global_step")
  
  if FLAGS.accumulate_gradients:
    actual_batch_size = batch_size * FLAGS.apply_every_n_batches 
  else:
    actual_batch_size = batch_size

  learning_rate = tf.train.exponential_decay(
      base_learning_rate,
      global_step * actual_batch_size,
      learning_rate_decay_examples,
      learning_rate_decay,
      staircase=True)
  tf.summary.scalar('learning_rate', learning_rate)

  optimizer = optimizer_class(learning_rate)

  image_id, image_data, image_mask = (
      get_input_data_tensors(
          train_data_reader,
          train_data_pattern,
          batch_size=batch_size,
          num_readers=num_readers,
          num_epochs=num_epochs))

  _, _, true_mask = (
      get_input_data_tensors(
          true_label_reader,
          true_label_pattern,
          batch_size=batch_size,
          num_readers=num_readers,
          num_epochs=num_epochs))

  model_input = image_data
  tf.summary.histogram("model/input", model_input)

  with tf.name_scope("refiner_model"):
    result = refiner_model.create_model(
        model_input,
        scope="refiner_model",
        l2_penalty=FLAGS.l2_penalty)
    print "result", result

  for variable in slim.get_model_variables():
    tf.summary.histogram(variable.op.name, variable)

  predictions = result["predictions"]
  if "loss" in result.keys():
    label_loss = result["loss"]
  else:
    refiner_loss = refiner_loss_fn.calculate_loss(predictions, image_mask)
    image_input = tf.cast(tf.squeeze(image_data, axis=3) > 127, tf.int32)
    similarity_loss = similarity_loss_fn.calculate_loss(predictions, image_input)
    tf.summary.scalar("refiner_loss", refiner_loss)
    tf.summary.scalar("similarity_loss", similarity_loss)
    label_loss = refiner_loss + similarity_loss
    tf.summary.scalar("label_loss", label_loss)

    tf.add_to_collection("refiner_loss", refiner_loss)
    tf.add_to_collection("similarity_loss", similarity_loss)

  tf.summary.histogram("model/predictions", predictions)
  tf.summary.scalar("label_loss", label_loss)

  if "regularization_loss" in result.keys():
    reg_loss = result["regularization_loss"]
  else:
    reg_loss = tf.constant(0.0)
  
  reg_losses = tf.losses.get_regularization_losses(scope="refiner_model")
  if reg_losses:
    reg_loss += tf.add_n(reg_losses)
  
  if regularization_penalty != 0:
    tf.summary.scalar("reg_loss", reg_loss)

  # Adds update_ops (e.g., moving average updates in batch normalization) as
  # a dependency to the train_op.
  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope="refiner_model")
  if "update_ops" in result.keys():
    update_ops += result["update_ops"]
  if update_ops:
    with tf.control_dependencies(update_ops):
      barrier = tf.no_op(name="gradient_barrier")
      with tf.control_dependencies([barrier]):
        bar_label_loss = tf.identity(label_loss)
  else:
    bar_label_loss = label_loss  # no update ops to wait on

  # Incorporate the L2 weight penalties etc.
  final_loss = regularization_penalty * reg_loss + bar_label_loss
  optimizing(optimizer, final_loss, clip_gradient_norm, global_step, prefix="refiner", scope="refiner_model")

  labels = tf.cast(image_mask, tf.int32)
  float_labels = tf.cast(image_mask, tf.float32)

  bool_predictions = tf.greater(predictions, prediction_threshold)
  true_pos = tf.cast(
      tf.reduce_sum(
          tf.cast(labels > 0, tf.int32) *
          tf.cast(predictions > prediction_threshold, tf.int32)), tf.float32)
  false_pos = tf.cast(
      tf.reduce_sum(
          tf.cast(labels <= 0, tf.int32) *
          tf.cast(predictions > prediction_threshold, tf.int32)), tf.float32)
  false_neg = tf.cast(
      tf.reduce_sum(
          tf.cast(labels > 0, tf.int32) *
          tf.cast(predictions <= prediction_threshold, tf.int32)), tf.float32)
  # Note: 2*TP / (2*TP + FP + FN) is the Dice / F1 overlap, stored here under
  # the name mean_iou as in the original code.
  mean_iou = (2.0 * true_pos + 1e-7) / (
      2.0 * true_pos + false_pos + false_neg + 1e-7)
  print(mean_iou)

  num_examples = tf.shape(labels)[0]

  tf.add_to_collection("global_step", global_step)
  tf.add_to_collection("loss", label_loss)
  tf.add_to_collection("id_batch", image_id)
  tf.add_to_collection("predictions", predictions)
  tf.add_to_collection("model_input", model_input)
  tf.add_to_collection("num_examples", num_examples)
  tf.add_to_collection("labels", labels)
  tf.add_to_collection("float_labels", float_labels)
  tf.add_to_collection("bool_predictions", bool_predictions)
  tf.add_to_collection("mean_iou", mean_iou)


  def split_into_small_patches(masks, label_value):
    masks = tf.expand_dims(masks, axis=3)
    masks = tf.pad(masks, paddings=[[0,0], [0,0], [1,1], [0,0]])
    PATCH_SIZE = [1, 320, 320, 1]
    HALF_PATCH_SIZE = [1, 160, 160, 1]
    patches = tf.extract_image_patches(masks, PATCH_SIZE, HALF_PATCH_SIZE, [1,1,1,1], "VALID")
    patches = tf.reshape(patches, [-1, 320, 320, 1])
    if label_value == 0:
      labels = tf.zeros([tf.shape(patches)[0],1])
    else:
      labels = tf.ones([tf.shape(patches)[0],1])
    return patches, labels


  with tf.name_scope("discriminator_model"):
    p_patches, p_labels = split_into_small_patches(predictions, 0)
    t_patches, t_labels = split_into_small_patches(tf.cast(true_mask, tf.float32), 1)
    disc_batch = tf.concat([p_patches, t_patches], axis=0)
    disc_labels = tf.concat([p_labels, t_labels], axis=0)
    print "disc_batch", disc_batch
    print "disc_labels", disc_labels

    disc_result = discriminator_model.create_model(
        disc_batch,
        scope="discriminator_model",
        l2_penalty=FLAGS.l2_penalty)

    print "disc_result", disc_result

  for variable in slim.get_model_variables():
    tf.summary.histogram(variable.op.name, variable)

  disc_predictions = disc_result["predictions"]
  if "loss" in disc_result.keys():
    disc_label_loss = disc_result["loss"]
  else:
    disc_label_loss = discriminator_loss_fn.calculate_loss(disc_predictions, disc_labels) * 20000
    tf.summary.scalar("discriminator_loss", disc_label_loss)

  tf.summary.histogram("model/disc_predictions", disc_predictions)

  if "regularization_loss" in disc_result.keys():
    disc_reg_loss = disc_result["regularization_loss"]
  else:
    disc_reg_loss = tf.constant(0.0)
  
  disc_reg_losses = tf.losses.get_regularization_losses(scope="discriminator_model")
  if disc_reg_losses:
    disc_reg_loss += tf.add_n(disc_reg_losses)
  
  if regularization_penalty != 0:
    tf.summary.scalar("disc_reg_loss", disc_reg_loss)

  # Adds update_ops (e.g., moving average updates in batch normalization) as
  # a dependency to the train_op.
  disc_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope="discriminator_model")
  if "update_ops" in disc_result.keys():
    disc_update_ops += disc_result["update_ops"]
  if disc_update_ops:
    with tf.control_dependencies(disc_update_ops):
      disc_barrier = tf.no_op(name="disc_gradient_barrier")
      with tf.control_dependencies([disc_barrier]):
        bar_disc_label_loss = tf.identity(disc_label_loss)
  else:
    bar_disc_label_loss = disc_label_loss  # no update ops to wait on

  # Incorporate the L2 weight penalties etc.
  disc_final_loss = regularization_penalty * disc_reg_loss + bar_disc_label_loss
  tf.add_to_collection("discriminator_loss", disc_label_loss)

  optimizing(optimizer, disc_final_loss, clip_gradient_norm, global_step, prefix="discriminator", scope="discriminator_model")

  # refiner 2
  refiner2_label_loss = label_loss - disc_label_loss

  # Adds update_ops (e.g., moving average updates in batch normalization) as
  # a dependency to the train_op.
  refiner2_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope="refiner_model")
  if "update_ops" in result.keys():
    refiner2_update_ops += result["update_ops"]
  if refiner2_update_ops:
    with tf.control_dependencies(refiner2_update_ops):
      refiner2_barrier = tf.no_op(name="gradient_barrier")
      with tf.control_dependencies([refiner2_barrier]):
        bar_refiner2_label_loss = tf.identity(refiner2_label_loss)
  else:
    bar_refiner2_label_loss = refiner2_label_loss  # no update ops to wait on

  refiner2_final_loss = regularization_penalty * reg_loss + bar_refiner2_label_loss
  optimizing(optimizer, refiner2_final_loss, clip_gradient_norm, global_step, prefix="refiner2", scope="refiner_model")
  tf.add_to_collection("refiner2_loss", refiner2_label_loss)
Esempio n. 29
0
def build_graph(reader,
                model,
                train_data_pattern,
                label_loss_fn=losses.CrossEntropyLoss(),
                batch_size=1000,
                base_learning_rate=0.01,
                learning_rate_decay_examples=1000000,
                learning_rate_decay=0.95,
                optimizer_class=tf.train.AdamOptimizer,
                clip_gradient_norm=1.0,
                regularization_penalty=1,
                num_readers=1,
                num_epochs=None):
    """Creates the Tensorflow graph.
  This will only be called once in the life of
  a training model, because after the graph is created the model will be
  restored from a meta graph file rather than being recreated.
  Args:
    reader: The data file reader. It should inherit from BaseReader.
    model: The core model (e.g. logistic or neural net). It should inherit
           from BaseModel.
    train_data_pattern: glob path to the training data files.
    label_loss_fn: What kind of loss to apply to the model. It should inherit
                from BaseLoss.
    batch_size: How many examples to process at a time.
    base_learning_rate: What learning rate to initialize the optimizer with.
    optimizer_class: Which optimization algorithm to use.
    clip_gradient_norm: Magnitude of the gradient to clip to.
    regularization_penalty: How much weight to give the regularization loss
                            compared to the label loss.
    num_readers: How many threads to use for I/O operations.
    num_epochs: How many passes to make over the data. 'None' means an
                unlimited number of passes.
  """

    global_step = tf.Variable(0, trainable=False, name="global_step")

    local_device_protos = device_lib.list_local_devices()
    gpus = [x.name for x in local_device_protos if x.device_type == 'GPU']
    gpus = gpus[:FLAGS.num_gpu]
    num_gpus = len(gpus)

    if num_gpus > 0:
        logging.info("Using the following GPUs to train: " + str(gpus))
        num_towers = num_gpus
        device_string = '/gpu:%d'
    else:
        logging.info("No GPUs found. Training on CPU.")
        num_towers = 1
        device_string = '/cpu:%d'

    learning_rate = tf.train.exponential_decay(base_learning_rate,
                                               global_step * batch_size *
                                               num_towers,
                                               learning_rate_decay_examples,
                                               learning_rate_decay,
                                               staircase=True)
    tf.summary.scalar('learning_rate', learning_rate)

    optimizer = optimizer_class(learning_rate)
    unused_video_id, model_input_raw, labels_batch, num_frames = (
        get_input_data_tensors(reader,
                               train_data_pattern,
                               batch_size=batch_size * num_towers,
                               num_readers=num_readers,
                               num_epochs=num_epochs))
    tf.summary.histogram("model/input_raw", model_input_raw)

    feature_dim = len(model_input_raw.get_shape()) - 1

    model_input = tf.nn.l2_normalize(model_input_raw, feature_dim)

    tower_inputs = tf.split(model_input, num_towers)
    tower_labels = tf.split(labels_batch, num_towers)
    tower_num_frames = tf.split(num_frames, num_towers)
    tower_gradients = []
    tower_predictions = []
    tower_label_losses = []
    tower_reg_losses = []
    for i in range(num_towers):
        # For some reason these 'with' statements can't be combined onto the same
        # line. They have to be nested.
        with tf.device(device_string % i):
            with (tf.variable_scope(("tower"), reuse=True if i > 0 else None)):
                with (slim.arg_scope(
                    [slim.model_variable, slim.variable],
                        device="/cpu:0" if num_gpus != 1 else "/gpu:0")):
                    result = model.create_model(tower_inputs[i],
                                                num_frames=tower_num_frames[i],
                                                vocab_size=reader.num_classes,
                                                labels=tower_labels[i])
                    for variable in slim.get_model_variables():
                        tf.summary.histogram(variable.op.name, variable)

                    predictions = result["predictions"]
                    tower_predictions.append(predictions)

                    if "loss" in result.keys():
                        label_loss = result["loss"]
                    else:
                        label_loss = label_loss_fn.calculate_loss(
                            predictions, tower_labels[i])

                    if "regularization_loss" in result.keys():
                        reg_loss = result["regularization_loss"]
                    else:
                        reg_loss = tf.constant(0.0)

                    reg_losses = tf.losses.get_regularization_losses()
                    if reg_losses:
                        reg_loss += tf.add_n(reg_losses)

                    tower_reg_losses.append(reg_loss)

                    # Adds update_ops (e.g., moving average updates in batch normalization) as
                    # a dependency to the train_op.
                    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                    if "update_ops" in result.keys():
                        update_ops += result["update_ops"]
                    if update_ops:
                        with tf.control_dependencies(update_ops):
                            barrier = tf.no_op(name="gradient_barrier")
                            with tf.control_dependencies([barrier]):
                                label_loss = tf.identity(label_loss)

                    tower_label_losses.append(label_loss)

                    # Incorporate the L2 weight penalties etc.
                    final_loss = regularization_penalty * reg_loss + label_loss
                    gradients = optimizer.compute_gradients(
                        final_loss, colocate_gradients_with_ops=False)
                    tower_gradients.append(gradients)
    label_loss = tf.reduce_mean(tf.stack(tower_label_losses))
    tf.summary.scalar("label_loss", label_loss)
    if regularization_penalty != 0:
        reg_loss = tf.reduce_mean(tf.stack(tower_reg_losses))
        tf.summary.scalar("reg_loss", reg_loss)
    merged_gradients = utils.combine_gradients(tower_gradients)

    if clip_gradient_norm > 0:
        with tf.name_scope('clip_grads'):
            merged_gradients = utils.clip_gradient_norms(
                merged_gradients, clip_gradient_norm)

    train_op = optimizer.apply_gradients(merged_gradients,
                                         global_step=global_step)

    tf.add_to_collection("global_step", global_step)
    tf.add_to_collection("loss", label_loss)
    tf.add_to_collection("predictions", tf.concat(tower_predictions, 0))
    tf.add_to_collection("input_batch_raw", model_input_raw)
    tf.add_to_collection("input_batch", model_input)
    tf.add_to_collection("num_frames", num_frames)
    tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32))
    tf.add_to_collection("train_op", train_op)
def build_graph(reader,
                generator_model,
                discriminator_model,
                train_data_pattern,
                label_loss_fn=losses.CrossEntropyLoss(),
                batch_size=1000,
                base_learning_rate=0.01,
                learning_rate_decay_examples=1000000,
                learning_rate_decay=0.95,
                optimizer_class=tf.train.AdamOptimizer,
                clip_gradient_norm=1.0,
                regularization_penalty=1,
                num_readers=1,
                num_epochs=None):
    """Creates the Tensorflow graph.

  This will only be called once in the life of
  a training model, because after the graph is created the model will be
  restored from a meta graph file rather than being recreated.

  Args:
    reader: The data file reader. It should inherit from BaseReader.
    generator_model: The core model for generator. It should inherit from
                     BaseModel.
    discriminator_model: The core model for discriminator. It should inherit from
                         BaseModel.
    train_data_pattern: glob path to the training data files.
    label_loss_fn: What kind of loss to apply to the model. It should inherit
                from BaseLoss.
    batch_size: How many examples to process at a time.
    base_learning_rate: What learning rate to initialize the optimizer with.
    optimizer_class: Which optimization algorithm to use.
    clip_gradient_norm: Magnitude of the gradient to clip to.
    regularization_penalty: How much weight to give the regularization loss
                            compared to the label loss.
    num_readers: How many threads to use for I/O operations.
    num_epochs: How many passes to make over the data. 'None' means an
                unlimited number of passes.
  """

    global_step = tf.Variable(0, trainable=False, name="global_step")

    gpus = get_gpus()
    num_gpus = len(gpus)

    if num_gpus > 0:
        logging.info("Using the following GPUs to train: " + str(gpus))
        num_towers = num_gpus
        device_string = '/gpu:%d'
    else:
        logging.info("No GPUs found. Training on CPU.")
        num_towers = 1
        device_string = '/cpu:%d'

    learning_rate = tf.train.exponential_decay(base_learning_rate,
                                               global_step * batch_size *
                                               num_towers,
                                               learning_rate_decay_examples,
                                               learning_rate_decay,
                                               staircase=True)
    tf.summary.scalar('learning_rate', learning_rate)

    optimizer = optimizer_class(learning_rate)

    model_input_raw, _ = (get_input_data_tensors(reader,
                                                 train_data_pattern,
                                                 batch_size=batch_size *
                                                 num_towers,
                                                 num_readers=num_readers,
                                                 num_epochs=num_epochs))
    tf.summary.histogram("model/input_raw", model_input_raw)
    model_input = model_input_raw

    noise_input = tf.placeholder(
        tf.float32, shape=[None, random_noise_generator.get_dim()])

    image_width, image_height = reader.get_image_size()

    tower_inputs = tf.split(model_input, num_towers)
    tower_noise_input = tf.split(noise_input, num_towers)
    tower_D_gradients = []
    tower_G_gradients = []
    tower_generated_images = []
    tower_predictions_for_fake = []
    tower_predictions_for_real = []
    tower_D_losses = []
    tower_G_losses = []

    for i in range(num_towers):
        # For some reason these 'with' statements can't be combined onto the same
        # line. They have to be nested.
        with tf.device(device_string % i):
            with (tf.variable_scope(("tower"), reuse=True if i > 0 else None)):
                with (slim.arg_scope(
                    [slim.model_variable, slim.variable],
                        device="/cpu:0" if num_gpus != 1 else "/gpu:0")):
                    generator_model.create_model(image_width * image_height)
                    discriminator_model.create_model(image_width *
                                                     image_height)

                    generated_result = generator_model.run_model(
                        tower_noise_input[i])
                    generated_images = generated_result["output"]

                    generated_images_shaped = tf.reshape(
                        generated_images, [-1, image_height, image_width, 1])
                    tf.summary.image('generated_images',
                                     generated_images_shaped, 10)
                    tower_generated_images.append(generated_images)

                    result_from_fake = discriminator_model.run_model(
                        generated_images)
                    result_from_real = discriminator_model.run_model(
                        tower_inputs[i])
                    for variable in slim.get_model_variables():
                        tf.summary.histogram(variable.op.name, variable)

                    predictions_for_fake = result_from_fake["predictions"]
                    predictions_for_real = result_from_real["predictions"]
                    tower_predictions_for_fake.append(predictions_for_fake)
                    tower_predictions_for_real.append(predictions_for_real)

                    logits_for_fake = result_from_fake["logits"]
                    logits_for_real = result_from_real["logits"]
                    D_loss_fake = label_loss_fn.calculate_loss(
                        logits_for_fake, tf.zeros_like(logits_for_fake))
                    D_loss_real = label_loss_fn.calculate_loss(
                        logits_for_real, tf.ones_like(logits_for_real))
                    D_loss = D_loss_fake + D_loss_real
                    tower_D_losses.append(D_loss)

                    G_loss = label_loss_fn.calculate_loss(
                        logits_for_fake, tf.ones_like(logits_for_fake))
                    tower_G_losses.append(G_loss)

                    D_var = discriminator_model.get_variables()
                    D_gradients = optimizer.compute_gradients(D_loss,
                                                              var_list=D_var)
                    tower_D_gradients.append(D_gradients)

                    G_var = generator_model.get_variables()
                    G_gradients = optimizer.compute_gradients(G_loss,
                                                              var_list=G_var)
                    tower_G_gradients.append(G_gradients)

    D_loss = tf.reduce_mean(tf.stack(tower_D_losses))
    G_loss = tf.reduce_mean(tf.stack(tower_G_losses))
    tf.summary.scalar("D_loss", D_loss)
    tf.summary.scalar("G_loss", G_loss)
    merged_D_gradients = utils.combine_gradients(tower_D_gradients)
    merged_G_gradients = utils.combine_gradients(tower_G_gradients)

    if clip_gradient_norm > 0:
        with tf.name_scope('clip_grads'):
            merged_D_gradients = utils.clip_gradient_norms(
                merged_D_gradients, clip_gradient_norm)
            merged_G_gradients = utils.clip_gradient_norms(
                merged_G_gradients, clip_gradient_norm)

    # Attach global_step only once so that it will be increased by 1.
    D_train_op = optimizer.apply_gradients(merged_D_gradients)
    G_train_op = optimizer.apply_gradients(merged_G_gradients,
                                           global_step=global_step)

    tf.add_to_collection("global_step", global_step)
    tf.add_to_collection("D_loss", D_loss)
    tf.add_to_collection("G_loss", G_loss)
    tf.add_to_collection("p_for_fake", tf.concat(tower_predictions_for_fake,
                                                 0))
    tf.add_to_collection("p_for_data", tf.concat(tower_predictions_for_real,
                                                 0))
    tf.add_to_collection("input_batch_raw", model_input_raw)
    tf.add_to_collection("input_batch", model_input)
    tf.add_to_collection("generated_images",
                         tf.concat(tower_generated_images, 0))
    tf.add_to_collection("D_train_op", D_train_op)
    tf.add_to_collection("G_train_op", G_train_op)
    tf.add_to_collection("noise_input_placeholder", noise_input)