Example 1
def rcnn_base(inputs,
              hp,
              rois,
              roi_scores,
              bbox_labels,
              roi_pool_layer,
              head_to_tail,
              trainable=True,
              anchor_labels=None,
              cls_weights_initializer=None,
              reg_weights_initializer=None):
    """
    The R-CNN head takes the region proposals produced by the RPN, pools features for each
    ROI, and outputs per-class scores and bounding-box refinements.
    :param inputs: image
    :param hp: hyperparameters
    :param rois: regions of interest
    :param roi_scores: scores of all rois
    :param roi_pool_layer: ROI pooling function (layer)
    :param head_to_tail: fully connected network
    :param trainable: whether to train this network
    :param bbox_labels: target bounding boxes
    :param anchor_labels: anchor labels when using the RPN to generate ROIs
    :param cls_weights_initializer: weights initializer for the classification layer
    :param reg_weights_initializer: weights initializer for the regression layer
    :return: rcnn_info (dict) for loss computation, print_pool (dict) for debugging,
        activation for visualization
    """
    # For debugging
    print_pool = dict()
    # For activation
    activation = None
    with tf.variable_scope("rcnn"):
        with tf.device(helper.get_device_str(device_id=0,
                                             num_gpus=hp.num_gpus)):
            # Fill rcnn's bbox_label, class_label, in_weights, out_weights, rois
            rcnn_info, rois, _ = helper.pack_proposal_info(
                anchor_labels,
                rois,
                bbox_scores=roi_scores,
                bbox_targets=tf.squeeze(bbox_labels, axis=0),
                num_class=hp.num_class)
            pool = roi_pool_layer(inputs, rois)
            fc = head_to_tail(pool)
            probs, predicts, scores = _rcnn_cls_layer(
                fc,
                hp.num_class,
                trainable=trainable,
                weights_initializer=cls_weights_initializer)
            deltas = _rcnn_reg_layer(
                fc,
                4 * hp.num_class,
                trainable=trainable,
                weights_initializer=reg_weights_initializer)
            # Pack rcnn info into dict for calculating loss, only for training
            misc.append_params(rcnn_info,
                               class_scores=scores,
                               class_predicts=predicts,
                               class_probs=probs,
                               bbox_predicts=deltas)
    return rcnn_info, print_pool, activation
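
Every example on this page routes ops through a get_device_str helper. For reference, here is a minimal sketch of such a helper; the actual helper.get_device_str / model_helper.get_device_str in each repository may differ (the variants in Examples 5, 7 and 10, for instance, take only a base_gpu argument):

def get_device_str(device_id, num_gpus):
    """Return a TF device string, falling back to CPU when no GPU exists."""
    if num_gpus == 0:
        return "/cpu:0"
    # Round-robin placement: device_id wraps around the available GPUs.
    return "/gpu:%d" % (device_id % num_gpus)

print(get_device_str(0, 0))  # /cpu:0
print(get_device_str(3, 2))  # /gpu:1
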
Example 2
def _build_encoder_simple(model, intent, intent_length, num_units):
    """Build an encoder for intent."""
    with tf.variable_scope("encoder") as scope:
        dtype = scope.dtype
        # Look up embedding; encoder_emb_inp: [batch_size, max_time, num_units] (time_major=False)
        encoder_emb_inp = tf.nn.embedding_lookup(model.embedding_encoder,
                                                 intent)

        cell = model_helper._single_cell(
            num_units,
            model.hparams.dropout,
            model.mode,
            residual_connection=False,
            device_str=model_helper.get_device_str(model.global_gpu_num,
                                                   model.hparams.num_gpus))
        model.global_gpu_num += 1

        encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
            cell,
            encoder_emb_inp,
            dtype=dtype,
            sequence_length=intent_length,
            time_major=False,
            swap_memory=True)

    return encoder_outputs, encoder_state, encoder_emb_inp
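
The model.global_gpu_num counter seen here implements round-robin device placement by hand: each new cell takes the next device and the counter advances. A self-contained illustration of the pattern, using hypothetical layer names and the modulo rule from the Example 1 sketch:

num_gpus = 2
global_gpu_num = 0
for name in ("intent_encoder", "kb_encoder_0", "kb_encoder_1", "decoder"):
    device = "/gpu:%d" % (global_gpu_num % num_gpus) if num_gpus else "/cpu:0"
    print("%s -> %s" % (name, device))  # alternates /gpu:0, /gpu:1, /gpu:0, ...
    global_gpu_num += 1
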
Example 3
def _build_encoder_hierarchial(model, data_source, num_units):
    """Build an encoder for kb."""

    source = data_source  # shape: [batch_size * num_entry, 13]

    with tf.variable_scope("encoder") as scope:
        dtype = scope.dtype
        # Look up embedding; encoder_emb_inp: [batch_size, max_time, num_units] (time_major=False)
        encoder_emb_inp = tf.nn.embedding_lookup(model.embedding_encoder,
                                                 source)

        # encoder_outputs: [batch_size, max_time, num_units]
        cell_0 = model_helper._single_cell(
            num_units,
            model.hparams.dropout,
            model.mode,
            residual_connection=False,
            device_str=model_helper.get_device_str(model.global_gpu_num,
                                                   model.hparams.num_gpus))
        model.global_gpu_num += 1
        with tf.variable_scope("hierarchial_rnn_1") as scope:
            _, encoder_final_states_0 = tf.nn.dynamic_rnn(cell_0,
                                                          encoder_emb_inp,
                                                          dtype=dtype,
                                                          time_major=False,
                                                          swap_memory=True)
        encoder_final_states_0 = tf.reshape(encoder_final_states_0,
                                            [model.batch_size, -1, num_units])
        cell_1 = model_helper._single_cell(
            num_units,
            model.hparams.dropout,
            model.mode,
            residual_connection=False,
            device_str=model_helper.get_device_str(model.global_gpu_num,
                                                   model.hparams.num_gpus))
        model.global_gpu_num += 1
        with tf.variable_scope("hierarchial_rnn_2") as scope:
            encoder_outputs_1, encoder_state_1 = tf.nn.dynamic_rnn(
                cell_1,
                encoder_final_states_0,
                dtype=dtype,
                time_major=False,
                swap_memory=True)
    return encoder_outputs_1, encoder_state_1, encoder_emb_inp
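
The hierarchical trick above runs the first RNN over a flattened batch of batch_size * num_entry entries, then reshapes the final states to [batch_size, num_entry, num_units] so the second RNN consumes one summary vector per entry as a time step. A numpy sketch of that reshape (shapes are illustrative):

import numpy as np

batch_size, num_entry, num_units = 4, 5, 8
# One final-state vector per (batch, entry) pair from the first RNN.
final_states_0 = np.random.rand(batch_size * num_entry, num_units)
# Regroup per example; the second RNN sees num_entry time steps.
rnn2_input = final_states_0.reshape(batch_size, -1, num_units)
print(rnn2_input.shape)  # (4, 5, 8)
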
Example 4
  def build_graph(self, hparams, scope=None):
    """Subclass must implement this method.

    Creates a sequence-to-sequence model with dynamic RNN decoder API.
    Args:
      hparams: Hyperparameter configurations.
      scope: VariableScope for the created subgraph; default "dynamic_seq2seq".

    Returns:
      A tuple of the form (logits, loss, final_context_state, sample_id),
      where:
        logits: float32 Tensor [batch_size x num_decoder_symbols].
        loss: loss = the total loss / batch_size.
        final_context_state: the final state of decoder RNN.
        sample_id: sampling indices.

    Raises:
      ValueError: if encoder_type differs from mono and bi, or
        attention_option is not (luong | scaled_luong |
        bahdanau | normed_bahdanau).
    """
    utils.print_out("# Creating %s graph ..." % self.mode)

    # Projection
    if not self.extract_encoder_layers:
      with tf.variable_scope(scope or "build_network"):
        with tf.variable_scope("decoder/output_projection"):
          self.output_layer = tf.layers.Dense(
              self.tgt_vocab_size, use_bias=False, name="output_projection")

    with tf.variable_scope(scope or "dynamic_seq2seq", dtype=self.dtype):
      # Encoder
      if hparams.language_model:  # no encoder for language modeling
        utils.print_out("  language modeling: no encoder")
        self.encoder_outputs = None
        encoder_state = None
      else:
        self.encoder_outputs, encoder_state = self._build_encoder(hparams)

      # Skip decoder if extracting only encoder layers
      if self.extract_encoder_layers:
        return

      ## Decoder
      logits, decoder_cell_outputs, sample_id, final_context_state = (
          self._build_decoder(self.encoder_outputs, encoder_state, hparams))

      ## Loss
      if self.mode != tf.contrib.learn.ModeKeys.INFER:
        with tf.device(model_helper.get_device_str(self.num_encoder_layers - 1,
                                                   self.num_gpus)):
          loss = self._compute_loss(logits, decoder_cell_outputs)
      else:
        loss = tf.constant(0.0)

      return logits, loss, final_context_state, sample_id
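
Note how the loss is pinned to the GPU hosting the last encoder layer via get_device_str(self.num_encoder_layers - 1, self.num_gpus). What that resolves to under the Example 1 sketch (values hypothetical):

def get_device_str(device_id, num_gpus):  # as sketched in Example 1
    return "/cpu:0" if num_gpus == 0 else "/gpu:%d" % (device_id % num_gpus)

for num_encoder_layers, num_gpus in [(2, 0), (2, 1), (4, 2)]:
    print((num_encoder_layers, num_gpus), "->",
          get_device_str(num_encoder_layers - 1, num_gpus))
# (2, 0) -> /cpu:0, (2, 1) -> /gpu:0, (4, 2) -> /gpu:1
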
Example 5
    def _build_decoder_cell(self, hparams, encoder_outputs, encoder_state, source_sequence_length):
        """Build a RNN cell with attention mechanism that can be used by decoder."""
        attention_option = hparams.attention
        attention_architecture = hparams.attention_architecture

        if attention_architecture != "standard":
            raise ValueError(
                "Unknown attention architecture %s" % attention_architecture)

        num_units = hparams.num_units
        num_layers = hparams.num_layers
        num_residual_layers = hparams.num_residual_layers
        beam_width = hparams.beam_width

        dtype = tf.float32

        if self.time_major:
            memory = tf.transpose(encoder_outputs, [1, 0, 2])
        else:
            memory = encoder_outputs

        if self.mode == tf.contrib.learn.ModeKeys.INFER and beam_width > 0:
            memory = tf.contrib.seq2seq.tile_batch(memory, multiplier=beam_width)
            source_sequence_length = tf.contrib.seq2seq.tile_batch(source_sequence_length, multiplier=beam_width)
            encoder_state = tf.contrib.seq2seq.tile_batch(encoder_state, multiplier=beam_width)
            batch_size = self.batch_size * beam_width
        else:
            batch_size = self.batch_size

        attention_mechanism = create_attention_mechanism(attention_option, num_units, memory, source_sequence_length)

        cell = model_helper.create_rnn_cell(unit_type=hparams.unit_type,
                                            num_units=num_units,
                                            num_layers=num_layers,
                                            num_residual_layers=num_residual_layers,
                                            forget_bias=hparams.forget_bias,
                                            dropout=hparams.dropout,
                                            base_gpu=hparams.base_gpu,
                                            mode=self.mode,
                                            single_cell_fn=self.single_cell_fn)

        # Only generate alignment in greedy INFER mode.
        alignment_history = (self.mode == tf.contrib.learn.ModeKeys.INFER and beam_width == 0)

        cell = tf.contrib.seq2seq.AttentionWrapper(cell, attention_mechanism, attention_layer_size=num_units,
                                                   alignment_history=alignment_history, name="attention")

        cell = tf.contrib.rnn.DeviceWrapper(cell, model_helper.get_device_str(hparams.base_gpu))

        if hparams.pass_hidden_state:
            decoder_initial_state = cell.zero_state(batch_size, dtype).clone(cell_state=encoder_state)
        else:
            decoder_initial_state = cell.zero_state(batch_size, dtype)

        return cell, decoder_initial_state
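
When beam search is active, tf.contrib.seq2seq.tile_batch repeats each batch entry beam_width times so every hypothesis sees its own copy of the encoder memory, which is why batch_size is multiplied by beam_width. The effect of tile_batch, sketched with numpy (illustrative shapes):

import numpy as np

batch_size, max_time, num_units, beam_width = 2, 3, 4, 5
memory = np.random.rand(batch_size, max_time, num_units)
# Entry i is repeated beam_width times in a row along the batch axis.
tiled = np.repeat(memory, beam_width, axis=0)
print(tiled.shape)  # (10, 3, 4) == (batch_size * beam_width, max_time, num_units)
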
Example 6
    def build_graph(self, hparams, scope=None):
        """Subclass must implement this method.

    Creates a sequence-to-sequence model with dynamic RNN decoder API.
    Args:
      hparams: Hyperparameter configurations.
      scope: VariableScope for the created subgraph; default "dynamic_seq2seq".

    Returns:
      A tuple of the form (label_logits, loss, label_pred),
      where:
        label_logits: float32 Tensor [batch_size x lbl_vocab_size].
        loss: the total loss / batch_size.
    """
        utils.print_out("\n# Creating %s graph ..." % self.mode)

        with tf.variable_scope(scope or "rnn", dtype=self.dtype):
            # Encoder
            self.encoder_outputs, encoder_state = self._build_encoder(hparams)
            fw_state, bw_state = encoder_state
            print('encoder_outputs: ', self.encoder_outputs.shape)
            print('fw_state.h: ', fw_state.h.shape)
            print('bw_state.h: ', bw_state.h.shape)

            # Linear layer for classification of intent
            encoder_last_state = tf.concat([fw_state.h, bw_state.h], axis=1)
            print('encoder_last_state: ', encoder_last_state.shape)
            print()

            encoder_output_size = encoder_last_state.get_shape()[1].value
            print('encoder_output_size: ', encoder_output_size)
            w = tf.get_variable('w',
                                [encoder_output_size, self.lbl_vocab_size],
                                dtype=tf.float32)
            w_t = tf.transpose(w)
            v = tf.get_variable('v', [self.lbl_vocab_size], dtype=tf.float32)

            # apply the linear layer
            label_logits = tf.nn.xw_plus_b(encoder_last_state, w, v)
            label_pred = tf.argmax(label_logits, 1)
            print('label_scores: ', label_logits.shape)
            print()

            ## Loss
            if self.mode != tf.contrib.learn.ModeKeys.INFER:
                with tf.device(
                        model_helper.get_device_str(
                            self.num_encoder_layers - 1, self.num_gpus)):
                    loss = self._compute_loss(label_logits)
            else:
                loss = tf.constant(0.0)

            return label_logits, loss, label_pred
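
The intent head in this example is a single affine layer over the concatenated forward/backward final states: tf.nn.xw_plus_b(x, w, v) computes x @ w + v. The same computation in numpy (shapes illustrative):

import numpy as np

batch_size, encoder_output_size, lbl_vocab_size = 3, 8, 5
encoder_last_state = np.random.rand(batch_size, encoder_output_size)
w = np.random.rand(encoder_output_size, lbl_vocab_size)
v = np.random.rand(lbl_vocab_size)

label_logits = encoder_last_state @ w + v  # tf.nn.xw_plus_b
label_pred = label_logits.argmax(axis=1)   # tf.argmax(label_logits, 1)
print(label_logits.shape, label_pred)      # (3, 5) and one class id per example
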
Example 7
def create_train_model(model_creator,
                       hparams,
                       scope=None,
                       single_cell_fn=None):
    """Create train graph, model, and iterator."""
    src_file = "%s.%s" % (hparams.train_prefix, hparams.src)
    tgt_file = "%s.%s" % (hparams.train_prefix, hparams.tgt)

    tgt_vocab_file = hparams.tgt_vocab_file

    graph = tf.Graph()

    with graph.as_default():

        tgt_vocab_table = vocab_utils.create_tgt_vocab_table(tgt_vocab_file)

        src_dataset = tf.contrib.data.TextLineDataset(src_file)
        tgt_dataset = tf.contrib.data.TextLineDataset(tgt_file)
        skip_count_placeholder = tf.placeholder(shape=(), dtype=tf.int64)

        iterator = iterator_utils.get_iterator(
            src_dataset,
            tgt_dataset,
            tgt_vocab_table,
            sos=hparams.sos,
            eos=hparams.eos,
            source_reverse=hparams.source_reverse,
            random_seed=hparams.random_seed,
            src_max_len=hparams.src_max_len,
            tgt_max_len=hparams.tgt_max_len,
            skip_count=skip_count_placeholder)

        # Note: One can set model_device_fn to `tf.train.replica_device_setter(ps_tasks)` for distributed training.
        with tf.device(model_helper.get_device_str(hparams.base_gpu)):
            # model_creator: the model class/constructor
            model = model_creator(hparams,
                                  iterator=iterator,
                                  mode=tf.contrib.learn.ModeKeys.TRAIN,
                                  target_vocab_table=tgt_vocab_table,
                                  scope=scope,
                                  single_cell_fn=single_cell_fn)

    return TrainModel(graph=graph,
                      model=model,
                      iterator=iterator,
                      skip_count_placeholder=skip_count_placeholder)
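
TrainModel here is a lightweight container bundling the graph with everything needed to drive training. In NMT-style codebases it is typically a namedtuple along these lines (an assumption; the actual definition lives elsewhere in the repository):

import collections

TrainModel = collections.namedtuple(
    "TrainModel", ("graph", "model", "iterator", "skip_count_placeholder"))

Keeping the graph in the tuple lets callers open a session on train_model.graph and feed train_model.skip_count_placeholder without reaching into the builder's internals.
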
Example 8
    def build_graph(self, hparams, scope=None):
        """Subclass must implement this method.

    Creates a sequence-to-sequence model with dynamic RNN decoder API.
    Args:
      hparams: Hyperparameter configurations.
      scope: VariableScope for the created subgraph; default "dynamic_seq2seq".

    Returns:
      A tuple of the form (logits, loss, final_context_state, sample_id),
      where:
        logits: float32 Tensor [batch_size x num_decoder_symbols].
        loss: the total loss / batch_size.
        final_context_state: The final state of decoder RNN.
        sample_id: sampling indices.

    Raises:
      ValueError: if encoder_type differs from mono and bi, or
        attention_option is not (luong | scaled_luong |
        bahdanau | normed_bahdanau).
    """
        utils.print_out("# creating %s graph ..." % self.mode)
        dtype = tf.float32

        with tf.variable_scope(scope or "dynamic_seq2seq", dtype=dtype):
            # Encoder
            encoder_outputs, encoder_state = self._build_encoder(hparams)

            ## Decoder
            logits, sample_id, final_context_state = self._build_decoder(
                encoder_outputs, encoder_state, hparams)
            print("logits", logits)
            print("sample_id", sample_id)
            print("final_context_state", final_context_state)

            ## Loss
            if self.mode != tf.contrib.learn.ModeKeys.INFER:
                with tf.device(
                        model_helper.get_device_str(
                            self.num_encoder_layers - 1, self.num_gpus)):
                    loss = self._compute_loss(logits)
            else:
                loss = logits

            return logits, loss, final_context_state, sample_id
Example 9
    def build_graph(self, flags):
        """Build A2C graph."""
        with tf.variable_scope(flags.model_name):
            # TODO. Fix tf.device for multiple gpu
            with tf.device(model_helper.get_device_str(0, self.num_gpus)):
                c1 = self.conv2d(self.state, self.cv_num_outputs, 
                                 self.f_height, self.f_width, 
                                 self.stride, scope="conv2d_1")
                # Integer division keeps the kernel and stride sizes integral
                # under Python 3 (assumes f_height, f_width, stride are ints).
                c2 = self.conv2d(c1, self.cv_num_outputs*2,
                                 self.f_height//2, self.f_width//2,
                                 self.stride//2, scope="conv2d_2")
                fc = self.linear(self.flatten(c2), self.num_units,
                                 activation_fn=tf.nn.relu, scope='flat')

                # modify the shape of the fc before rnn
                # [1, None, self.flat_outputs]
                rnn_input = tf.reshape(fc, [1, -1, self.num_units])     
                step_size = tf.shape(rnn_input)[1:2] 

                cell = self.create_rnn_cell()
                self.h_in = cell.zero_state(1, tf.float32)

                rnn_output, self.h_out = tf.nn.dynamic_rnn(
                    cell, rnn_input, initial_state=self.h_in, 
                    sequence_length=step_size)  
                rnn_output = tf.reshape(rnn_output, [-1, self.num_units])

                # policy
                self.policy = self.linear(
                    rnn_output, self.action_size,
                    activation_fn=tf.nn.softmax, scope='policy')
                # value
                self.value = self.linear(
                    rnn_output, 1, scope='value')

                # compute loss
                if self.mode != tf.estimator.ModeKeys.PREDICT:
                    loss = self.compute_loss()
                else:
                    loss = tf.constant(0.0)

                return loss
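
The reshape before the RNN packs an entire rollout into a single length-N sequence: [num_steps, num_units] becomes [1, num_steps, num_units], and step_size recovers the sequence length for dynamic_rnn. In numpy (illustrative):

import numpy as np

num_steps, num_units = 7, 16
fc = np.random.rand(num_steps, num_units)  # one row per environment step
rnn_input = fc.reshape(1, -1, num_units)   # a batch holding one sequence
step_size = rnn_input.shape[1]             # what tf.shape(rnn_input)[1:2] yields
print(rnn_input.shape, step_size)          # (1, 7, 16) 7
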
Example 10
    def __init__(self,
                 hparams,
                 mode,
                 iterator,
                 target_vocab_table,
                 reverse_target_vocab_table=None,
                 scope=None,
                 single_cell_fn=None):

        """Create the model.

        Args:
          hparams: Hyperparameter configurations.
          mode: TRAIN | EVAL | INFER
          iterator: Dataset Iterator that feeds data.
          target_vocab_table: Lookup table mapping target words to ids.
          reverse_target_vocab_table: Lookup table mapping ids to target words. Only
            required in INFER mode. Defaults to None.
          scope: scope of the model.
          single_cell_fn: allow for adding customized cell. When not specified,
            we default to model_helper._single_cell
        """


        assert isinstance(iterator, iterator_utils.BatchedInput)

        self.iterator = iterator
        self.mode = mode
        self.tgt_vocab_table = target_vocab_table

        self.tgt_vocab_size = hparams.tgt_vocab_size
        self.num_layers = hparams.num_layers
        self.num_gpus = hparams.num_gpus
        self.time_major = hparams.time_major

        self.cnn_input = self.iterator.source
        if self.mode == tf.contrib.learn.ModeKeys.TRAIN:
            self.cnn = AlexNet(self.cnn_input, (1 - hparams.dropout),
                               model_helper.get_device_str(hparams.base_gpu))
        else:
            self.cnn = AlexNet(self.cnn_input, 1,
                               model_helper.get_device_str(hparams.base_gpu))

        # Initializer
        initializer = model_helper.get_initializer(hparams.init_op, hparams.random_seed, hparams.init_weight)
        tf.get_variable_scope().set_initializer(initializer)

        # Embeddings
        self.init_embeddings(hparams, scope)
        self.batch_size = tf.size(self.iterator.source_sequence_length)

        # Projection
        with tf.variable_scope(scope or "build_network"):
            with tf.variable_scope("decoder/output_projection"):
                self.output_layer = layers_core.Dense(hparams.tgt_vocab_size, use_bias=False, name="output_projection")

        # To make it flexible for external code to add other cell types
        # If not specified, we will later use model_helper._single_cell
        self.single_cell_fn = single_cell_fn

        ## Train graph
        res = self.build_graph(hparams, scope=scope)

        if self.mode == tf.contrib.learn.ModeKeys.TRAIN:
            self.train_loss = res[1]
            self.word_count = tf.reduce_sum(self.iterator.target_sequence_length)
        elif self.mode == tf.contrib.learn.ModeKeys.EVAL:
            self.eval_loss = res[1]
        elif self.mode == tf.contrib.learn.ModeKeys.INFER:
            self.infer_logits, _, self.final_context_state, self.sample_id = res
            self.sample_words = reverse_target_vocab_table.lookup(tf.to_int64(self.sample_id))

        if self.mode != tf.contrib.learn.ModeKeys.INFER:
            ## Count the number of predicted words to compute ppl.
            self.predict_count = tf.reduce_sum(self.iterator.target_sequence_length)

        ## Learning rate
        print("  start_decay_step=%d, learning_rate=%g, decay_steps %d, decay_factor %g" % (hparams.start_decay_step, hparams.learning_rate, hparams.decay_steps, hparams.decay_factor))

        self.global_step = tf.Variable(0, trainable=False)

        params = tf.trainable_variables()

        # Gradients and SGD update operation for training the model.
        # Arrange for the embedding vars to appear at the beginning.
        if self.mode == tf.contrib.learn.ModeKeys.TRAIN:
            if hparams.optimizer == "sgd":
                self.learning_rate = tf.cond(self.global_step < hparams.start_decay_step,
                                             lambda: tf.constant(hparams.learning_rate),
                                             lambda: tf.train.exponential_decay(hparams.learning_rate,
                                                                                (self.global_step - hparams.start_decay_step),
                                                                                hparams.decay_steps,
                                                                                hparams.decay_factor,
                                                                                staircase=True),
                                             name="learning_rate")
                opt = tf.train.GradientDescentOptimizer(self.learning_rate)
                tf.summary.scalar("lr", self.learning_rate)

            elif hparams.optimizer == "adam":
                assert float(hparams.learning_rate) <= 0.001, "! High Adam learning rate %g" % hparams.learning_rate
                self.learning_rate = tf.constant(hparams.learning_rate)
                opt = tf.train.AdamOptimizer(self.learning_rate)

            gradients = tf.gradients(
                self.train_loss, params,
                colocate_gradients_with_ops=hparams.colocate_gradients_with_ops)

            clipped_gradients, gradient_norm_summary = model_helper.gradient_clip(
                gradients, max_gradient_norm=hparams.max_gradient_norm)

            self.update = opt.apply_gradients(zip(clipped_gradients, params),
                                              global_step=self.global_step)

            # Summary
            self.train_summary = tf.summary.merge(
                [tf.summary.scalar("lr", self.learning_rate),
                 tf.summary.scalar("train_loss", self.train_loss)] +
                gradient_norm_summary)

        if self.mode == tf.contrib.learn.ModeKeys.INFER:
            self.infer_summary = self._get_infer_summary(hparams)

        # Saver
        if hparams.eval_on_fly:
            self.saver = tf.train.Saver(tf.global_variables(),
                                        save_relative_paths=True)
        else:
            self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=None,
                                        save_relative_paths=True)

        # Print trainable variables
        utils.print_out("# Trainable variables")
        for param in params:
            utils.print_out("  %s, %s, %s" % (param.name, str(param.get_shape()), param.op.device))
    def _build_decoder_cell(self, hparams, encoder_outputs, encoder_state,
                            source_sequence_length):
        """Build a RNN cell with attention mechanism that can be used by decoder."""
        # No Attention
        if not self.has_attention:
            return super(AttentionModel,
                         self)._build_decoder_cell(hparams, encoder_outputs,
                                                   encoder_state,
                                                   source_sequence_length)
        elif hparams.attention_architecture != "standard":
            raise ValueError("Unknown attention architecture %s" %
                             hparams.attention_architecture)

        num_units = hparams.num_units
        num_layers = self.num_decoder_layers
        #num_residual_layers = self.num_decoder_residual_layers
        infer_mode = hparams.infer_mode

        dtype = tf.float32

        # Ensure memory is batch-major
        if self.time_major:
            memory = tf.transpose(encoder_outputs, [1, 0, 2])
        else:
            memory = encoder_outputs

        if (self.mode == tf.contrib.learn.ModeKeys.INFER
                and infer_mode == "beam_search"):
            memory, source_sequence_length, encoder_state, batch_size = (
                self._prepare_beam_search_decoder_inputs(
                    hparams.beam_width, memory, source_sequence_length,
                    encoder_state))
        else:
            batch_size = self.batch_size

        # Attention
        attention_mechanism = self.attention_mechanism_fn(
            hparams.attention, num_units, memory, source_sequence_length,
            self.mode)

        cell = model_helper.create_rnn_cell(unit_type=hparams.unit_type,
                                            num_units=num_units,
                                            num_layers=num_layers,
                                            forget_bias=hparams.forget_bias,
                                            dropout=hparams.dropout,
                                            num_gpus=self.num_gpus,
                                            mode=self.mode,
                                            single_cell_fn=self.single_cell_fn)

        # Only generate alignment in greedy INFER mode.
        alignment_history = (self.mode == tf.contrib.learn.ModeKeys.INFER
                             and infer_mode != "beam_search")
        cell = tf.contrib.seq2seq.AttentionWrapper(
            cell,
            attention_mechanism,
            attention_layer_size=num_units,
            alignment_history=alignment_history,
            output_attention=hparams.output_attention,
            name="attention")

        # TODO(thangluong): do we need num_layers, num_gpus?
        cell = tf.contrib.rnn.DeviceWrapper(
            cell, model_helper.get_device_str(num_layers - 1, self.num_gpus))

        if hparams.pass_hidden_state:
            decoder_initial_state = cell.zero_state(
                batch_size, dtype).clone(cell_state=encoder_state)
        else:
            decoder_initial_state = cell.zero_state(batch_size, dtype)

        return cell, decoder_initial_state
Example 12
def build_graph(model, hparams, scope=None):
    """build the computation graph."""
    utils.print_out("# creating %s graph ..." % model.mode)
    dtype = tf.float32
    num_layers = hparams.num_layers
    num_gpus = hparams.num_gpus

    with tf.variable_scope(scope or "dynamic_seq2seq", dtype=dtype):
        # Encoder
        # Look up embedding; encoder_emb_inp: [batch_size, max_time, num_units]
        with tf.variable_scope("encoder_emb_inp"):
            encoder_emb_inp = tf.nn.embedding_lookup(model.embedding_encoder,
                                                     model.iterator.source)
            action_emb_inp = tf.nn.embedding_lookup(model.embedding_encoder,
                                                    model.iterator.action)
        with tf.variable_scope("encoder1_intent"):
            res = _build_encoder_simple(model,
                                        model.iterator.intent,
                                        model.iterator.intent_len,
                                        num_units=hparams.encoder_intent_unit)
            _, encoder_state1_aux, _ = res
        with tf.variable_scope("encoder2_kb"):
            res = _build_encoder_hierarchial(model,
                                             model.iterator.kb,
                                             num_units=hparams.encoder_kb_unit)
            _, encoder_state2_aux, _ = res

        with tf.variable_scope("encoder1"):
            model.encoder_input_projection1 = layers_core.Dense(
                hparams.num_units,
                use_bias=False,
                name="encoder_1_input_projection")
            tiled_encoder_state1_aux = tf.reshape(
                encoder_state1_aux,
                [model.batch_size, 1, hparams.encoder_intent_unit])
            time_step = tf.shape(encoder_emb_inp)[1]
            tiled_encoder_state1_aux = tf.tile(tiled_encoder_state1_aux,
                                               [1, time_step, 1])
            concat1 = tf.concat([encoder_emb_inp, tiled_encoder_state1_aux],
                                2)  # emb_intent + num_units
            encoder1_input = model.encoder_input_projection1(concat1)
            encoder_outputs1, encoder_state1 = _build_encoder(
                model, encoder1_input, hparams)  # 1 = customer, 2 = agent

        with tf.variable_scope("encoder2"):
            model.encoder_input_projection2 = layers_core.Dense(
                hparams.num_units,
                use_bias=False,
                name="encoder_2_input_projection")
            tiled_encoder_state2_aux = tf.reshape(
                encoder_state2_aux,
                [model.batch_size, 1, hparams.encoder_kb_unit])
            time_step = tf.shape(encoder_emb_inp)[1]
            tiled_encoder_state2_aux = tf.tile(tiled_encoder_state2_aux,
                                               [1, time_step, 1])
            concat2 = tf.concat([encoder_emb_inp, tiled_encoder_state2_aux],
                                2)  # emb_intent + num_units
            encoder2_input = model.encoder_input_projection2(concat2)
            encoder_outputs2, encoder_state2 = _build_encoder(
                model, encoder2_input, hparams)

        ## Decoder
        with tf.variable_scope("decoder1"):
            res = _build_decoder(model, encoder_outputs1, encoder_state1,
                                 hparams, vocab_utils.start_of_turn1,
                                 vocab_utils.start_of_turn2,
                                 model.output_layer1, encoder_state1_aux)
            logits_train1, _, sample_id_train1, sample_id_infer1 = res

        with tf.variable_scope("decoder2"):
            res = _build_decoder(model, encoder_outputs2, encoder_state2,
                                 hparams, vocab_utils.start_of_turn2,
                                 vocab_utils.start_of_turn1,
                                 model.output_layer2, encoder_state2_aux)
            logits_train2, _, sample_id_train2, sample_id_infer2 = res

        with tf.variable_scope("decoder_action"):
            res = _build_decoder_action(
                model,
                encoder_state2,
                hparams,
                hparams.t1,  # dialogue ends with t2, action starts with t1
                hparams.t2,
                model.output_layer_action)
            logits_train3, _, sample_id_train3, sample_id_infer3 = res

        with tf.variable_scope("value_network1"):
            res = _build_value_network(model, encoder_emb_inp, action_emb_inp,
                                       encoder_state1_aux, model.vn_project11,
                                       model.vn_project12, hparams)
            dialogue1_val, _ = res
        with tf.variable_scope("value_network2"):
            res = _build_value_network(model, encoder_emb_inp, action_emb_inp,
                                       encoder_state2_aux, model.vn_project21,
                                       model.vn_project22, hparams, True)
            dialogue2_val, action_val = res

            model.logits_train1 = logits_train1
            model.logits_train2 = logits_train2
            model.dialogue1_val = dialogue1_val
            model.dialogue2_val = dialogue2_val

        if model.mode in [
                tf.contrib.learn.ModeKeys.TRAIN,
                tf.contrib.learn.ModeKeys.EVAL,
                dialogue_utils.mode_self_play_mutable
        ]:
            with tf.device(
                    model_helper.get_device_str(num_layers - 1, num_gpus)):
                sl_loss, sl_loss_arr = _compute_loss(model, logits_train1,
                                                     logits_train2,
                                                     logits_train3)

            with tf.device(
                    model_helper.get_device_str(num_layers - 1, num_gpus)):
                rl_loss_arr = _compute_loss_selfplay(model, logits_train1,
                                                     logits_train2,
                                                     logits_train3,
                                                     dialogue1_val,
                                                     dialogue2_val, action_val)

        elif model.mode in (tf.contrib.learn.ModeKeys.INFER,
                            dialogue_utils.mode_self_play_immutable):
            sl_loss, sl_loss_arr, rl_loss_arr = None, None, None
        else:
            raise ValueError("mode not known")

        sample_id_arr_train = [
            sample_id_train1, sample_id_train2, sample_id_train3
        ]
        sample_id_arr_infer = [
            sample_id_infer1, sample_id_infer2, sample_id_infer3
        ]

        return sl_loss, sl_loss_arr, rl_loss_arr, sample_id_arr_train, sample_id_arr_infer
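
The encoder-input projections in this example broadcast a fixed auxiliary state across every token: reshape it to [batch, 1, units], tf.tile it along the time axis, concat with the embeddings, then project back down to num_units. The tile-and-concat step in numpy (illustrative shapes):

import numpy as np

batch, time_step, emb_dim, aux_dim = 2, 6, 10, 4
encoder_emb_inp = np.random.rand(batch, time_step, emb_dim)
encoder_state_aux = np.random.rand(batch, aux_dim)

tiled_aux = np.tile(encoder_state_aux[:, None, :], (1, time_step, 1))
concat = np.concatenate([encoder_emb_inp, tiled_aux], axis=2)
print(concat.shape)  # (2, 6, 14); a Dense layer then maps dim 14 -> num_units
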
Example 13
    def _build_decoder_cell(self, hparams, encoder_outputs, encoder_state,
                            source_sequence_length):
        """Build a RNN cell with attention mechanism that can be used by decoder."""
        attention_option = hparams.attention
        attention_architecture = hparams.attention_architecture

        if attention_architecture != "standard":
            raise ValueError("Unknown attention architecture %s" %
                             attention_architecture)

        num_units = hparams.num_units
        num_layers = hparams.num_layers
        num_residual_layers = hparams.num_residual_layers
        num_gpus = hparams.num_gpus
        beam_width = hparams.beam_width

        dtype = tf.float32

        if self.time_major:
            memory = tf.transpose(encoder_outputs, [1, 0, 2])
        else:
            memory = encoder_outputs

        if self.mode == tf.contrib.learn.ModeKeys.INFER and beam_width > 0:
            memory = tf.contrib.seq2seq.tile_batch(memory,
                                                   multiplier=beam_width)
            source_sequence_length = tf.contrib.seq2seq.tile_batch(
                source_sequence_length, multiplier=beam_width)
            encoder_state = tf.contrib.seq2seq.tile_batch(
                encoder_state, multiplier=beam_width)
            batch_size = self.batch_size * beam_width
        else:
            batch_size = self.batch_size

        if hparams.model in ('model0', 'model1', 'model2'):
            att_memory = tf.contrib.layers.fully_connected(
                memory,
                num_units,
                activation_fn=None,
                weights_initializer=tf.random_uniform_initializer(-0.1, 0.1))

            cell = NTMCell(num_layers,
                           num_units,
                           use_att_memory=True,
                           att_memory=att_memory,
                           att_memory_size=hparams.src_max_len,
                           att_memory_vector_dim=num_units,
                           use_ext_memory=(hparams.model == 'model2'),
                           ext_memory_size=hparams.num_memory_locations
                           if hparams.model == 'model2' else None,
                           ext_memory_vector_dim=hparams.memory_unit_size
                           if hparams.model == 'model2' else None,
                           ext_read_head_num=hparams.read_heads
                           if hparams.model == 'model2' else None,
                           ext_write_head_num=hparams.write_heads
                           if hparams.model == 'model2' else None,
                           dropout=hparams.dropout,
                           batch_size=batch_size,
                           mode=self.mode,
                           output_dim=num_units,
                           addressing_mode='content' if hparams.model
                           == 'model0' else 'content_and_location')

            decoder_initial_state = cell.zero_state(batch_size, dtype)

            if hparams.pass_hidden_state:
                decoder_initial_state = tuple([encoder_state] +
                                              list(decoder_initial_state[1:]))
        else:
            attention_mechanism = create_attention_mechanism(
                attention_option, num_units, memory, source_sequence_length)

            cell = model_helper.create_rnn_cell(
                unit_type=hparams.unit_type,
                num_units=num_units,
                num_layers=num_layers,
                num_residual_layers=num_residual_layers,
                forget_bias=hparams.forget_bias,
                dropout=hparams.dropout,
                num_gpus=num_gpus,
                mode=self.mode,
                single_cell_fn=self.single_cell_fn,
                num_proj=None,
                num_cells=2 if (hparams.encoder_type == "bi") else 1)

            # Only generate alignment in greedy INFER mode.
            alignment_history = (self.mode == tf.contrib.learn.ModeKeys.INFER
                                 and beam_width == 0)
            cell = tf.contrib.seq2seq.AttentionWrapper(
                cell,
                attention_mechanism,
                attention_layer_size=num_units,
                alignment_history=alignment_history,
                name="attention")

            # TODO(thangluong): do we need num_layers, num_gpus?
            cell = tf.contrib.rnn.DeviceWrapper(
                cell, model_helper.get_device_str(num_layers - 1, num_gpus))

            if hparams.pass_hidden_state:
                decoder_initial_state = cell.zero_state(
                    batch_size, dtype).clone(cell_state=encoder_state)
            else:
                decoder_initial_state = cell.zero_state(batch_size, dtype)

        return cell, decoder_initial_state
Example 14
def rpn_base(inputs,
             hp,
             im_info,
             bbox_labels,
             feat_stride=16,
             anchor_count=9,
             trainable=True):
    """
    A Region Proposal Network (RPN) takes an image (of any size) as input and outputs a set of
    rectangular object proposals, each with an objectness score [quoted from Faster R-CNN].
    :param inputs: image
    :param hp: hyperparameters
    :param im_info: image size [height, width, channel]
    :param feat_stride: feature stride, i.e. the total downsampling factor of the backbone
    :param anchor_count: number of base anchors generated per feature-map location
    :param trainable: whether to train this network
    :param bbox_labels: target bounding boxes
    :return: ROI proposals, rpn_info (dict) for loss computation, print_pool (dict) for
        debugging, activation for visualization
    """
    # For debugging
    print_pool = dict()
    # For activation
    activation = None
    with tf.variable_scope("rpn"):
        with tf.device(helper.get_device_str(device_id=0,
                                             num_gpus=hp.num_gpus)):
            # Build the anchors for image
            anchors, all_count = helper.generate_img_anchors(
                im_info,
                feat_stride,
                ratios=hp.anchor_ratios,
                scales=hp.anchor_scales)
            print_pool.update(anchor_shape=tf.shape(anchors))
            # rpn_conv = layers.conv2d(inputs, hp.rpn_channel, [3, 3], trainable=self.trainable,
            #                          weights_initializer=self.initializer, scope="rpn_conv_3x3")
            rpn_conv = slim.conv2d(inputs,
                                   hp.rpn_channel, [3, 3],
                                   trainable=trainable,
                                   scope="rpn_conv_3x3")
            # Visualize rpn
            activation = rpn_conv
            probs, predicts, scores, reshaped_scores = _rpn_cls_layer(
                rpn_conv, anchor_count * 2)
            deltas = _rpn_reg_layer(rpn_conv, anchor_count * 4)
            print_pool.update(deltas_shape=tf.shape(deltas))
            rpn_info = dict()
            if trainable:
                # Generate rois, roi scores on image
                rois, roi_scores = helper.sample_rois_from_anchors(
                    probs, deltas, im_info, anchors, anchor_count)
                # Gather info for calculating rpn's loss
                # Fill rpn's bbox_label, class_label, in_weights, out_weights
                rpn_info = helper.pack_anchor_info(
                    im_info,
                    anchors,
                    ori_anchor_count=anchor_count,
                    bbox_targets=tf.squeeze(bbox_labels, axis=0),
                    anchor_scores=scores)
            else:
                # Why use probs as scores?
                rois, _ = helper.sample_rois_from_anchors(
                    probs, deltas, im_info, anchors, anchor_count)
            # Fill rest info of rpn
            misc.append_params(
                rpn_info,
                rois=rois,
                class_probs=probs,
                class_predicts=predicts,
                class_reshaped_scores=reshaped_scores,
                sigma=hp.rpn_sigma,
                # Using the full scores instead of roi_scores so that gradients
                # can back-propagate through all params in rpn_cls_layer
                bbox_predicts=deltas,
                bbox_scores=scores)
    return rois, rpn_info, print_pool, activation
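
The default anchor_count of 9 is the cross product of the anchor ratio and scale lists (3 x 3 in the standard Faster R-CNN configuration). A sketch, assuming hp.anchor_ratios and hp.anchor_scales hold the usual three values each:

anchor_ratios = [0.5, 1.0, 2.0]  # assumed defaults
anchor_scales = [8, 16, 32]      # assumed defaults
anchor_count = len(anchor_ratios) * len(anchor_scales)
print(anchor_count)  # 9; each anchor gets 2 class scores and 4 box deltas
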
Example 15
    def _build_decoder(self, encoder_outputs, encoder_state, hparams):
        """Build and run a RNN decoder with a final projection layer.

    Args:
      encoder_outputs: The outputs of encoder for every time step.
      encoder_state: The final state of the encoder.
      hparams: The Hyperparameters configurations.

    Returns:
      A tuple of final logits and final decoder state:
        logits: size [time, batch_size, vocab_size] when time_major=True.
    """
        tgt_sos_id = tf.cast(
            self.tgt_vocab_table.lookup(tf.constant(hparams.sos)), tf.int32)
        tgt_eos_id = tf.cast(
            self.tgt_vocab_table.lookup(tf.constant(hparams.eos)), tf.int32)

        num_layers = hparams.num_layers
        num_gpus = hparams.num_gpus

        iterator = self.iterator

        # maximum_iteration: The maximum decoding steps.
        maximum_iterations = self._get_infer_maximum_iterations(
            hparams, iterator.source_sequence_length)

        ## Decoder.
        with tf.variable_scope("decoder") as decoder_scope:
            cell, decoder_initial_state = self._build_decoder_cell(
                hparams, encoder_outputs, encoder_state,
                iterator.source_sequence_length)

            ## Train or eval
            if self.mode != tf.contrib.learn.ModeKeys.INFER:
                # decoder_emb_inp: [max_time, batch_size, num_units]
                target_input = iterator.target_input
                if self.time_major:
                    target_input = tf.transpose(target_input)
                decoder_emb_inp = tf.nn.embedding_lookup(
                    self.embedding_decoder, target_input)

                # Helper
                helper = tf.contrib.seq2seq.TrainingHelper(
                    decoder_emb_inp,
                    iterator.target_sequence_length,
                    time_major=self.time_major)

                # Decoder
                my_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell,
                    helper,
                    decoder_initial_state,
                )

                # Dynamic decoding
                outputs, final_context_state, _ = tf.contrib.seq2seq.dynamic_decode(
                    my_decoder,
                    output_time_major=self.time_major,
                    swap_memory=True,
                    scope=decoder_scope)

                sample_id = outputs.sample_id

                # Note: there's a subtle difference here between train and inference.
                # We could have set output_layer when create my_decoder
                #   and shared more code between train and inference.
                # We chose to apply the output_layer to all timesteps for speed:
                #   10% improvements for small models & 20% for larger ones.
                # If memory is a concern, we should apply output_layer per timestep.
                device_id = num_layers if num_layers < num_gpus else (
                    num_layers - 1)
                with tf.device(model_helper.get_device_str(
                        device_id, num_gpus)):
                    logits = self.output_layer(outputs.rnn_output)

            ## Inference
            else:
                beam_width = hparams.beam_width
                length_penalty_weight = hparams.length_penalty_weight
                start_tokens = tf.fill([self.batch_size], tgt_sos_id)
                end_token = tgt_eos_id

                if beam_width > 0:
                    my_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                        cell=cell,
                        embedding=self.embedding_decoder,
                        start_tokens=start_tokens,
                        end_token=end_token,
                        initial_state=decoder_initial_state,
                        beam_width=beam_width,
                        output_layer=self.output_layer,
                        length_penalty_weight=length_penalty_weight)
                else:
                    # Helper
                    helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                        self.embedding_decoder, start_tokens, end_token)

                    # Decoder
                    my_decoder = tf.contrib.seq2seq.BasicDecoder(
                        cell,
                        helper,
                        decoder_initial_state,
                        output_layer=self.output_layer  # applied per timestep
                    )

                # Dynamic decoding
                outputs, final_context_state, _ = tf.contrib.seq2seq.dynamic_decode(
                    my_decoder,
                    maximum_iterations=maximum_iterations,
                    output_time_major=self.time_major,
                    swap_memory=True,
                    scope=decoder_scope)

                if beam_width > 0:
                    logits = tf.no_op()
                    sample_id = outputs.predicted_ids
                else:
                    logits = outputs.rnn_output
                    sample_id = outputs.sample_id

        return logits, sample_id, final_context_state
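
The device_id computation above puts the output projection on a spare GPU when one exists (num_layers < num_gpus) and otherwise colocates it with the last RNN layer. Enumerating the cases with the get_device_str sketch from Example 1:

def get_device_str(device_id, num_gpus):  # as sketched in Example 1
    return "/cpu:0" if num_gpus == 0 else "/gpu:%d" % (device_id % num_gpus)

for num_layers, num_gpus in [(2, 4), (4, 2), (2, 0)]:
    device_id = num_layers if num_layers < num_gpus else num_layers - 1
    print((num_layers, num_gpus), "->", get_device_str(device_id, num_gpus))
# (2, 4) -> /gpu:2 (spare GPU), (4, 2) -> /gpu:1, (2, 0) -> /cpu:0
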
Example 16
    def build_graph(self, hparams, scope=None):
        """Subclass must implement this method.

    Creates a sequence-to-sequence model with dynamic RNN decoder API.
    Args:
      hparams: Hyperparameter configurations.
      scope: VariableScope for the created subgraph; default "dynamic_seq2seq".

    Returns:
      A tuple of the form ([label_logits, slot_logits], loss, final_context_state,
      sample_id, label_pred), where:
        label_logits: float32 Tensor [batch_size x lbl_vocab_size].
        slot_logits: float32 Tensor with the slot-filling decoder's logits.
        loss: the total loss / batch_size.
        final_context_state: the final state of decoder RNN.
        sample_id: sampling indices.
        label_pred: predicted intent label ids.

    Raises:
      ValueError: if encoder_type differs from mono and bi, or
        attention_option is not (luong | scaled_luong |
        bahdanau | normed_bahdanau).
    """
        utils.print_out("\n# Creating %s graph ..." % self.mode)

        # Projection
        with tf.variable_scope(scope or "build_network"):
            with tf.variable_scope("decoder/output_projection"):
                self.output_layer = tf.layers.Dense(self.tgt_vocab_size,
                                                    use_bias=False,
                                                    name="output_projection")

        with tf.variable_scope(scope or "dynamic_seq2seq", dtype=self.dtype):
            # Encoder
            self.encoder_outputs, encoder_state = self._build_encoder(hparams)
            fw_state, bw_state = encoder_state
            print('encoder_outputs: ', self.encoder_outputs.shape)
            print('fw_state.h: ', fw_state.h.shape)
            print('bw_state.h: ', bw_state.h.shape)

            # Linear layer for classification of intent
            encoder_last_state = tf.concat([fw_state.h, bw_state.h], axis=1)
            print('encoder_last_state: ', encoder_last_state.shape)
            print()

            encoder_output_size = encoder_last_state.get_shape()[1].value
            print('encoder_output_size: ', encoder_output_size)
            w = tf.get_variable('w',
                                [encoder_output_size, self.lbl_vocab_size],
                                dtype=tf.float32)
            w_t = tf.transpose(w)
            v = tf.get_variable('v', [self.lbl_vocab_size], dtype=tf.float32)

            # apply the linear layer
            label_logits = tf.nn.xw_plus_b(encoder_last_state, w, v)
            label_pred = tf.argmax(label_logits, 1)
            print('label_scores: ', label_logits.shape)
            print()

            ## Decoder
            slot_logits, decoder_cell_outputs, sample_id, final_context_state = (
                self._build_decoder(self.encoder_outputs, encoder_state,
                                    hparams))

            ## Loss
            if self.mode != tf.contrib.learn.ModeKeys.INFER:
                with tf.device(
                        model_helper.get_device_str(
                            self.num_encoder_layers - 1, self.num_gpus)):
                    loss = self._compute_loss(label_logits, slot_logits,
                                              decoder_cell_outputs)
            else:
                loss = [tf.constant(0.0), tf.constant(0.0)]

            return [label_logits, slot_logits], loss, final_context_state, \
                   sample_id, label_pred
Example 17
    def _build_decoder(self, encoder_outputs, encoder_state, hparams):
        """Build and run a RNN decoder with a final projection layer.

    Args:
      encoder_outputs: The outputs of encoder for every time step.
      encoder_state: The final state of the encoder.
      hparams: The Hyperparameters configurations.

    Returns:
      A tuple of final logits and final decoder state:
        logits: size [time, batch_size, vocab_size] when time_major=True.
    """
        tgt_sos_id = tf.cast(
            self.tgt_vocab_table.lookup(tf.constant(hparams.sos)), tf.int32)
        tgt_eos_id = tf.cast(
            self.tgt_vocab_table.lookup(tf.constant(hparams.eos)), tf.int32)
        iterator = self.iterator

        # maximum_iteration: The maximum decoding steps.
        maximum_iterations = self._get_infer_maximum_iterations(
            hparams, iterator.source_sequence_length)

        ## Decoder.
        with tf.variable_scope("decoder") as decoder_scope:
            cell, decoder_initial_state = self._build_decoder_cell(
                hparams, encoder_outputs, encoder_state,
                iterator.source_sequence_length)

            # Optional ops depends on which mode we are in and which loss function we
            # are using.
            logits = tf.no_op()
            decoder_cell_outputs = None

            ## Train or eval
            if self.mode != tf.contrib.learn.ModeKeys.INFER:
                # decoder_emb_inp: [max_time, batch_size, num_units]
                target_input = iterator.target_input
                if self.time_major:
                    target_input = tf.transpose(target_input)
                decoder_emb_inp = tf.nn.embedding_lookup(
                    self.embedding_decoder, target_input)

                # Helper
                helper = tf.contrib.seq2seq.TrainingHelper(
                    decoder_emb_inp,
                    iterator.target_sequence_length,
                    time_major=self.time_major)

                # Decoder
                my_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell,
                    helper,
                    decoder_initial_state,
                )

                # Dynamic decoding
                outputs, final_context_state, _ = tf.contrib.seq2seq.dynamic_decode(
                    my_decoder,
                    output_time_major=self.time_major,
                    swap_memory=True,
                    scope=decoder_scope)

                sample_id = outputs.sample_id

                if self.num_sampled_softmax > 0:
                    # Note: this is required when using sampled_softmax_loss.
                    decoder_cell_outputs = outputs.rnn_output

                # Note: there's a subtle difference here between train and inference.
                # We could have set output_layer when create my_decoder
                #   and shared more code between train and inference.
                # We chose to apply the output_layer to all timesteps for speed:
                #   10% improvements for small models & 20% for larger ones.
                # If memory is a concern, we should apply output_layer per timestep.
                num_layers = self.num_decoder_layers
                num_gpus = self.num_gpus
                device_id = num_layers if num_layers < num_gpus else (
                    num_layers - 1)
                # Colocate output layer with the last RNN cell if there is no extra GPU
                # available. Otherwise, put last layer on a separate GPU.
                with tf.device(model_helper.get_device_str(
                        device_id, num_gpus)):
                    logits = self.output_layer(outputs.rnn_output)

                if self.num_sampled_softmax > 0:
                    logits = tf.no_op()  # unused when using sampled softmax loss.

            ## Inference
            else:
                infer_mode = hparams.infer_mode
                start_tokens = tf.fill([self.batch_size], tgt_sos_id)
                end_token = tgt_eos_id
                utils.print_out("  decoder: infer_mode=%sbeam_width=%d, "
                                "length_penalty=%f, coverage_penalty=%f" %
                                (infer_mode, hparams.beam_width,
                                 hparams.length_penalty_weight,
                                 hparams.coverage_penalty_weight))

                if infer_mode == "beam_search":
                    beam_width = hparams.beam_width
                    length_penalty_weight = hparams.length_penalty_weight
                    coverage_penalty_weight = hparams.coverage_penalty_weight

                    my_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                        cell=cell,
                        embedding=self.embedding_decoder,
                        start_tokens=start_tokens,
                        end_token=end_token,
                        initial_state=decoder_initial_state,
                        beam_width=beam_width,
                        output_layer=self.output_layer,
                        length_penalty_weight=length_penalty_weight,
                        coverage_penalty_weight=coverage_penalty_weight)
                elif infer_mode == "sample":
                    # Helper
                    sampling_temperature = hparams.sampling_temperature
                    assert sampling_temperature > 0.0, (
                        "sampling_temperature must be greater than 0.0 when using"
                        " sample decoder.")
                    helper = tf.contrib.seq2seq.SampleEmbeddingHelper(
                        self.embedding_decoder,
                        start_tokens,
                        end_token,
                        softmax_temperature=sampling_temperature,
                        seed=self.random_seed)
                elif infer_mode == "greedy":
                    helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                        self.embedding_decoder, start_tokens, end_token)
                else:
                    raise ValueError("Unknown infer_mode '%s'", infer_mode)

                if infer_mode != "beam_search":
                    my_decoder = tf.contrib.seq2seq.BasicDecoder(
                        cell,
                        helper,
                        decoder_initial_state,
                        output_layer=self.output_layer  # applied per timestep
                    )

                # Dynamic decoding
                outputs, final_context_state, _ = tf.contrib.seq2seq.dynamic_decode(
                    my_decoder,
                    maximum_iterations=maximum_iterations,
                    output_time_major=self.time_major,
                    swap_memory=True,
                    scope=decoder_scope)

                if infer_mode == "beam_search":
                    sample_id = outputs.predicted_ids
                else:
                    logits = outputs.rnn_output
                    sample_id = outputs.sample_id

        return logits, decoder_cell_outputs, sample_id, final_context_state
Example 18
    def build_graph(self, hparams, scope=None):
        """Subclass must implement this method.

    Creates a sequence-to-sequence model with dynamic RNN decoder API.
    Args:
      hparams: Hyperparameter configurations.
      scope: VariableScope for the created subgraph; default "dynamic_seq2seq".

    Returns:
      A tuple of the form (logits, loss, final_context_state, sample_id),
      where:
        logits: float32 Tensor [batch_size x num_decoder_symbols].
        loss: the total loss / batch_size.
        final_context_state: The final state of decoder RNN.
        sample_id: sampling indices.

    Raises:
      ValueError: if encoder_type differs from mono and bi, or
        attention_option is not (luong | scaled_luong |
        bahdanau | normed_bahdanau).
    """
        utils.print_out("# creating %s graph ..." % self.mode)
        dtype = tf.float32
        num_layers = hparams.num_layers
        num_gpus = hparams.num_gpus

        if self.mode == tf.contrib.learn.ModeKeys.TRAIN:
            self.use_fed_source = tf.placeholder(tf.bool)
            self.fed_source = tf.placeholder(tf.int32,
                                             shape=(None, hparams.src_max_len))

        with tf.variable_scope(scope or "dynamic_seq2seq", dtype=dtype):
            # Encoder
            encoder_outputs, encoder_state = self._build_encoder(hparams)

            ## Decoder
            logits, sample_id, final_context_state = self._build_decoder(
                encoder_outputs, encoder_state, hparams)

            if hparams.beam_width > 0 and self.mode == tf.contrib.learn.ModeKeys.INFER:
                cell_state = final_context_state.cell_state
            else:
                cell_state = final_context_state

            if hparams.mann == 'ntm':
                if hparams.model in ('model0', 'model1'):
                    print('here', final_context_state)
                    final_state = Model1NTMState(*cell_state)
                elif hparams.model == 'model2':
                    final_state = Model2NTMState(*cell_state)
                else:
                    final_state = Model3NTMState(*cell_state)

            self.att_w_history = tf.no_op()
            self.ext_w_history = tf.no_op()
            if hparams.record_w_history:
                if hparams.mann == 'ntm' and hparams.model in (
                        'model0', 'model1', 'model2', 'model3'):
                    att_w_history = final_state.att_w_history.stack()
                    self.att_w_history = tf.transpose(att_w_history, [1, 2, 0])
                if hparams.mann == 'ntm' and hparams.model in ('model2',
                                                               'model3'):
                    self.ext_w_history = [
                        tf.transpose(hist.stack(), [1, 2, 0])
                        for hist in final_state.ext_w_history]

        ## Loss
        if self.mode != tf.contrib.learn.ModeKeys.INFER:
            with tf.device(
                    model_helper.get_device_str(num_layers - 1, num_gpus)):
                loss = self._compute_loss(logits)
        else:
            loss = None

        return logits, loss, final_context_state, sample_id