Example #1
    def begin(self):
        if not self.is_chief:
            return

        #pylint: disable=W0201
        features = graph_utils.get_dict_from_collection("features")
        labels = graph_utils.get_dict_from_collection("labels")

        self._num_tokens_tensor = tf.constant(0)
        if "source_len" in features:
            self._num_tokens_tensor += tf.reduce_sum(features["source_len"])
        if "target_len" in labels:
            self._num_tokens_tensor += tf.reduce_sum(labels["target_len"])

        self._tokens_last_step = 0
        self._global_step_tensor = tf.train.get_global_step()

        # Create a variable that stores how many tokens have been processed
        # Should be global for distributed training
        with tf.variable_scope("tokens_counter"):
            self._tokens_processed_var = tf.get_variable(
                name="count",
                shape=[],
                dtype=tf.int32,
                initializer=tf.constant_initializer(0, dtype=tf.int32))
            self._tokens_processed_add = tf.assign_add(
                self._tokens_processed_var, self._num_tokens_tensor)
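The hook above only shows begin(); in a tf.train.SessionRunHook the counter is normally consumed in before_run/after_run. A minimal sketch of that counterpart, reusing the attribute names from the example (the sketch itself is not part of the original):

    def before_run(self, _run_context):
        # Fetch the global step and the running token count on every step
        return tf.train.SessionRunArgs(
            [self._global_step_tensor, self._tokens_processed_add])

    def after_run(self, _run_context, run_values):
        global_step, total_tokens = run_values.results
        # Log how many tokens were processed since the previous step
        tf.logging.info("step %d: %d new tokens", global_step,
                        total_tokens - self._tokens_last_step)
        self._tokens_last_step = total_tokens
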
Example #2
 def begin(self):
     self._iter_count = 0
     self._global_step = tf.train.get_global_step()
     self._pred_dict = graph_utils.get_dict_from_collection("predictions")
     ##self._logits_infer = graph_utils.get_dict_from_collection("logits_infer")
     self._logits_softmax = graph_utils.get_dict_from_collection(
         "logits_softmax")
     # Create the sample directory
     if self._sample_dir is not None:
         gfile.MakeDirs(self._sample_dir)
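Every example on this page reads tensors back with graph_utils.get_dict_from_collection. For reference, a minimal sketch of the helper pair, modeled on google/seq2seq's graph_utils module (treat the details as an approximation, not the exact library code):

    def add_dict_to_collection(dict_, collection_name):
        # Store keys and values in two parallel graph collections
        key_collection = collection_name + "_keys"
        value_collection = collection_name + "_values"
        for key, value in dict_.items():
            tf.add_to_collection(key_collection, key)
            tf.add_to_collection(value_collection, value)

    def get_dict_from_collection(collection_name):
        # Rebuild the dictionary stored by add_dict_to_collection
        keys = tf.get_collection(collection_name + "_keys")
        values = tf.get_collection(collection_name + "_values")
        return dict(zip(keys, values))
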
Example #3
 def begin(self):
   self._global_step = tf.train.get_global_step()
   self._pred_dict = graph_utils.get_dict_from_collection("predictions")
   self._features = graph_utils.get_dict_from_collection("features")
   self._iter_count = 0
   self._eval_str = ""
   self._current_global_step = None
   # Create the sample directory
   if self._evalution_result_dir is not None:
     if not os.path.exists(self._evalution_result_dir):
       gfile.MakeDirs(self._evalution_result_dir)
       # Use an octal mode; the decimal literal 777 sets meaningless bits
       os.chmod(self._evalution_result_dir, 0o777)
Example #4
  def _create_predictions(self, decoder_output, features, labels, losses=None):
    """Creates the dictionary of predictions that is returned by the model.
    """
    predictions = {}

    # Add features and, if available, labels to predictions
    predictions.update(_flatten_dict({"features": features}))
    if labels is not None:
      predictions.update(_flatten_dict({"labels": labels}))

    if losses is not None:
      predictions["losses"] = _transpose_batch_time(losses)

    # The decoder returns output in time-major form [T, B, ...]
    # Here we transpose everything back to batch-major for the user
    output_dict = collections.OrderedDict(
        zip(decoder_output._fields, decoder_output))
    decoder_output_flat = _flatten_dict(output_dict)
    
    decoder_output_flat = {
        k: _transpose_batch_time(v)
        for k, v in decoder_output_flat.items()
    }
    predictions.update(decoder_output_flat)

    # If ids were predicted, also map them back into the vocab
    if "predicted_ids" in predictions.keys():
      vocab_tables = graph_utils.get_dict_from_collection("vocab_tables")
      target_id_to_vocab = vocab_tables["target_id_to_vocab"]
      predicted_tokens = target_id_to_vocab.lookup(
          tf.to_int64(predictions["predicted_ids"]))
      # Raw predicted tokens
      predictions["predicted_tokens"] = predicted_tokens

    return predictions
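_flatten_dict and _transpose_batch_time come from the surrounding module and are not shown above. Plausible minimal versions, offered as assumptions rather than the library's exact code:

    def _flatten_dict(dict_, parent_key="", sep="."):
        # Flattens {"features": {"source_len": t}} into {"features.source_len": t}
        items = []
        for key, value in dict_.items():
            new_key = parent_key + sep + key if parent_key else key
            if isinstance(value, dict):
                items.extend(_flatten_dict(value, new_key, sep=sep).items())
            else:
                items.append((new_key, value))
        return dict(items)

    def _transpose_batch_time(tensor):
        # Swap the first two axes: [T, B, ...] -> [B, T, ...]
        perm = tf.concat([[1, 0], tf.range(2, tf.rank(tensor))], axis=0)
        return tf.transpose(tensor, perm)
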
Example #6
  def compute_loss(self, decoder_output, features, labels):
    """Computes the loss for this model.

    Returns a tuple `(losses, loss)`, where `losses` are the per-batch
    losses and loss is a single scalar tensor to minimize.
    """
    #pylint: disable=R0201
    # Calculate loss per example-timestep of shape [B, T]

    # Get the original loss from the model
    losses, loss = super(DiscriminatorSeq2Seq, self).compute_loss(
        decoder_output, features, labels)

    # Add the Discriminator
    encoder_output = graph_utils.get_dict_from_collection("encoder_output")
    with tf.variable_scope("discriminator"):
      discriminator_loss, disc_context = self._discriminator(
        encoder_output=encoder_output,
        features=features,
        labels=labels,
        num_classes=2,
        num_units=self.params["discriminator_units"])

    discriminator_loss *= self.params["discriminator_loss_multiplier"]
    tf.summary.scalar("loss/discriminator", discriminator_loss)
    tf.summary.scalar("loss/generator", loss)

    # Add discriminator loss to original loss (training only)
    if self.mode == tf.contrib.learn.ModeKeys.TRAIN:
      loss = loss + discriminator_loss

    tf.summary.scalar("loss/total", loss)

    return losses, loss
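The "encoder_output" collection read at the top of compute_loss must be populated earlier in the graph. A hedged sketch of how the model's encode step might register it (the call site and the _asdict() call are assumptions):

    # Somewhere in the model, after encoding:
    encoder_output = self.encode(features, labels)
    graph_utils.add_dict_to_collection(
        encoder_output._asdict(), "encoder_output")
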
Example #7
 def begin(self):
   self._iter_count = 0
   self._global_step = tf.train.get_global_step()
   self._pred_dict = graph_utils.get_dict_from_collection("predictions")
   # Create the sample directory
   if self._sample_dir is not None:
     gfile.MakeDirs(self._sample_dir)
Example #8
 def begin(self):
   self._iter_count = 0
   self._global_step = training_util.get_global_step()
   self._pred_dict = graph_utils.get_dict_from_collection("predictions")
   # Create the sample directory
   if self._sample_dir is not None:
     gfile.MakeDirs(self._sample_dir)
Example #9
 def _set_special_vocab_ids(self):
     # Find out the IDs for special vocab
     vocab_tables = graph_utils.get_dict_from_collection("vocab_tables")
     target_vocab_to_id = vocab_tables["target_vocab_to_id"]
     self.copy_word_id = target_vocab_to_id.lookup(
         tf.convert_to_tensor("COPY_WORD", dtype=tf.string))
     self.copy_schema_id = target_vocab_to_id.lookup(
         tf.convert_to_tensor("COPY_SCHEMA", dtype=tf.string))
Example #10
    def _create_decoder(self, encoder_output, features, _labels):
        # TODO: This whole method is copied from schema_attention_seq2seq.py.
        # Use multiple inheritance to avoid this? 
        attention_class = locate(self.params["attention.class"]) or \
                          getattr(decoders.attention, 
                                  self.params["attention.class"])
        attention_layer = attention_class(
            params=self.params["attention.params"], mode=self.mode)

        schema_attention_class = locate(self.params["schema.attention.class"]) or \
                                 getattr(decoders.attention, 
                                         self.params["schema.attention.class"])
        schema_attention_layer = schema_attention_class(
            params=self.params["schema.attention.params"], mode=self.mode)

        # If the input sequence is reversed we also need to reverse
        # the attention scores.
        reverse_scores_lengths = None
        if self.params["source.reverse"]:
            reverse_scores_lengths = features["source_len"]
        if self.use_beam_search:
            reverse_scores_lengths = tf.tile(
                input=reverse_scores_lengths,
                multiples=[self.params["inference.beam_search.beam_width"]])

        schema_tables = graph_utils.get_dict_from_collection("schema_tables")
        schema_locs = features['schema_loc'] 
        table = schema_tables["schema_file_lookup_table"]
        ids = table.lookup(schema_locs)
        all_schema_embeddings = schema_tables["all_schema_embeddings"]
        schema_embeddings_3d = tf.squeeze(tf.gather(all_schema_embeddings, ids), [1])
        schema_lengths = schema_tables["schema_lengths"]
        schema_attn_values_length = tf.squeeze(tf.gather(schema_lengths, ids), [1])
        # schema_embeddings_file = self.params["schema.location"]
        # print("Loading schema embeddings from %s" % schema_embeddings_file)
        # schema_embeddings_matrix_np = np.load(schema_embeddings_file)
        # schema_embeddings_matrix = tf.constant(schema_embeddings_matrix_np, 
        #                                        dtype=tf.float32)
        # batch_size = tf.shape(encoder_output.attention_values_length)[0]
        # schema_embeddings_3d = tf.tile(tf.expand_dims(schema_embeddings_matrix, 0), 
        #                                tf.stack([batch_size, 1, 1]))
        # schema_attn_values_length = tf.tile(tf.expand_dims(
        #     tf.shape(schema_embeddings_matrix)[1], 0), [batch_size])

        return self.decoder_class(
            params=self.params["decoder.params"],
            mode=self.mode,
            vocab_size=self.target_vocab_info.total_size,
            attention_values=encoder_output.attention_values,
            attention_values_length=encoder_output.attention_values_length,
            attention_keys=encoder_output.outputs,
            attention_fn=attention_layer,
            reverse_scores_lengths=reverse_scores_lengths,
            schema_attention_keys=schema_embeddings_3d,
            schema_attention_values=schema_embeddings_3d,
            schema_attention_values_length=schema_attn_values_length,
            schema_attention_fn=schema_attention_layer
        )
Example #11
 def _get_predicted_tokens(self, predictions):
     if "predicted_tokens" in predictions.keys():
         output_predicted_tokens = predictions["predicted_tokens"]
     else:
         vocab_tables = graph_utils.get_dict_from_collection("vocab_tables")
         target_id_to_vocab = vocab_tables["target_id_to_vocab"]
         output_predicted_tokens = target_id_to_vocab.lookup(
             tf.to_int64(predictions["predicted_ids"]))
     return output_predicted_tokens
Example #12
 def begin(self):
     self._predictions = graph_utils.get_dict_from_collection("predictions")
     self.write_cnt = 0
     self.sample_cnt = 0
     self.infer_outs = []
     self.attn_scores_list = []
     self.run_cnt = 0
     if self._save_pred_path is not None:
         self._pred_fout = codecs.open(self._save_pred_path, "w", "utf-8")
     if self._attn_path is not None:
         self._attn_fout = codecs.open(self._attn_path, "wb")
Example #13
    def __init__(self,
                 delimiter=" ",
                 tokens_feature_name="tokens",
                 length_feature_name="length",
                 schema_copy_feature_name="schema_copy_indices",
                 prepend_token=None,
                 append_token=None):
        super(SchemaCopyingDecoder,
              self).__init__(delimiter=delimiter,
                             tokens_feature_name=tokens_feature_name,
                             length_feature_name=length_feature_name,
                             prepend_token=prepend_token,
                             append_token=append_token)
        self.schema_copy_feature_name = schema_copy_feature_name

        schema_tables = graph_utils.get_dict_from_collection("schema_tables")
        self.schema_lookup_table = schema_tables["schema_file_lookup_table"]
        self.schema_strings_table = schema_tables["all_schema_strings"]
Example #14
    def source_embedding(self):
        """Returns the embedding used for the source sequence.
    """
        if self.params["embedding.source_embedding"] is not None:
            vocab_tables = graph_utils.get_dict_from_collection("vocab_tables")
            source_vocab_to_id = vocab_tables["source_vocab_to_id"]
            source_vocab = vocab_tables["source_vocab"]

            return self.load_embedding(
                self.params["embedding.source_embedding"], source_vocab,
                source_vocab_to_id)

        return tf.get_variable(name="W",
                               shape=[
                                   self.source_vocab_info.total_size,
                                   self.params["embedding.dim"]
                               ],
                               initializer=tf.random_uniform_initializer(
                                   -self.params["embedding.init_scale"],
                                   self.params["embedding.init_scale"]))
Example #15
    def create_predictions(self,
                           decoder_output,
                           features,
                           labels,
                           losses=None):
        """Creates the dictionary of predictions that is returned by the model.
    """
        predictions = {}

        # Add features and, if available, labels to predictions
        predictions.update(_flatten_dict({"features": features}))
        if labels is not None:
            predictions.update(_flatten_dict({"labels": labels}))

        if losses is not None:
            predictions["losses"] = losses

        # The decoder returns output in time-major form [T, B, ...]
        # (this variant keeps it as-is rather than transposing back)
        output_dict = collections.OrderedDict(
            zip(decoder_output._fields, decoder_output))
        decoder_output_flat = _flatten_dict(output_dict)
        predictions.update(decoder_output_flat)

        # If ids were predicted, also map them back into the vocab
        if "predicted_ids" in predictions.keys():
            predicted_tk_id = predictions["predicted_ids"]
            vocab_tables = graph_utils.get_dict_from_collection("vocab_tables")
            target_id_to_vocab = vocab_tables["target_id_to_vocab"]
            predicted_tokens = target_id_to_vocab.lookup(
                tf.to_int64(predicted_tk_id))
            # Raw predicted tokens
            # Force to reshape to [batch_size, 1, 1] so that it has same shape as regular seq2seq,
            # and all post process code can be reused.
            #predicted_tokens = tf.reshape(predicted_tokens, [tf.shape(predicted_tokens)[0], 1, 1])
            predictions["predicted_tokens"] = predicted_tokens

        return predictions
Example #16
    def _create_predictions(self,
                            decoder_output,
                            features,
                            labels,
                            losses=None):
        predictions = super(SchemaAttentionCopyingSeq2Seq,
                            self)._create_predictions(decoder_output, features,
                                                      labels, losses)
        if "predicted_ids" in predictions.keys():
            prediction_ids = predictions["predicted_ids"]
            output_predicted_tokens = self._get_predicted_tokens(predictions)
            if self.copy_schema_id is None:
                self._set_special_vocab_ids()

            schema_attention_copy_vals = predictions[
                "schema_attention_copy_vals"]

            # Build the schema_tokens structure from the list of schema locations.
            schema_tables = graph_utils.get_dict_from_collection(
                "schema_tables")
            schema_locs = features['schema_loc']
            table = schema_tables["schema_file_lookup_table"]
            ids = table.lookup(schema_locs)
            all_schema_strings = schema_tables["all_schema_strings"]
            schema_tokens = tf.squeeze(all_schema_strings.lookup(ids), axis=1)
            schema_tokens = tf.sparse_tensor_to_dense(
                tf.string_split(schema_tokens),
                default_value="UNK",
                name="desparsifying_schema_tokens")

            # Figure out copy_schema_predicted_tokens
            predicted_tokens = self._fill_in_copies(
                prediction_ids, output_predicted_tokens,
                schema_attention_copy_vals, schema_tokens, self.copy_schema_id)

            predictions["predicted_tokens"] = predicted_tokens
        return predictions
Example #17
    def _create_decoder(self, encoder_output, features, _labels):
        attention_class = locate(self.params["attention.class"]) or \
                          getattr(decoders.attention,
                                  self.params["attention.class"])
        attention_layer = attention_class(
            params=self.params["attention.params"],
            mode=self.mode,
            name="attention_to_input_layer")

        # If the input sequence is reversed we also need to reverse
        # the attention scores.
        reverse_scores_lengths = None
        if self.params["source.reverse"]:
            reverse_scores_lengths = features["source_len"]
        if self.use_beam_search:
            reverse_scores_lengths = tf.tile(
                input=reverse_scores_lengths,
                multiples=[self.params["inference.beam_search.beam_width"]])

        if "schema" in self.params["decoder.class"].lower():
            schema_attention_class = locate(
                self.params["schema.attention.class"]) or \
                getattr(decoders.attention,
                        self.params["schema.attention.class"])
            schema_attention_layer = schema_attention_class(
                params=self.params["schema.attention.params"],
                mode=self.mode,
                name="attention_to_schema_layer")

            schema_tables = graph_utils.get_dict_from_collection(
                "schema_tables")
            schema_locs = features['schema_loc']
            table = schema_tables["schema_file_lookup_table"]
            ids = table.lookup(schema_locs)
            all_schema_embeddings = schema_tables["all_schema_embeddings"]
            schema_embeddings_3d = tf.squeeze(
                tf.gather(all_schema_embeddings, ids), [1])
            schema_lengths = schema_tables["schema_lengths"]
            schema_attn_values_length = tf.squeeze(
                tf.gather(schema_lengths, ids), [1])

            return self.decoder_class(
                params=self.params["decoder.params"],
                mode=self.mode,
                vocab_size=self.target_vocab_info.total_size,
                attention_values=encoder_output.attention_values,
                attention_values_length=encoder_output.attention_values_length,
                attention_keys=encoder_output.outputs,
                attention_fn=attention_layer,
                reverse_scores_lengths=reverse_scores_lengths,
                schema_attention_keys=schema_embeddings_3d,
                schema_attention_values=schema_embeddings_3d,
                schema_attention_values_length=schema_attn_values_length,
                schema_attention_fn=schema_attention_layer)
        return self.decoder_class(
            params=self.params["decoder.params"],
            mode=self.mode,
            vocab_size=self.target_vocab_info.total_size,
            attention_values=encoder_output.attention_values,
            attention_values_length=encoder_output.attention_values_length,
            attention_keys=encoder_output.outputs,
            attention_fn=attention_layer,
            reverse_scores_lengths=reverse_scores_lengths)
Example #18
 def _get_tables_and_ids(self, features):
     schema_tables = graph_utils.get_dict_from_collection("schema_tables")
     schema_locs = features['schema_loc']
     table = schema_tables["schema_file_lookup_table"]
     ids = table.lookup(schema_locs)
     return (schema_tables, ids)
Example #19
# A hacky way to retrieve the prediction result from the task hook...
prediction_dict = {}


def _save_prediction_to_dict(source_tokens, predicted_tokens):
    prediction_dict[_tokens_to_str(source_tokens)] = _tokens_to_str(
        predicted_tokens)


# print("create session")
# sess = tf.train.MonitoredSession(
#   session_creator=session_creator,
#   hooks=[DecodeOnce({}, callback_func=_save_prediction_to_dict)])

_predictions = graph_utils.get_dict_from_collection("predictions")
fetches = {}
fetches["predicted_tokens"] = _predictions["predicted_tokens"]
fetches["predicted_ids"] = _predictions["predicted_ids"]
fetches["features.source_tokens"] = _predictions["features.source_tokens"]
fetches["features.source_candidate_tokens"] = _predictions[
    "features.source_candidate_tokens"]
#fetches["beam_parent_ids"] = _predictions["beam_search_output.beam_parent_ids"]
tf.train.SessionRunArgs(fetches)
sess = tf.Session()
sess.run([
    tf.global_variables_initializer(),
    tf.local_variables_initializer(),
    tf.tables_initializer()
])
saver.restore(sess, checkpoint_path)
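_tokens_to_str is referenced but never defined in this fragment. A plausible minimal version, assuming byte tokens and the SEQUENCE_END marker used elsewhere in seq2seq (both are assumptions):

    def _tokens_to_str(tokens):
        # Join byte tokens into one UTF-8 string and cut at the end marker
        text = b" ".join(tokens).decode("utf-8")
        return text.split("SEQUENCE_END")[0].strip()
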
Example #20
 def begin(self):
   self._predictions = graph_utils.get_dict_from_collection("predictions")