def begin(self):
    """Build the graph ops that count processed tokens (chief only).

    Returns immediately when ``self.is_chief`` is falsy. Otherwise sums
    ``features["source_len"]`` and ``labels["target_len"]`` (whichever are
    present) into a per-step token count and creates a ``tokens_counter/count``
    int32 variable together with the op that adds the per-step count to it.
    """
    #pylint: disable=W0201
    if not self.is_chief:
        return

    features = graph_utils.get_dict_from_collection("features")
    labels = graph_utils.get_dict_from_collection("labels")

    # Accumulate the per-step token count from whichever lengths exist.
    step_tokens = tf.constant(0)
    if "source_len" in features:
        step_tokens += tf.reduce_sum(features["source_len"])
    if "target_len" in labels:
        step_tokens += tf.reduce_sum(labels["target_len"])
    self._num_tokens_tensor = step_tokens

    self._tokens_last_step = 0
    self._global_step_tensor = tf.train.get_global_step()

    # Running total of processed tokens, kept in a graph variable so that it
    # is global for distributed training.
    with tf.variable_scope("tokens_counter"):
        zero_init = tf.constant_initializer(0, dtype=tf.int32)
        self._tokens_processed_var = tf.get_variable(
            name="count", shape=[], dtype=tf.int32, initializer=zero_init)
        self._tokens_processed_add = tf.assign_add(
            self._tokens_processed_var, self._num_tokens_tensor)
def begin(self):
    """Reset the iteration counter and collect the tensors to sample from.

    Stores the global step, the "predictions" collection and the
    "logits_softmax" collection on the instance, then creates the sample
    directory when one is configured.
    """
    self._iter_count = 0
    self._global_step = tf.train.get_global_step()

    predictions = graph_utils.get_dict_from_collection("predictions")
    self._pred_dict = predictions
    ##self._logits_infer = graph_utils.get_dict_from_collection("logits_infer")
    softmax_logits = graph_utils.get_dict_from_collection("logits_softmax")
    self._logits_softmax = softmax_logits

    # Nothing to create when no sample directory was configured.
    if self._sample_dir is None:
        return
    gfile.MakeDirs(self._sample_dir)
def begin(self):
    """Reset evaluation state and prepare the evaluation result directory.

    Stores the global step plus the "predictions" and "features" collections
    on the instance and clears the per-run counters/buffers. When
    ``self._evalution_result_dir`` is set and does not exist yet, the
    directory is created and opened up to mode ``0o777``.
    """
    self._global_step = tf.train.get_global_step()
    self._pred_dict = graph_utils.get_dict_from_collection("predictions")
    self._features = graph_utils.get_dict_from_collection("features")
    self._iter_count = 0
    self._eval_str = ""
    self._current_global_step = None

    # Create the result directory
    if self._evalution_result_dir is not None:
        if not os.path.exists(self._evalution_result_dir):
            gfile.MakeDirs(self._evalution_result_dir)
            # The mode must be an octal literal: decimal 777 is 0o1411
            # (sticky bit, owner read-only), which would make the freshly
            # created directory unwritable instead of rwxrwxrwx.
            os.chmod(self._evalution_result_dir, 0o777)
def _create_predictions(self, decoder_output, features, labels, losses=None):
    """Assemble the predictions dictionary returned by the model.

    The result holds the flattened features, the flattened labels (when
    given), the transposed losses (when given) and every flattened decoder
    output field passed through ``_transpose_batch_time``. When
    "predicted_ids" is present, the ids are also looked up in the
    "target_id_to_vocab" table and stored as "predicted_tokens".
    """
    predictions = {}

    # Features are always included; labels and losses only when available.
    predictions.update(_flatten_dict({"features": features}))
    if labels is not None:
        predictions.update(_flatten_dict({"labels": labels}))
    if losses is not None:
        predictions["losses"] = _transpose_batch_time(losses)

    # Decoders return time-major output [T, B, ...]; hand it back to the
    # user in batch-major form.
    field_pairs = zip(decoder_output._fields, decoder_output)
    flat_outputs = _flatten_dict(collections.OrderedDict(field_pairs))
    transposed = {}
    for key, value in flat_outputs.items():
        transposed[key] = _transpose_batch_time(value)
    predictions.update(transposed)

    if "predicted_ids" not in predictions:
        return predictions

    # Map the predicted ids back into the vocabulary (raw predicted tokens).
    vocab_tables = graph_utils.get_dict_from_collection("vocab_tables")
    id_to_vocab = vocab_tables["target_id_to_vocab"]
    ids_int64 = tf.to_int64(predictions["predicted_ids"])
    predictions["predicted_tokens"] = id_to_vocab.lookup(ids_int64)
    return predictions
def compute_loss(self, decoder_output, features, labels):
    """Compute the model loss, including the discriminator term.

    Returns a tuple ``(losses, loss)``: ``losses`` is whatever the parent
    class reports as per-example losses, and ``loss`` is the scalar to
    minimize. The scaled discriminator loss is added to ``loss`` only in
    TRAIN mode; summaries are written in every mode.
    """
    #pylint: disable=R0201
    # Start from the loss computed by the parent model.
    parent = super(DiscriminatorSeq2Seq, self)
    losses, loss = parent.compute_loss(decoder_output, features, labels)

    # Build the discriminator on top of the encoder output.
    encoder_output = graph_utils.get_dict_from_collection("encoder_output")
    with tf.variable_scope("discriminator"):
        discriminator_loss, _disc_context = self._discriminator(
            encoder_output=encoder_output,
            features=features,
            labels=labels,
            num_classes=2,
            num_units=self.params["discriminator_units"])

    discriminator_loss = (
        discriminator_loss * self.params["discriminator_loss_multiplier"])
    tf.summary.scalar("loss/discriminator", discriminator_loss)
    tf.summary.scalar("loss/generator", loss)

    # The discriminator only contributes to the objective during training.
    training = self.mode == tf.contrib.learn.ModeKeys.TRAIN
    if training:
        loss = loss + discriminator_loss

    tf.summary.scalar("loss/total", loss)
    return losses, loss
def begin(self):
    """Reset the iteration counter and look up what is needed for sampling.

    Stores the global step and the "predictions" collection on the
    instance and creates the sample directory when one is configured.
    """
    self._iter_count = 0
    self._global_step = tf.train.get_global_step()
    predictions = graph_utils.get_dict_from_collection("predictions")
    self._pred_dict = predictions

    # Only touch the filesystem when a sample directory was requested.
    sample_dir = self._sample_dir
    if sample_dir is None:
        return
    gfile.MakeDirs(sample_dir)
def begin(self):
    """Reset the iteration counter and look up what is needed for sampling.

    Stores the global step (via ``training_util``) and the "predictions"
    collection on the instance, then creates the sample directory when one
    is configured.
    """
    self._iter_count = 0
    self._global_step = training_util.get_global_step()
    self._pred_dict = graph_utils.get_dict_from_collection("predictions")

    # Skip directory creation when no sample directory is configured.
    if self._sample_dir is None:
        return
    gfile.MakeDirs(self._sample_dir)
def _set_special_vocab_ids(self):
    """Look up the ids of the special copy tokens in the target vocab.

    Sets ``self.copy_word_id`` and ``self.copy_schema_id`` to the results of
    looking up "COPY_WORD" and "COPY_SCHEMA" in the "target_vocab_to_id"
    table.
    """
    vocab_tables = graph_utils.get_dict_from_collection("vocab_tables")
    vocab_to_id = vocab_tables["target_vocab_to_id"]

    def _id_of(token):
        # Lookup of a single string constant in the target vocab table.
        return vocab_to_id.lookup(
            tf.convert_to_tensor(token, dtype=tf.string))

    self.copy_word_id = _id_of("COPY_WORD")
    self.copy_schema_id = _id_of("COPY_SCHEMA")
def _create_decoder(self, encoder_output, features, _labels):
    """Instantiate the decoder with input attention and schema attention.

    Both attention classes are resolved from ``self.params`` (first via
    ``locate``, falling back to an attribute of ``decoders.attention``).
    Schema embeddings and lengths are gathered per example from the
    "schema_tables" collection using ``features['schema_loc']``.
    """
    # TODO: This whole method is copied from schema_attention_seq2seq.py.
    # Use multiple inheritance to avoid this?
    attention_name = self.params["attention.class"]
    attention_class = (locate(attention_name) or
                       getattr(decoders.attention, attention_name))
    attention_layer = attention_class(
        params=self.params["attention.params"], mode=self.mode)

    schema_attention_name = self.params["schema.attention.class"]
    schema_attention_class = (
        locate(schema_attention_name) or
        getattr(decoders.attention, schema_attention_name))
    schema_attention_layer = schema_attention_class(
        params=self.params["schema.attention.params"], mode=self.mode)

    # A reversed input sequence means the attention scores must be reversed
    # too; with beam search the lengths are tiled by the beam width.
    reverse_scores_lengths = None
    if self.params["source.reverse"]:
        reverse_scores_lengths = features["source_len"]
        if self.use_beam_search:
            beam_width = self.params["inference.beam_search.beam_width"]
            reverse_scores_lengths = tf.tile(
                input=reverse_scores_lengths, multiples=[beam_width])

    # Resolve each example's schema location to a row id, then gather that
    # schema's embeddings and length.
    schema_tables = graph_utils.get_dict_from_collection("schema_tables")
    schema_locs = features['schema_loc']
    lookup_table = schema_tables["schema_file_lookup_table"]
    ids = lookup_table.lookup(schema_locs)

    all_schema_embeddings = schema_tables["all_schema_embeddings"]
    gathered_embeddings = tf.gather(all_schema_embeddings, ids)
    schema_embeddings_3d = tf.squeeze(gathered_embeddings, [1])

    schema_lengths = schema_tables["schema_lengths"]
    gathered_lengths = tf.gather(schema_lengths, ids)
    schema_attn_values_length = tf.squeeze(gathered_lengths, [1])

    # NOTE: an earlier (disabled) variant loaded one embedding matrix from
    # params["schema.location"] and tiled it across the batch instead of
    # gathering per-example schemas.

    return self.decoder_class(
        params=self.params["decoder.params"],
        mode=self.mode,
        vocab_size=self.target_vocab_info.total_size,
        attention_values=encoder_output.attention_values,
        attention_values_length=encoder_output.attention_values_length,
        attention_keys=encoder_output.outputs,
        attention_fn=attention_layer,
        reverse_scores_lengths=reverse_scores_lengths,
        schema_attention_keys=schema_embeddings_3d,
        schema_attention_values=schema_embeddings_3d,
        schema_attention_values_length=schema_attn_values_length,
        schema_attention_fn=schema_attention_layer)
def _get_predicted_tokens(self, predictions):
    """Return the predicted tokens for ``predictions``.

    Uses ``predictions["predicted_tokens"]`` when present; otherwise looks
    ``predictions["predicted_ids"]`` up in the "target_id_to_vocab" table.
    """
    if "predicted_tokens" in predictions:
        return predictions["predicted_tokens"]

    # No tokens yet: map the predicted ids through the vocab table.
    vocab_tables = graph_utils.get_dict_from_collection("vocab_tables")
    id_to_vocab = vocab_tables["target_id_to_vocab"]
    ids_int64 = tf.to_int64(predictions["predicted_ids"])
    return id_to_vocab.lookup(ids_int64)
def begin(self):
    """Fetch the predictions collection, reset state and open output files.

    The prediction file is opened for UTF-8 text writing and the attention
    file for binary writing, each only when its path is configured.
    """
    self._predictions = graph_utils.get_dict_from_collection("predictions")

    # Counters and buffers start from scratch.
    self.write_cnt = 0
    self.sample_cnt = 0
    self.infer_outs = []
    self.attn_scores_list = []
    self.run_cnt = 0

    pred_path = self._save_pred_path
    if pred_path is not None:
        self._pred_fout = codecs.open(pred_path, "w", "utf-8")

    attn_path = self._attn_path
    if attn_path is not None:
        self._attn_fout = codecs.open(attn_path, "wb")
def __init__(self,
             delimiter=" ",
             tokens_feature_name="tokens",
             length_feature_name="length",
             schema_copy_feature_name="schema_copy_indices",
             prepend_token=None,
             append_token=None):
    """Initialize the decoder and grab the schema lookup tables.

    All arguments except ``schema_copy_feature_name`` are forwarded to the
    parent constructor. The schema file lookup table and the schema strings
    table are read from the "schema_tables" collection.
    """
    parent_kwargs = dict(
        delimiter=delimiter,
        tokens_feature_name=tokens_feature_name,
        length_feature_name=length_feature_name,
        prepend_token=prepend_token,
        append_token=append_token)
    super(SchemaCopyingDecoder, self).__init__(**parent_kwargs)

    self.schema_copy_feature_name = schema_copy_feature_name

    tables = graph_utils.get_dict_from_collection("schema_tables")
    self.schema_lookup_table = tables["schema_file_lookup_table"]
    self.schema_strings_table = tables["all_schema_strings"]
def source_embedding(self):
    """Return the embedding used for the source sequence.

    When ``params["embedding.source_embedding"]`` is set, the embedding is
    produced by ``self.load_embedding`` using the source vocab tables.
    Otherwise a variable "W" of shape [source vocab size, embedding dim] is
    created with a uniform initializer in [-init_scale, init_scale].
    """
    if self.params["embedding.source_embedding"] is None:
        init_scale = self.params["embedding.init_scale"]
        embedding_shape = [
            self.source_vocab_info.total_size,
            self.params["embedding.dim"],
        ]
        return tf.get_variable(
            name="W",
            shape=embedding_shape,
            initializer=tf.random_uniform_initializer(-init_scale, init_scale))

    # An embedding source is configured: load it against the source vocab.
    vocab_tables = graph_utils.get_dict_from_collection("vocab_tables")
    source_vocab_to_id = vocab_tables["source_vocab_to_id"]
    source_vocab = vocab_tables["source_vocab"]
    return self.load_embedding(
        self.params["embedding.source_embedding"],
        source_vocab,
        source_vocab_to_id)
def create_predictions(self, decoder_output, features, labels, losses=None):
    """Assemble the predictions dictionary returned by the model.

    The result holds the flattened features, the flattened labels (when
    given), the losses as passed in (when given) and the flattened decoder
    output fields. No transposition is applied here. When "predicted_ids"
    is present, the ids are also looked up in the "target_id_to_vocab"
    table and stored as "predicted_tokens".
    """
    predictions = {}

    # Features are always included; labels and losses only when available.
    predictions.update(_flatten_dict({"features": features}))
    if labels is not None:
        predictions.update(_flatten_dict({"labels": labels}))
    if losses is not None:
        predictions["losses"] = losses

    # Flatten the decoder output fields and add them unchanged.
    field_pairs = zip(decoder_output._fields, decoder_output)
    output_dict = collections.OrderedDict(field_pairs)
    predictions.update(_flatten_dict(output_dict))

    if "predicted_ids" not in predictions:
        return predictions

    # Map the predicted ids back into the vocabulary (raw predicted tokens).
    # The tokens keep the shape of the ids; an earlier idea of reshaping
    # them to [batch_size, 1, 1] to match the regular seq2seq output is not
    # applied.
    predicted_ids = predictions["predicted_ids"]
    vocab_tables = graph_utils.get_dict_from_collection("vocab_tables")
    id_to_vocab = vocab_tables["target_id_to_vocab"]
    predictions["predicted_tokens"] = id_to_vocab.lookup(
        tf.to_int64(predicted_ids))
    return predictions
def _create_predictions(self, decoder_output, features, labels, losses=None):
    """Extend the parent predictions with schema-copy-aware tokens.

    When "predicted_ids" is present, "predicted_tokens" is replaced by the
    result of ``self._fill_in_copies``, which receives the predicted ids and
    tokens, the schema attention copy values, the per-example schema tokens
    and ``self.copy_schema_id``.
    """
    parent = super(SchemaAttentionCopyingSeq2Seq, self)
    predictions = parent._create_predictions(
        decoder_output, features, labels, losses)

    if "predicted_ids" not in predictions:
        return predictions

    prediction_ids = predictions["predicted_ids"]
    output_predicted_tokens = self._get_predicted_tokens(predictions)

    # The special vocab ids are resolved lazily on first use.
    if self.copy_schema_id is None:
        self._set_special_vocab_ids()
    copy_vals = predictions["schema_attention_copy_vals"]

    # Build the schema_tokens structure from the list of schema locations:
    # location -> id -> schema string -> split into a dense token matrix.
    schema_tables = graph_utils.get_dict_from_collection("schema_tables")
    schema_locs = features['schema_loc']
    lookup_table = schema_tables["schema_file_lookup_table"]
    ids = lookup_table.lookup(schema_locs)
    all_schema_strings = schema_tables["all_schema_strings"]
    schema_strings = tf.squeeze(all_schema_strings.lookup(ids), axis=1)
    sparse_tokens = tf.string_split(schema_strings)
    schema_tokens = tf.sparse_tensor_to_dense(
        sparse_tokens,
        default_value="UNK",
        name="desparsifying_schema_tokens")

    # Figure out copy_schema_predicted_tokens
    predictions["predicted_tokens"] = self._fill_in_copies(
        prediction_ids,
        output_predicted_tokens,
        copy_vals,
        schema_tokens,
        self.copy_schema_id)
    return predictions
def _create_decoder(self, encoder_output, features, _labels):
    """Instantiate the decoder, adding schema attention when configured.

    The input attention class is resolved from ``self.params`` (first via
    ``locate``, falling back to an attribute of ``decoders.attention``).
    When ``params["decoder.class"]`` contains "schema" (case-insensitive),
    a schema attention layer plus per-example schema embeddings and lengths
    are passed to the decoder as well.
    """
    attention_name = self.params["attention.class"]
    attention_class = (locate(attention_name) or
                       getattr(decoders.attention, attention_name))
    attention_layer = attention_class(
        params=self.params["attention.params"],
        mode=self.mode,
        name="attention_to_input_layer")

    # A reversed input sequence means the attention scores must be reversed
    # too; with beam search the lengths are tiled by the beam width.
    reverse_scores_lengths = None
    if self.params["source.reverse"]:
        reverse_scores_lengths = features["source_len"]
        if self.use_beam_search:
            beam_width = self.params["inference.beam_search.beam_width"]
            reverse_scores_lengths = tf.tile(
                input=reverse_scores_lengths, multiples=[beam_width])

    # Extra constructor arguments, filled only for schema-aware decoders.
    schema_kwargs = {}
    if "schema" in self.params["decoder.class"].lower():
        schema_attention_name = self.params["schema.attention.class"]
        schema_attention_class = (
            locate(schema_attention_name) or
            getattr(decoders.attention, schema_attention_name))
        schema_attention_layer = schema_attention_class(
            params=self.params["schema.attention.params"],
            mode=self.mode,
            name="attention_to_schema_layer")

        # Resolve each example's schema location to a row id, then gather
        # that schema's embeddings and length.
        schema_tables = graph_utils.get_dict_from_collection("schema_tables")
        schema_locs = features['schema_loc']
        lookup_table = schema_tables["schema_file_lookup_table"]
        ids = lookup_table.lookup(schema_locs)

        all_schema_embeddings = schema_tables["all_schema_embeddings"]
        schema_embeddings_3d = tf.squeeze(
            tf.gather(all_schema_embeddings, ids), [1])
        schema_lengths = schema_tables["schema_lengths"]
        schema_attn_values_length = tf.squeeze(
            tf.gather(schema_lengths, ids), [1])

        schema_kwargs = dict(
            schema_attention_keys=schema_embeddings_3d,
            schema_attention_values=schema_embeddings_3d,
            schema_attention_values_length=schema_attn_values_length,
            schema_attention_fn=schema_attention_layer)

    return self.decoder_class(
        params=self.params["decoder.params"],
        mode=self.mode,
        vocab_size=self.target_vocab_info.total_size,
        attention_values=encoder_output.attention_values,
        attention_values_length=encoder_output.attention_values_length,
        attention_keys=encoder_output.outputs,
        attention_fn=attention_layer,
        reverse_scores_lengths=reverse_scores_lengths,
        **schema_kwargs)
def _get_tables_and_ids(self, features):
    """Return ``(schema_tables, ids)`` for the given features.

    ``schema_tables`` is the "schema_tables" collection dict and ``ids`` is
    the result of looking ``features['schema_loc']`` up in its
    "schema_file_lookup_table" entry.
    """
    schema_tables = graph_utils.get_dict_from_collection("schema_tables")
    schema_locs = features['schema_loc']
    lookup_table = schema_tables["schema_file_lookup_table"]
    return schema_tables, lookup_table.lookup(schema_locs)
# A hacky way to retrieve prediction result from the task hook...
prediction_dict = {}


def _save_prediction_to_dict(source_tokens, predicted_tokens):
    """Store the predicted string in prediction_dict under the source string."""
    predicted_str = _tokens_to_str(predicted_tokens)
    prediction_dict[_tokens_to_str(source_tokens)] = predicted_str


# print("create session")
# sess = tf.train.MonitoredSession(
#   session_creator=session_creator,
#   hooks=[DecodeOnce({}, callback_func=_save_prediction_to_dict)])

# Pick the tensors to fetch out of the "predictions" collection.
_predictions = graph_utils.get_dict_from_collection("predictions")
_fetch_keys = (
    "predicted_tokens",
    "predicted_ids",
    "features.source_tokens",
    "features.source_candidate_tokens",
)
fetches = {}
for _fetch_key in _fetch_keys:
    fetches[_fetch_key] = _predictions[_fetch_key]
#fetches["beam_parent_ids"] = _predictions["beam_search_output.beam_parent_ids"]

# NOTE: the SessionRunArgs object is constructed but not kept.
tf.train.SessionRunArgs(fetches)

# Plain session: run every initializer, then restore the checkpoint.
sess = tf.Session()
_init_ops = [
    tf.global_variables_initializer(),
    tf.local_variables_initializer(),
    tf.tables_initializer(),
]
sess.run(_init_ops)
saver.restore(sess, checkpoint_path)
def begin(self):
    """Look up the "predictions" graph collection and keep it on the instance."""
    predictions = graph_utils.get_dict_from_collection("predictions")
    self._predictions = predictions