def _build_output(self, output_dict):
    '''
    Take RNN outputs and produce logits over the vocab.
    '''
    inputs = transpose_first_two_dims(output_dict['outputs'])  # (batch_size, seq_len, embed_size)
    logits = self._build_logits(inputs)
    return logits
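# transpose_first_two_dims is not defined in these snippets. Judging from the shape
# comments (time-major RNN outputs become (batch_size, seq_len, ...)), it presumably
# swaps the first two axes of a tensor of arbitrary rank. A minimal sketch under that
# assumption, using the same TensorFlow API as the surrounding examples:
def transpose_first_two_dims(tensor):
    rank = len(tensor.get_shape().as_list())
    return tf.transpose(tensor, perm=[1, 0] + list(range(2, rank)))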
def embed_context(self):
    embeddings = []
    for i in xrange(self.num_context):
        inputs, mask = self.context_embedder.build_seq_inputs(self.context[i], self.word_embedder, self.pad, time_major=False)
        embeddings.append(self.context_embedder.embed(inputs, mask, integer=False)['embedding'])
    embeddings = tf.stack(embeddings)  # (num_context, batch_size, embed_size)
    embeddings = transpose_first_two_dims(embeddings)
    embeddings = tf.reduce_sum(embeddings, axis=1)
    return embeddings  # (batch_size, embed_size)
def _build_output(self, output_dict):
    '''
    Take RNN outputs and produce logits over the vocab and the attentions.
    '''
    logits = super(CopyGraphDecoder, self)._build_output(output_dict)  # (batch_size, seq_len, num_symbols)
    attn_scores = transpose_first_two_dims(output_dict['attn_scores'])  # (batch_size, seq_len, num_nodes)
    return tf.concat(2, [logits, attn_scores])
def _tile_inputs(self, input_dict, multiplier):
    with tf.name_scope('tile_inputs'):
        if hasattr(self.decoder, 'context_embedding'):
            self.decoder.context_embedding = tile_tensor(self.decoder.context_embedding, multiplier)
        for input_name, tensor in input_dict.iteritems():
            if input_name == 'init_cell_state':
                input_dict[input_name] = tile_tensor(tensor, multiplier)
            elif input_name == 'encoder_embeddings':
                # (seq_len, batch_size, embed_size)
                input_dict[input_name] = transpose_first_two_dims(tile_tensor(transpose_first_two_dims(tensor), multiplier))
            elif input_name == 'inputs':
                continue
            elif input_name == 'price_history':
                # TODO
                continue
            else:
                print '{} not tiled'.format(input_name)
                raise ValueError
    return input_dict
def _build_rnn_inputs(self, inputs, input_dict):
    inputs, mask, kwargs = super(AttentionDecoder, self)._build_rnn_inputs(inputs, input_dict)
    encoder_outputs = input_dict['encoder_embeddings']
    self.feedable_vars['encoder_outputs'] = encoder_outputs
    encoder_embeddings = transpose_first_two_dims(encoder_outputs)  # (batch_size, seq_len, embed_size)
    attention_memory = self.context_embedding + [encoder_embeddings]
    kwargs['attention_memory'] = attention_memory
    # mask doesn't seem to matter
    #kwargs['attention_mask'] = self.context_embedder.get_mask('title')
    return inputs, mask, kwargs
def _build_output(self, output_dict):
    '''
    Take RNN outputs and produce logits over the vocab.
    '''
    outputs = output_dict['outputs']
    outputs = transpose_first_two_dims(outputs)  # (batch_size, seq_len, output_size)
    logits = batch_linear(outputs, self.num_symbols, True)
    #logits = BasicDecoder.penalize_repetition(logits)
    return logits
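# batch_linear is another helper not defined here. From its usage (mapping a
# (batch_size, seq_len, output_size) tensor to (batch_size, seq_len, num_symbols)),
# it presumably applies one dense layer to the last dimension, with the third
# argument acting as a bias flag. A hypothetical sketch under those assumptions:
def batch_linear(inputs, output_size, bias):
    input_size = inputs.get_shape().as_list()[-1]
    dynamic_shape = tf.shape(inputs)
    w = tf.get_variable('batch_linear_w', [input_size, output_size])
    outputs = tf.matmul(tf.reshape(inputs, [-1, input_size]), w)
    if bias:
        b = tf.get_variable('batch_linear_b', [output_size])
        outputs = outputs + b
    return tf.reshape(outputs, [dynamic_shape[0], dynamic_shape[1], output_size])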
def _build_output(self, output_dict):
    vocab_logits = super(GatedCopyGraphDecoder, self)._build_output(output_dict)  # (batch_size, seq_len, num_symbols)
    attn_scores = transpose_first_two_dims(output_dict['attn_scores'])  # (batch_size, seq_len, num_nodes)
    rnn_outputs = transpose_first_two_dims(output_dict['outputs'])  # (batch_size, seq_len, output_size)
    with tf.variable_scope('Gating'):
        prob_vocab = tf.sigmoid(batch_linear(rnn_outputs, 1, True))  # (batch_size, seq_len, 1)
        prob_copy = 1 - prob_vocab
        log_prob_vocab = tf.log(prob_vocab + EPS)
        log_prob_copy = tf.log(prob_copy + EPS)
    # Reweight the vocab and attn distributions and convert them to logits
    vocab_logits = log_prob_vocab + vocab_logits - tf.reduce_logsumexp(vocab_logits, 2, keep_dims=True)
    attn_logits = log_prob_copy + attn_scores - tf.reduce_logsumexp(attn_scores, 2, keep_dims=True)
    return tf.concat(2, [vocab_logits, attn_logits]), tf.concat(2, [log_prob_vocab, log_prob_copy])
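# The Gating block mixes two distributions: subtracting reduce_logsumexp turns the raw
# vocab scores and attention scores into log-probabilities, and adding log_prob_vocab /
# log_prob_copy reweights them so their concatenation is one valid distribution over
# vocabulary symbols plus graph nodes. A hypothetical NumPy check of that identity on
# toy values (illustration only, not part of the model):
import numpy as np

vocab_scores = np.array([1.0, 2.0, 0.5])   # raw vocab scores for one time step
attn_scores = np.array([0.3, -1.0])        # raw copy (attention) scores
p_vocab = 0.7                              # gate output; p_copy = 1 - p_vocab

log_mixed = np.concatenate([
    np.log(p_vocab) + vocab_scores - np.log(np.exp(vocab_scores).sum()),
    np.log(1 - p_vocab) + attn_scores - np.log(np.exp(attn_scores).sum()),
])
assert np.isclose(np.exp(log_mixed).sum(), 1.0)  # the mixture sums to one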
def _embed_seq(self, sequence, step=False):
    inputs, mask = self.seq_embedder.build_seq_inputs(sequence, self.word_embedder, self.pad, time_major=False)
    embeddings = self.seq_embedder.embed(inputs, mask, integer=False, init_state=None)
    if not step:
        return embeddings['embedding']
    else:
        return transpose_first_two_dims(embeddings['step_embeddings'])
def _build_rnn_inputs(self, time_major):
    inputs = super(GraphDecoder, self)._build_rnn_inputs(time_major)
    checklists = tf.cumsum(tf.one_hot(self.entities, self.num_nodes, on_value=1, off_value=0), axis=1) + self.init_checklists
    # cumsum can produce counts > 1; clip back to a binary indicator
    checklists = tf.cast(tf.greater(checklists, 0), tf.float32)
    self.output_dict['checklists'] = checklists
    checklists = transpose_first_two_dims(checklists)  # (seq_len, batch_size, num_nodes)
    return inputs, checklists
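# A toy NumPy illustration of the checklist construction above (not part of the model,
# batch dimension dropped for brevity): a cumulative sum of one-hot entity mentions
# counts how many times each node has been mentioned so far, and the greater-than-zero
# cast clips those counts back to a binary "already mentioned" indicator per node.
import numpy as np

entities = np.array([2, 0, 2])                     # node id mentioned at each time step
num_nodes = 4
one_hot = np.eye(num_nodes, dtype=int)[entities]   # (seq_len, num_nodes)
counts = np.cumsum(one_hot, axis=0)                # node 2 reaches a count of 2 at step 3
checklists = (counts > 0).astype(np.float32)       # binary checklist per time step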
def update_price(self, partner, inputs, init_price=None):
    '''
    Update price history given the inputs:
        inputs = [[1, 1]]
        partner_input = False
        init_price[:, 0, :] = [[0, 2], [1, 3]]
    Return updated price vector: [[2, 1], [3, 1]]
    '''
    # Change to time major
    inputs = transpose_first_two_dims(inputs)
    update_func = self._update_partner_price if partner else self._update_self_price
    if init_price is None:
        init_price = self.init_price
    price_hists = tf.scan(update_func, inputs, initializer=init_price)
    return price_hists
def build_model(self, input_dict, tf_variables):
    with tf.variable_scope(type(self).__name__):
        self.decoder.build_model(input_dict, tf_variables)
        # NOTE: output from rnn is time major
        # context: hidden states at each time step
        context = transpose_first_two_dims(self.decoder.output_dict['outputs'])
        self.price_inputs = tf.placeholder(tf.float32, shape=[None, None], name='price_inputs')  # (batch_size, seq_len)
        self.price_targets = tf.placeholder(tf.float32, shape=[None, None], name='price_targets')  # (batch_size, seq_len)
        init_price = input_dict['price_history']  # (batch_size, price_size)
        # NOTE: no price updating during decoding
        predicted_prices = self.price_predictor.predict_price(init_price, context)
        # Update price after decoding.
        partner = False
        new_price_history_seq = self.price_predictor.update_price(partner, self.price_targets, init_price=init_price)
        # Outputs
        self.output_dict = dict(self.decoder.output_dict)
        self.output_dict['price_history'] = new_price_history_seq[-1, :, :]
        self.output_dict['price_preds'] = predicted_prices
def _build_rnn_inputs(self, time_major, **kwargs):
    '''
    Concatenate word embedding with entity/node embedding.
    '''
    word_embedder = self.word_embedder
    inputs = kwargs.get('inputs', self.inputs)
    entities = kwargs.get('entities', self.entities)
    context = kwargs.get('context', self.context)
    word_embeddings = word_embedder.embed(inputs, zero_pad=True)
    if self.node_embed_in_rnn_inputs:
        # stop_gradient: look up node embeddings but don't backpropagate (would be recursive)
        entity_embeddings = tf.stop_gradient(self._get_node_embedding(context[0], entities))
        inputs = tf.concat(2, [word_embeddings, entity_embeddings])
    else:
        inputs = word_embeddings
    if not time_major:
        inputs = transpose_first_two_dims(inputs)  # (seq_len, batch_size, input_size)
    return inputs