Example #1
def wide_model(numeric_input, category_input, vocabs):
    transpose_category_input = tf.transpose(category_input)
    category_sum = None
    # Build category_sum by embedding each category column; it is concatenated with numeric_input below
    for i in range(0, len(vocabs)):
        embedding = tf.get_variable(
            "wideem" + str(i), [vocabs[i], 8],
            initializer=tf.contrib.layers.xavier_initializer()
            #partitioner=tf.fixed_size_partitioner(n_pss))
            #partitioner=tf.min_max_variable_partitioner(n_pss, 0, 2 << 10)
        )
        # Pick one column from category input
        col = tf.gather(transpose_category_input, [i])[0]
        #col = tf.nn.embedding_lookup(transpose_category_input, [i])[0]

        # Equivalent to multiplying a one-hot row [0,0,0,1] by [w1; w2; w3; w4], i.e. looking up w4
        #embedded_col = embedding_lookup(tf.identity(embedding), col)  # number * embedding output number
        embedded_col = embedding_ops.embedding_lookup_unique(embedding, col)

        if category_sum is None:
            category_sum = embedded_col
        else:
            category_sum = tf.concat([category_sum, embedded_col], 1)

    tf.set_random_seed(1)
    w = tf.get_variable("W",
                        [numeric_input.shape[1] + category_sum.shape[1], 1],
                        initializer=tf.contrib.layers.xavier_initializer())
    wmodel_logits_sum = tf.matmul(tf.concat([numeric_input, category_sum], 1),
                                  w)

    return wmodel_logits_sum
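For reference, `embedding_lookup_unique` behaves like `tf.nn.embedding_lookup`, except it first deduplicates the ids, looks each unique id up once, and then gathers the results back into the original order (this matches the `unique` flag documented in the `embed_sequence` examples below). A minimal sketch of that idea, assuming TF 1.x graph mode; it is illustrative only, not the library implementation:

import tensorflow as tf

params = tf.constant([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])
ids = tf.constant([2, 0, 2, 1])                      # note the repeated id 2
unique_ids, gather_idx = tf.unique(ids)              # look each id up only once
unique_embs = tf.nn.embedding_lookup(params, unique_ids)
embs = tf.gather(unique_embs, gather_idx)            # restore the original order

with tf.Session() as sess:
    print(sess.run(embs))                            # rows 2, 0, 2, 1 of params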
Example #2
def embed_sequence(
        ids,
        sess,  # added
        vocab_size=None,
        embed_dim=None,
        unique=False,
        initializer=None,
        regularizer=None,
        trainable=True,
        scope=None,
        reuse=None):

    if not (reuse or (vocab_size and embed_dim)):
        raise ValueError(
            'Must specify vocab size and embedding dimension when not '
            'reusing. Got vocab_size=%s and embed_dim=%s' %
            (vocab_size, embed_dim))
    with variable_scope.variable_scope(scope,
                                       'EmbedSequence', [ids],
                                       reuse=reuse):
        shape = [vocab_size, embed_dim]
        if reuse and vocab_size is None or embed_dim is None:
            shape = None
        embeddings = model_variable(
            'embeddings',
            sess,  # added
            shape=shape,
            initializer=initializer,
            regularizer=regularizer,
            trainable=trainable)
        print(sess.run(embeddings))
        if unique:
            return contrib_embedding_ops.embedding_lookup_unique(
                embeddings, ids)
        return embedding_ops.embedding_lookup(embeddings, ids)
Example #3
def wide_model(numeric_input, category_input, vocabs):
    transpose_category_input = tf.transpose(category_input)
    category_sum = None
    # Build category_sum by embedding each category column; it is concatenated with numeric_input below
    for i in range(0, len(vocabs)):
        embedding = tf.get_variable("wideem" + str(i), [vocabs[i], 8],
                                    initializer=tf.contrib.layers.xavier_initializer()
                                    #partitioner=tf.fixed_size_partitioner(n_pss))
                                    #partitioner=tf.min_max_variable_partitioner(n_pss, 0, 2 << 10)
                                    )
        # Pick one column from category input
        col = tf.gather(transpose_category_input, [i])[0]
        #col = tf.nn.embedding_lookup(transpose_category_input, [i])[0]

        # Equivalent to multiplying a one-hot row [0,0,0,1] by [w1; w2; w3; w4], i.e. looking up w4
        #embedded_col = embedding_lookup(tf.identity(embedding), col)  # number * embedding output number
        embedded_col = embedding_ops.embedding_lookup_unique(embedding, col)

        if category_sum is None:
            category_sum = embedded_col
        else:
            category_sum = tf.concat([category_sum, embedded_col], 1)

    tf.set_random_seed(1)
    w = tf.get_variable("W", [numeric_input.shape[1] + category_sum.shape[1], 1], initializer=tf.contrib.layers.xavier_initializer())
    wmodel_logits_sum = tf.matmul(tf.concat([numeric_input, category_sum], 1), w)

    return wmodel_logits_sum
Example #4
def embed_sequence(ids,
                   vocab_size=None,
                   embed_dim=None,
                   unique=False,
                   initializer=None,
                   regularizer=None,
                   trainable=True,
                   scope=None,
                   reuse=None):
    """Maps a sequence of symbols to a sequence of embeddings.

  Typical use case would be reusing embeddings between an encoder and decoder.

  Args:
    ids: `[batch_size, doc_length]` `Tensor` of type `int32` or `int64`
      with symbol ids.
    vocab_size: Integer number of symbols in vocabulary.
    embed_dim: Integer number of dimensions for embedding matrix.
    unique: If `True`, will first compute the unique set of indices, and then
         lookup each embedding once, repeating them in the output as needed.
    initializer: An initializer for the embeddings, if `None` default for
        current scope is used.
    regularizer: Optional regularizer for the embeddings.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    scope: Optional string specifying the variable scope for the op, required
        if `reuse=True`.
    reuse: If `True`, variables inside the op will be reused.

  Returns:
    `Tensor` of `[batch_size, doc_length, embed_dim]` with embedded sequences.

  Raises:
    ValueError: if `embed_dim` or `vocab_size` are not specified when
      `reuse` is `None` or `False`.
  """
    if not (reuse or (vocab_size and embed_dim)):
        raise ValueError(
            'Must specify vocab size and embedding dimension when not '
            'reusing. Got vocab_size=%s and embed_dim=%s' %
            (vocab_size, embed_dim))
    with variable_scope.variable_scope(scope,
                                       'EmbedSequence', [ids],
                                       reuse=reuse):
        shape = [vocab_size, embed_dim]
        if reuse and vocab_size is None or embed_dim is None:
            shape = None
        embeddings = variables.model_variable('embeddings',
                                              shape=shape,
                                              initializer=initializer,
                                              regularizer=regularizer,
                                              trainable=trainable)
        if unique:
            return contrib_embedding_ops.embedding_lookup_unique(
                embeddings, ids)
        return embedding_ops.embedding_lookup(embeddings, ids)
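A hypothetical call to `embed_sequence` (assuming TF 1.x with `tf.contrib` available); the vocab size and dimensions below are made up for illustration:

import tensorflow as tf

ids = tf.constant([[1, 4, 4], [2, 0, 3]], dtype=tf.int64)    # [batch_size, doc_length]
embedded = tf.contrib.layers.embed_sequence(
    ids, vocab_size=10, embed_dim=8, unique=True)            # -> [2, 3, 8]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(embedded).shape)                          # (2, 3, 8)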
Example #5
  def test_embedding_lookup_unique_param3d(self):
    embeds = np.random.randn(5, 3, 3)
    idx = np.random.randint(0, 5, 10)
    idx2d = np.random.randint(0, 5, (10, 2))

    with self.test_session():
      embedded_np = embeds[idx]
      embedded_np2d = embeds[idx2d]
      embedded_tf = embedding_ops.embedding_lookup_unique(embeds, idx).eval()
      embedded_tf_lst = embedding_ops.embedding_lookup_unique([embeds],
                                                              idx).eval()
      embedded_tf2d = embedding_ops.embedding_lookup_unique(embeds,
                                                            idx2d).eval()

    self.assertEqual(embedded_np.shape, embedded_tf.shape)
    np.testing.assert_almost_equal(embedded_np, embedded_tf)
    self.assertEqual(embedded_np.shape, embedded_tf_lst.shape)
    np.testing.assert_almost_equal(embedded_np, embedded_tf_lst)
    self.assertEqual(embedded_np2d.shape, embedded_tf2d.shape)
    np.testing.assert_almost_equal(embedded_np2d, embedded_tf2d)
Example #6
  def test_embedding_lookup_unique_param3d(self):
    embeds = np.random.randn(5, 3, 3)
    idx = np.random.randint(0, 5, 10)
    idx2d = np.random.randint(0, 5, (10, 2))

    with self.cached_session():
      embedded_np = embeds[idx]
      embedded_np2d = embeds[idx2d]
      embedded_tf = embedding_ops.embedding_lookup_unique(embeds, idx).eval()
      embedded_tf_lst = embedding_ops.embedding_lookup_unique([embeds],
                                                              idx).eval()
      embedded_tf2d = embedding_ops.embedding_lookup_unique(embeds,
                                                            idx2d).eval()

    self.assertEqual(embedded_np.shape, embedded_tf.shape)
    np.testing.assert_almost_equal(embedded_np, embedded_tf)
    self.assertEqual(embedded_np.shape, embedded_tf_lst.shape)
    np.testing.assert_almost_equal(embedded_np, embedded_tf_lst)
    self.assertEqual(embedded_np2d.shape, embedded_tf2d.shape)
    np.testing.assert_almost_equal(embedded_np2d, embedded_tf2d)
Example #7
def embed_sequence(ids,
                   vocab_size=None,
                   embed_dim=None,
                   unique=False,
                   initializer=None,
                   regularizer=None,
                   trainable=True,
                   scope=None,
                   reuse=None):
  """Maps a sequence of symbols to a sequence of embeddings.

  Typical use case would be reusing embeddings between an encoder and decoder.

  Args:
    ids: `[batch_size, doc_length]` `Tensor` of type `int32` or `int64`
      with symbol ids.
    vocab_size: Integer number of symbols in vocabulary.
    embed_dim: Integer number of dimensions for embedding matrix.
    unique: If `True`, will first compute the unique set of indices, and then
         lookup each embedding once, repeating them in the output as needed.
    initializer: An initializer for the embeddings, if `None` default for
        current scope is used.
    regularizer: Optional regularizer for the embeddings.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    scope: Optional string specifying the variable scope for the op, required
        if `reuse=True`.
    reuse: If `True`, variables inside the op will be reused.

  Returns:
    `Tensor` of `[batch_size, doc_length, embed_dim]` with embedded sequences.

  Raises:
    ValueError: if `embed_dim` or `vocab_size` are not specified when 
      `reuse` is `None` or `False`.
  """
  if not (reuse or (vocab_size and embed_dim)):
    raise ValueError('Must specify vocab size and embedding dimension when not '
                     'reusing. Got vocab_size=%s and embed_dim=%s' % (
                         vocab_size, embed_dim))
  with variable_scope.variable_scope(
      scope, 'EmbedSequence', [ids], reuse=reuse):
    shape = [vocab_size, embed_dim]
    if reuse and vocab_size is None or embed_dim is None:
      shape = None
    embeddings = variables.model_variable(
        'embeddings', shape=shape,
        initializer=initializer, regularizer=regularizer,
        trainable=trainable)
    if unique:
      return contrib_embedding_ops.embedding_lookup_unique(embeddings, ids)
    return embedding_ops.embedding_lookup(embeddings, ids)
Example #8
  def test_embedding_lookup_unique(self):
    d_embed = 5
    n_embed = 10
    idx_shape = (2, 3, 4)
    embeds = np.random.randn(n_embed, d_embed)
    idx = np.random.randint(0, n_embed, idx_shape)

    with self.cached_session():
      embedded_np = embeds[idx]
      embedded_tf = embedding_ops.embedding_lookup_unique(embeds, idx).eval()

    self.assertEqual(embedded_np.shape, embedded_tf.shape)
    np.testing.assert_almost_equal(embedded_np, embedded_tf)
Example #9
  def test_embedding_lookup_unique(self):
    d_embed = 5
    n_embed = 10
    idx_shape = (2, 3, 4)
    embeds = np.random.randn(n_embed, d_embed)
    idx = np.random.randint(0, n_embed, idx_shape)

    with self.test_session():
      embedded_np = embeds[idx]
      embedded_tf = embedding_ops.embedding_lookup_unique(embeds, idx).eval()

    self.assertEqual(embedded_np.shape, embedded_tf.shape)
    np.testing.assert_almost_equal(embedded_np, embedded_tf)
Example #10
def deep_model(numeric_input, category_input, vocabs, hidden1, hidden2,
               hidden3):
    embedding_output_cnt = 8

    transpose_category_input = tf.transpose(category_input)

    # Append the embedded category inputs to the numeric inputs
    for i in range(0, len(vocabs)):
        embedding = tf.get_variable(
            "deepem" + str(i), [vocabs[i], embedding_output_cnt],
            initializer=tf.contrib.layers.xavier_initializer()
            #partitioner=tf.fixed_size_partitioner(n_pss))
            #partitioner=tf.min_max_variable_partitioner(n_pss, 0, 2 << 10)
        )
        # Pick one column from category input
        col = tf.gather(transpose_category_input, [i])[0]
        #col = tf.nn.embedding_lookup(transpose_category_input, [i])[0]

        embedding_category = embedding_ops.embedding_lookup_unique(
            embedding, col)
        #embedding_category = embedding_lookup(tf.identity(embedding), col)  # batch_size*embedding_output_cnt

        numeric_input = tf.concat([numeric_input, embedding_category], 1)

    # init
    W1 = tf.get_variable("W1", [numeric_input.shape[1], hidden1],
                         initializer=tf.contrib.layers.xavier_initializer())
    b1 = tf.get_variable("b1", [hidden1], initializer=tf.zeros_initializer())
    #    W2 = tf.get_variable("W2", [hidden1, hidden2], initializer=tf.contrib.layers.xavier_initializer())
    #    b2 = tf.get_variable("b2", [hidden2], initializer=tf.zeros_initializer())
    #    W3 = tf.get_variable("W3", [hidden2, hidden3], initializer=tf.contrib.layers.xavier_initializer())
    #    b3 = tf.get_variable("b3", [hidden3], initializer=tf.zeros_initializer())

    # forward
    Z1 = tf.add(tf.matmul(numeric_input, W1), b1)  # Z1 = np.dot(W1, X) + b1
    A1 = tf.nn.tanh(Z1)  # A1 = tanh(Z1)
    #    Z2 = tf.add(tf.matmul(A1, W2), b2)  # Z2 = np.dot(W2, a1) + b2
    #    A2 = tf.nn.tanh(Z2)  # A2 = relu(Z2)
    #    Z3 = tf.add(tf.matmul(A2, W3), b3)  # Z3 = np.dot(W3,Z2) + b3
    #    A3 = tf.nn.tanh(Z3)

    return A1
Example #11
def deep_model(numeric_input, category_input, vocabs, hidden1, hidden2, hidden3):
    embedding_output_cnt = 8

    transpose_category_input = tf.transpose(category_input)

    # Append the embedded category inputs to the numeric inputs
    for i in range(0, len(vocabs)):
        embedding = tf.get_variable("deepem" + str(i), [vocabs[i], embedding_output_cnt],
                                    initializer=tf.contrib.layers.xavier_initializer()
                                    #partitioner=tf.fixed_size_partitioner(n_pss))
                                    #partitioner=tf.min_max_variable_partitioner(n_pss, 0, 2 << 10)
                                    )
        # Pick one column from category input
        col = tf.gather(transpose_category_input, [i])[0]
        #col = tf.nn.embedding_lookup(transpose_category_input, [i])[0]

        embedding_category = embedding_ops.embedding_lookup_unique(embedding, col)
        #embedding_category = embedding_lookup(tf.identity(embedding), col)  # batch_size*embedding_output_cnt

        numeric_input = tf.concat([numeric_input, embedding_category], 1)

    # init
    W1 = tf.get_variable("W1", [numeric_input.shape[1], hidden1], initializer=tf.contrib.layers.xavier_initializer())
    b1 = tf.get_variable("b1", [hidden1], initializer=tf.zeros_initializer())
#    W2 = tf.get_variable("W2", [hidden1, hidden2], initializer=tf.contrib.layers.xavier_initializer())
#    b2 = tf.get_variable("b2", [hidden2], initializer=tf.zeros_initializer())
#    W3 = tf.get_variable("W3", [hidden2, hidden3], initializer=tf.contrib.layers.xavier_initializer())
#    b3 = tf.get_variable("b3", [hidden3], initializer=tf.zeros_initializer())

    # forward
    Z1 = tf.add(tf.matmul(numeric_input, W1), b1)  # Z1 = np.dot(W1, X) + b1
    A1 = tf.nn.tanh(Z1)  # A1 = tanh(Z1)
#    Z2 = tf.add(tf.matmul(A1, W2), b2)  # Z2 = np.dot(W2, a1) + b2
#    A2 = tf.nn.tanh(Z2)  # A2 = relu(Z2)
#    Z3 = tf.add(tf.matmul(A2, W3), b3)  # Z3 = np.dot(W3,Z2) + b3
#    A3 = tf.nn.tanh(Z3)

    return A1
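A hypothetical invocation of `deep_model` (and the matching `wide_model` from the earlier examples), assuming TF 1.x graph mode; the placeholder shapes, vocab sizes, and hidden sizes are made up for illustration and are not from the original code:

import tensorflow as tf

vocabs = [100, 50, 20]                                    # one vocab size per category column
numeric_input = tf.placeholder(tf.float32, [None, 13], name="numeric")
category_input = tf.placeholder(tf.int32, [None, len(vocabs)], name="category")

with tf.variable_scope("deep"):
    deep_out = deep_model(numeric_input, category_input, vocabs,
                          hidden1=64, hidden2=32, hidden3=16)
with tf.variable_scope("wide"):
    wide_logits = wide_model(numeric_input, category_input, vocabs)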
Example #12
    def build(self, for_deploy, variants=""):
        conf = self.conf
        name = self.name
        job_type = self.job_type
        dtype = self.dtype
        self.beam_size = 1 if (not for_deploy or variants == "score") else sum(
            self.conf.beam_splits)

        # Input maps
        self.in_table = lookup.MutableHashTable(key_dtype=tf.string,
                                                value_dtype=tf.int64,
                                                default_value=UNK_ID,
                                                shared_name="in_table",
                                                name="in_table",
                                                checkpoint=True)

        self.enc_str_inps = tf.placeholder(tf.string,
                                           shape=(None, conf.input_max_len),
                                           name="enc_inps")
        self.enc_lens = tf.placeholder(tf.int32, shape=[None], name="enc_lens")
        self.tags = tf.placeholder(tf.int32,
                                   shape=[None, conf.tag_num],
                                   name="tags")
        self.down_wgts = tf.placeholder(tf.float32,
                                        shape=[None],
                                        name="down_wgts")

        # lookup
        self.enc_inps = self.in_table.lookup(self.enc_str_inps)
        #self.enc_inps = tf.Print(self.enc_inps, [self.enc_inps], message="enc_inps", summarize=100000)

        with variable_scope.variable_scope(self.model_kind,
                                           dtype=dtype) as scope:
            # Create encode graph and get attn states
            graphlg.info("Creating embeddings and embedding enc_inps.")
            with ops.device("/cpu:0"):
                self.embedding = variable_scope.get_variable(
                    "embedding", [conf.output_vocab_size, conf.embedding_size],
                    initializer=tf.random_uniform_initializer(-0.08, 0.08))
                self.emb_enc_inps = embedding_lookup_unique(
                    self.embedding, self.enc_inps)

            graphlg.info("Creating dynamic rnn...")
            if conf.bidirectional:
                with variable_scope.variable_scope("encoder",
                                                   dtype=dtype) as scope:
                    cell_fw = CreateMultiRNNCell(conf.cell_model,
                                                 conf.num_units,
                                                 conf.num_layers,
                                                 conf.output_keep_prob)
                    cell_bw = CreateMultiRNNCell(conf.cell_model,
                                                 conf.num_units,
                                                 conf.num_layers,
                                                 conf.output_keep_prob)
                self.enc_outs, self.enc_states = bidirectional_dynamic_rnn(
                    cell_fw=cell_fw,
                    cell_bw=cell_bw,
                    inputs=self.emb_enc_inps,
                    sequence_length=self.enc_lens,
                    dtype=dtype,
                    parallel_iterations=16,
                    scope=scope)

                fw_s, bw_s = self.enc_states
                self.enc_states = []
                for f, b in zip(fw_s, bw_s):
                    if isinstance(f, LSTMStateTuple):
                        self.enc_states.append(
                            LSTMStateTuple(tf.concat([f.c, b.c], axis=1),
                                           tf.concat([f.h, b.h], axis=1)))
                    else:
                        self.enc_states.append(tf.concat([f, b], 1))
                self.enc_outs = tf.concat([self.enc_outs[0], self.enc_outs[1]],
                                          axis=2)
                mem_size = 2 * conf.num_units
                enc_state_size = 2 * conf.num_units
            else:
                with variable_scope.variable_scope("encoder",
                                                   dtype=dtype) as scope:
                    cell = CreateMultiRNNCell(conf.cell_model, conf.num_units,
                                              conf.num_layers,
                                              conf.output_keep_prob)
                self.enc_outs, self.enc_states = dynamic_rnn(
                    cell=cell,
                    inputs=self.emb_enc_inps,
                    sequence_length=self.enc_lens,
                    parallel_iterations=16,
                    scope=scope,
                    dtype=dtype)
                mem_size = conf.num_units
                enc_state_size = conf.num_units

        self.enc_outs = tf.expand_dims(self.enc_outs, -1)
        with variable_scope.variable_scope("cnn", dtype=dtype,
                                           reuse=None) as scope:
            feature_map = FeatureMatrix(conf.conv_conf,
                                        self.enc_outs,
                                        scope=scope,
                                        dtype=dtype)

        vec = tf.contrib.layers.flatten(feature_map)

        with variable_scope.variable_scope("fc", dtype=dtype,
                                           reuse=False) as scope:
            fc_out = FC(inputs=vec,
                        h_size=conf.fc_h_size,
                        o_size=conf.tag_num,
                        act=relu)
        self.outputs = fc_out

        if not for_deploy:
            #self.tags = tf.Print(self.tags, [self.tags], message="tags", summarize=10000)
            loss = tf.losses.softmax_cross_entropy(self.tags, self.outputs)
            see_loss = loss
            tf.summary.scalar("loss", see_loss)
            self.summary_ops = tf.summary.merge_all()
            self.update = self.backprop(loss)

            self.train_outputs_map["loss"] = see_loss
            self.train_outputs_map["update"] = self.update

            self.fo_outputs_map["loss"] = see_loss

            self.debug_outputs_map["loss"] = see_loss
            self.debug_outputs_map["outputs"] = self.outputs,
            self.debug_outputs_map["update"] = self.update
            #saver
            self.trainable_params.extend(tf.trainable_variables())
            self.saver = tf.train.Saver(max_to_keep=conf.max_to_keep)
        else:
            if variants == "":
                self.infer_outputs_map["tags"] = tf.nn.softmax(self.outputs)
            else:
                pass

            #saver
            self.trainable_params.extend(tf.trainable_variables())
            self.saver = tf.train.Saver(max_to_keep=conf.max_to_keep)

            # Exporter for serving
            self.model_exporter = exporter.Exporter(self.saver)
            inputs = {
                "enc_inps:0": self.enc_str_inps,
                "enc_lens:0": self.enc_lens
            }
            outputs = self.infer_outputs_map
            self.model_exporter.init(tf.get_default_graph().as_graph_def(),
                                     named_graph_signatures={
                                         "inputs":
                                         exporter.generic_signature(inputs),
                                         "outputs":
                                         exporter.generic_signature(outputs)
                                     })
            graphlg.info("Graph done")
            graphlg.info("")
        return
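The `lookup.MutableHashTable` used above maps raw string tokens to int64 ids inside the graph before the embedding lookup. A minimal, self-contained sketch of that pattern, assuming TF 1.x with `tf.contrib`; `UNK_ID` here is an assumed placeholder for the out-of-vocabulary id:

import tensorflow as tf
from tensorflow.contrib import lookup

UNK_ID = 0  # assumption: id returned for out-of-vocabulary tokens

in_table = lookup.MutableHashTable(key_dtype=tf.string, value_dtype=tf.int64,
                                   default_value=UNK_ID, name="in_table")
insert_op = in_table.insert(tf.constant(["hello", "world"]),
                            tf.constant([1, 2], dtype=tf.int64))
ids = in_table.lookup(tf.constant([["hello", "unknown", "world"]]))

with tf.Session() as sess:
    sess.run(insert_op)
    print(sess.run(ids))   # [[1 0 2]]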
Example #13
	def build(self, inputs, for_deploy):
		scope = ""
		conf = self.conf
		name = self.name
		job_type = self.job_type
		dtype = self.dtype
		self.beam_splits = conf.beam_splits
		self.beam_size = 1 if not for_deploy else sum(self.beam_splits)

		self.enc_str_inps = inputs["enc_inps:0"]
		self.dec_str_inps = inputs["dec_inps:0"]
		self.enc_lens = inputs["enc_lens:0"] 
		self.dec_lens = inputs["dec_lens:0"]
		self.down_wgts = inputs["down_wgts:0"]

		with tf.name_scope("TableLookup"):
			# Input maps
			self.in_table = lookup.MutableHashTable(key_dtype=tf.string,
														 value_dtype=tf.int64,
														 default_value=UNK_ID,
														 shared_name="in_table",
														 name="in_table",
														 checkpoint=True)

			self.out_table = lookup.MutableHashTable(key_dtype=tf.int64,
													 value_dtype=tf.string,
													 default_value="_UNK",
													 shared_name="out_table",
													 name="out_table",
													 checkpoint=True)
			# lookup
			self.enc_inps = self.in_table.lookup(self.enc_str_inps)
			self.dec_inps = self.in_table.lookup(self.dec_str_inps)

		graphlg.info("Preparing decoder inps...")
		dec_inps = tf.slice(self.dec_inps, [0, 0], [-1, conf.output_max_len + 1])

		# Create encode graph and get attn states
		graphlg.info("Creating embeddings and embedding enc_inps.")
		with ops.device("/cpu:0"):
			self.embedding = variable_scope.get_variable("embedding", [conf.output_vocab_size, conf.embedding_size])
		with tf.name_scope("Embed") as scope:
			dec_inps = tf.slice(self.dec_inps, [0, 0], [-1, conf.output_max_len + 1])
			with ops.device("/cpu:0"):
				self.emb_inps = embedding_lookup_unique(self.embedding, self.enc_inps)
				emb_dec_inps = embedding_lookup_unique(self.embedding, dec_inps)

		graphlg.info("Creating dynamic x rnn...")
		self.enc_outs, self.enc_states, mem_size, enc_state_size = DynRNN(conf.cell_model, conf.num_units, conf.num_layers,
																		self.emb_inps, self.enc_lens, keep_prob=1.0,
																		bidi=conf.bidirectional, name_scope="DynRNNEncoder")
		batch_size = tf.shape(self.enc_outs)[0]

		if self.conf.attention:
			init_h = self.enc_states[-1].h
		else:
			mechanism = dynamic_attention_wrapper.LuongAttention(num_units=conf.num_units, memory=self.enc_outs, 
																	max_mem_size=self.conf.input_max_len,
																	memory_sequence_length=self.enc_lens)
			init_h = mechanism(self.enc_states[-1].h)

		if isinstance(self.enc_states[-1], LSTMStateTuple):
			enc_state = LSTMStateTuple(self.enc_states[-1].c, init_h) 
		
		hidden_units = int(math.sqrt(mem_size * self.conf.enc_latent_dim))
		z, mu_prior, logvar_prior = PriorNet([enc_state], hidden_units, self.conf.enc_latent_dim, stddev=1.0, prior_type=conf.prior_type)

		KLD = 0.0
		# Different graph for training and inference time 
		if not for_deploy:
			# Y inputs for posterior z 
			with tf.name_scope("YEncode"):
				y_emb_inps = tf.slice(emb_dec_inps, [0, 1, 0], [-1, -1, -1])
				y_enc_outs, y_enc_states, y_mem_size, y_enc_state_size = DynRNN(conf.cell_model, conf.num_units, conf.num_layers,
																			y_emb_inps, self.dec_lens, keep_prob=1.0, bidi=False, name_scope="y_enc")
				y_enc_state = y_enc_states[-1]

				z, KLD, l2 = CreateVAE([enc_state, y_enc_state], self.conf.enc_latent_dim, mu_prior, logvar_prior)

		# project z + x_thinking_state to decoder state
		raw_dec_states = [z, enc_state]
		# add BOW loss
		#num_hidden_units = int(math.sqrt(conf.output_vocab_size * int(decision_state.shape[1])))
		#bow_l1 = layers_core.Dense(num_hidden_units, use_bias=True, name="bow_hidden", activation=tf.tanh)
		#bow_l2 = layers_core.Dense(conf.output_vocab_size, use_bias=True, name="bow_out", activation=None)
		#bow = bow_l2(bow_l1(decision_state)) 

		#y_dec_inps = tf.slice(self.dec_inps, [0, 1], [-1, -1])
		#bow_y = tf.reduce_sum(tf.one_hot(y_dec_inps, on_value=1.0, off_value=0.0, axis=-1, depth=conf.output_vocab_size), axis=1)
		#batch_bow_losses = tf.reduce_sum(bow_y * (-1.0) * tf.nn.log_softmax(bow), axis=1)

		max_mem_size = self.conf.input_max_len + self.conf.output_max_len + 2

		with tf.name_scope("ShapeToBeam") as scope: 
			def _to_beam(t):
				beam_t = tf.reshape(tf.tile(t, [1, self.beam_size]), [-1, int(t.get_shape()[1])])
				return beam_t 
			beam_raw_dec_states = tf.contrib.framework.nest.map_structure(_to_beam, raw_dec_states) 
	
			beam_memory = tf.reshape(tf.tile(self.enc_outs, [1, 1, self.beam_size]), [-1, conf.input_max_len, mem_size])
			beam_memory_lens = tf.squeeze(tf.reshape(tf.tile(tf.expand_dims(self.enc_lens, 1), [1, self.beam_size]), [-1, 1]), 1)
			
		cell = AttnCell(cell_model=conf.cell_model, num_units=mem_size, num_layers=conf.num_layers,
						attn_type=self.conf.attention, memory=beam_memory, mem_lens=beam_memory_lens,
						max_mem_size=max_mem_size, addmem=self.conf.addmem, keep_prob=1.0,
						dtype=tf.float32, name_scope="AttnCell")
		# Fit decision states to shape of attention decoder cell states 
		zero_attn_states = DecStateInit(beam_raw_dec_states, cell, batch_size * self.beam_size)
		
		# Output projection
		with tf.variable_scope("OutProj"):
			graphlg.info("Creating out_proj...") 
			if conf.out_layer_size:
				w = tf.get_variable("proj_w", [conf.out_layer_size, conf.output_vocab_size], dtype=dtype)
			else:
				w = tf.get_variable("proj_w", [mem_size, conf.output_vocab_size], dtype=dtype)
			b = tf.get_variable("proj_b", [conf.output_vocab_size], dtype=dtype)
			self.out_proj = (w, b)
		
		if not for_deploy: 
			inputs = {}
			dec_init_state = zero_attn_states
			hp_train = helper.ScheduledEmbeddingTrainingHelper(inputs=emb_dec_inps, sequence_length=self.dec_lens, 
															   embedding=self.embedding, sampling_probability=0.0,
															   out_proj=self.out_proj)
			output_layer = layers_core.Dense(self.conf.out_layer_size, use_bias=True) if self.conf.out_layer_size else None
			my_decoder = basic_decoder.BasicDecoder(cell=cell, helper=hp_train, initial_state=dec_init_state, output_layer=output_layer)
			cell_outs, final_state = decoder.dynamic_decode(decoder=my_decoder, impute_finished=False,
															maximum_iterations=conf.output_max_len + 1, scope=scope)
			outputs = cell_outs.rnn_output

			L = tf.shape(outputs)[1]
			outputs = tf.reshape(outputs, [-1, int(self.out_proj[0].shape[0])])
			outputs = tf.matmul(outputs, self.out_proj[0]) + self.out_proj[1] 
			logits = tf.reshape(outputs, [-1, L, int(self.out_proj[0].shape[1])])

			# branch 1 for debugging, doesn't have to be called
			#m = tf.shape(self.outputs)[0]
			#self.mask = tf.zeros([m, int(w.shape[1])])
			#for i in [3]:
			#	self.mask = self.mask + tf.one_hot(indices=tf.ones([m], dtype=tf.int32) * i, on_value=100.0, depth=int(w.shape[1]))
			#self.outputs = self.outputs - self.mask

			with tf.name_scope("DebugOutputs") as scope:
				self.outputs = tf.argmax(logits, axis=2)
				self.outputs = tf.reshape(self.outputs, [-1, L])
				self.outputs = self.out_table.lookup(tf.cast(self.outputs, tf.int64))

			# branch 2 for loss
			with tf.name_scope("Loss") as scope:
				tars = tf.slice(self.dec_inps, [0, 1], [-1, L])
				wgts = tf.cumsum(tf.one_hot(self.dec_lens, L), axis=1, reverse=True)

				#wgts = wgts * tf.expand_dims(self.down_wgts, 1)
				self.loss = loss.sequence_loss(logits=logits, targets=tars, weights=wgts, average_across_timesteps=False, average_across_batch=False)
				batch_wgt = tf.reduce_sum(self.down_wgts) + 1e-12 
				#bow_loss = tf.reduce_sum(batch_bow_losses * self.down_wgts) / batch_wgt

				example_losses = tf.reduce_sum(self.loss, 1)
				see_loss = tf.reduce_sum(example_losses / tf.cast(self.dec_lens, tf.float32) * self.down_wgts) / batch_wgt
				KLD = tf.reduce_sum(KLD * self.down_wgts) / batch_wgt
				self.loss = tf.reduce_sum((example_losses + self.conf.kld_ratio * KLD) / tf.cast(self.dec_lens, tf.float32) * self.down_wgts) / batch_wgt

			with tf.name_scope(self.model_kind):
				tf.summary.scalar("loss", see_loss)
				tf.summary.scalar("kld", KLD) 
				#tf.summary.scalar("bow", bow_loss)

			graph_nodes = {
				"loss":self.loss,
				"inputs":inputs,
				"debug_outputs":self.outputs,
				"outputs":{},
				"visualize":None
			}
			return graph_nodes
		else:
			hp_infer = helper.GreedyEmbeddingHelper(embedding=self.embedding,
													start_tokens=tf.ones(shape=[batch_size * self.beam_size], dtype=tf.int32),
													end_token=EOS_ID, out_proj=self.out_proj)
			output_layer = layers_core.Dense(self.conf.out_layer_size, use_bias=True) if self.conf.out_layer_size else None
			dec_init_state = beam_decoder.BeamState(tf.zeros([batch_size * self.beam_size]), zero_attn_states, tf.zeros([batch_size * self.beam_size], tf.int32))

			my_decoder = beam_decoder.BeamDecoder(cell=cell, helper=hp_infer, out_proj=self.out_proj, initial_state=dec_init_state,
													beam_splits=self.beam_splits, max_res_num=self.conf.max_res_num, output_layer=output_layer)
			cell_outs, final_state = decoder.dynamic_decode(decoder=my_decoder, scope=scope, maximum_iterations=self.conf.output_max_len)

			L = tf.shape(cell_outs.beam_ends)[1]
			beam_symbols = cell_outs.beam_symbols
			beam_parents = cell_outs.beam_parents

			beam_ends = cell_outs.beam_ends
			beam_end_parents = cell_outs.beam_end_parents
			beam_end_probs = cell_outs.beam_end_probs
			alignments = cell_outs.alignments

			beam_ends = tf.reshape(tf.transpose(beam_ends, [0, 2, 1]), [-1, L])
			beam_end_parents = tf.reshape(tf.transpose(beam_end_parents, [0, 2, 1]), [-1, L])
			beam_end_probs = tf.reshape(tf.transpose(beam_end_probs, [0, 2, 1]), [-1, L])


			# Creating tail_ids 
			batch_size = tf.Print(batch_size, [batch_size], message="CVAERNN batch")

			#beam_symbols = tf.Print(cell_outs.beam_symbols, [tf.shape(cell_outs.beam_symbols)], message="beam_symbols")
			#beam_parents = tf.Print(cell_outs.beam_parents, [tf.shape(cell_outs.beam_parents)], message="beam_parents")
			#beam_ends = tf.Print(cell_outs.beam_ends, [tf.shape(cell_outs.beam_ends)], message="beam_ends") 
			#beam_end_parents = tf.Print(cell_outs.beam_end_parents, [tf.shape(cell_outs.beam_end_parents)], message="beam_end_parents") 
			#beam_end_probs = tf.Print(cell_outs.beam_end_probs, [tf.shape(cell_outs.beam_end_probs)], message="beam_end_probs") 
			#alignments = tf.Print(cell_outs.alignments, [tf.shape(cell_outs.alignments)], message="beam_attns")

			batch_offset = tf.expand_dims(tf.cumsum(tf.ones([batch_size, self.beam_size], dtype=tf.int32) * self.beam_size, axis=0, exclusive=True), 2)
			offset2 = tf.expand_dims(tf.cumsum(tf.ones([batch_size, self.beam_size * 2], dtype=tf.int32) * self.beam_size, axis=0, exclusive=True), 2)

			out_len = tf.shape(beam_symbols)[1]
			self.beam_symbol_strs = tf.reshape(self.out_table.lookup(tf.cast(beam_symbols, tf.int64)), [batch_size, self.beam_size, -1])
			self.beam_parents = tf.reshape(beam_parents, [batch_size, self.beam_size, -1]) - batch_offset

			self.beam_ends = tf.reshape(beam_ends, [batch_size, self.beam_size * 2, -1])
			self.beam_end_parents = tf.reshape(beam_end_parents, [batch_size, self.beam_size * 2, -1]) - offset2
			self.beam_end_probs = tf.reshape(beam_end_probs, [batch_size, self.beam_size * 2, -1])
			self.beam_attns = tf.reshape(alignments, [batch_size, self.beam_size, out_len, -1])

			#cell_outs.alignments
			#self.outputs = tf.concat([outputs_str, tf.cast(cell_outs.beam_parents, tf.string)], 1)

			#ones = tf.ones([batch_size, self.beam_size], dtype=tf.int32)
			#aux_matrix = tf.cumsum(ones * self.beam_size, axis=0, exclusive=True)

			#tm_beam_parents_reverse = tf.reverse(tf.transpose(cell_outs.beam_parents), axis=[0])
			#beam_probs = final_state[1] 

			#def traceback(prev_out, curr_input):
			#	return tf.gather(curr_input, prev_out) 
			#	
			#tail_ids = tf.reshape(tf.cumsum(ones, axis=1, exclusive=True) + aux_matrix, [-1])
			#tm_symbol_index_reverse = tf.scan(traceback, tm_beam_parents_reverse, initializer=tail_ids)
			## Create beam index for symbols, and other info  
			#tm_symbol_index = tf.concat([tf.expand_dims(tail_ids, 0), tm_symbol_index_reverse], axis=0)
			#tm_symbol_index = tf.reverse(tm_symbol_index, axis=[0])
			#tm_symbol_index = tf.slice(tm_symbol_index, [1, 0], [-1, -1])
			#symbol_index = tf.expand_dims(tf.transpose(tm_symbol_index), axis=2)
			#symbol_index = tf.concat([symbol_index, tf.cumsum(tf.ones_like(symbol_index), exclusive=True, axis=1)], axis=2)

			## index alignments and output symbols
			#alignments = tf.gather_nd(cell_outs.alignments, symbol_index)
			#symbol_ids = tf.gather_nd(cell_outs.beam_symbols, symbol_index)

			## outputs and other info
			#self.others = [alignments, beam_probs]
			#self.outputs = self.out_table.lookup(tf.cast(symbol_ids, tf.int64))

			inputs = { 
				"enc_inps:0":self.enc_str_inps,
				"enc_lens:0":self.enc_lens
			}
			outputs = {
				"beam_symbols":self.beam_symbol_strs,
				"beam_parents":self.beam_parents,
				"beam_ends":self.beam_ends,
				"beam_end_parents":self.beam_end_parents,
				"beam_end_probs":self.beam_end_probs,
				"beam_attns":self.beam_attns
			}

			graph_nodes = {
				"loss":None,
				"inputs":inputs,
				"outputs":outputs,
				"visualize":{"z":z}
			}

			return graph_nodes
Example #14
    def build(self, inputs, for_deploy):
        scope = ""
        conf = self.conf
        name = self.name
        job_type = self.job_type
        dtype = self.dtype
        self.beam_splits = conf.beam_splits
        self.beam_size = 1 if not for_deploy else sum(self.beam_splits)

        self.enc_str_inps = inputs["enc_inps:0"]
        self.dec_str_inps = inputs["dec_inps:0"]
        self.enc_lens = inputs["enc_lens:0"]
        self.dec_lens = inputs["dec_lens:0"]
        self.down_wgts = inputs["down_wgts:0"]

        with tf.name_scope("TableLookup"):
            # Input maps
            self.in_table = lookup.MutableHashTable(key_dtype=tf.string,
                                                    value_dtype=tf.int64,
                                                    default_value=UNK_ID,
                                                    shared_name="in_table",
                                                    name="in_table",
                                                    checkpoint=True)

            self.out_table = lookup.MutableHashTable(key_dtype=tf.int64,
                                                     value_dtype=tf.string,
                                                     default_value="_UNK",
                                                     shared_name="out_table",
                                                     name="out_table",
                                                     checkpoint=True)
            # lookup
            self.enc_inps = self.in_table.lookup(self.enc_str_inps)
            self.dec_inps = self.in_table.lookup(self.dec_str_inps)

        graphlg.info("Preparing decoder inps...")
        dec_inps = tf.slice(self.dec_inps, [0, 0],
                            [-1, conf.output_max_len + 1])

        # Create encode graph and get attn states
        graphlg.info("Creating embeddings and embedding enc_inps.")
        with ops.device("/cpu:0"):
            self.embedding = variable_scope.get_variable(
                "embedding", [conf.output_vocab_size, conf.embedding_size])
        with tf.name_scope("Embed") as scope:
            dec_inps = tf.slice(self.dec_inps, [0, 0],
                                [-1, conf.output_max_len + 1])
            with ops.device("/cpu:0"):
                self.emb_inps = embedding_lookup_unique(
                    self.embedding, self.enc_inps)
                emb_dec_inps = embedding_lookup_unique(self.embedding,
                                                       dec_inps)

        graphlg.info("Creating dynamic x rnn...")
        self.enc_outs, self.enc_states, mem_size, enc_state_size = DynRNN(
            conf.cell_model,
            conf.num_units,
            conf.num_layers,
            self.emb_inps,
            self.enc_lens,
            keep_prob=1.0,
            bidi=conf.bidirectional,
            name_scope="DynRNNEncoder")
        batch_size = tf.shape(self.enc_outs)[0]

        if self.conf.attention:
            init_h = self.enc_states[-1].h
        else:
            mechanism = dynamic_attention_wrapper.LuongAttention(
                num_units=conf.num_units,
                memory=self.enc_outs,
                max_mem_size=self.conf.input_max_len,
                memory_sequence_length=self.enc_lens)
            init_h = mechanism(self.enc_states[-1].h)

        if isinstance(self.enc_states[-1], LSTMStateTuple):
            enc_state = LSTMStateTuple(self.enc_states[-1].c, init_h)
            all_emb = tf.concat([enc_state.c, enc_state.h], 1)
        else:
            all_emb = self.enc_states[-1]  # non-LSTM cells: use the final encoder state directly

        all_emb = tf.Print(all_emb, [tf.shape(all_emb)[0]],
                           message="batch_size")

        query_emb, can_embs = tf.split(all_emb, [1, -1], 0)
        query_emb_normalized = tf.nn.l2_normalize(query_emb, 1)
        can_embs_normalized = tf.nn.l2_normalize(can_embs, 1)
        cos_dist_embs = tf.reduce_sum(
            query_emb_normalized * can_embs_normalized, 1)

        sum_word_embs = tf.reduce_sum(self.emb_inps, 1)
        query_word_emb, can_word_embs = tf.split(sum_word_embs, [1, -1], 0)
        query_word_emb_normalized = tf.nn.l2_normalize(query_word_emb, 1)
        can_word_embs_normalized = tf.nn.l2_normalize(can_word_embs, 1)
        cos_dist_word_embs = tf.reduce_sum(
            query_word_emb_normalized * can_word_embs_normalized, 1)

        inputs = {"enc_inps:0": self.enc_str_inps, "enc_lens:0": self.enc_lens}

        graph_nodes = {
            "loss": None,
            "inputs": inputs,
            "outputs": {
                "rnn_enc": tf.concat([tf.zeros([1]), cos_dist_embs], 0),
                "sum_emb": tf.concat([tf.zeros([1]), cos_dist_word_embs], 0)
            },
        }
        return graph_nodes
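The block above scores candidates by cosine similarity between L2-normalized embeddings. A compact sketch of that pattern in isolation, assuming TF 1.x; the tensor shapes are made up:

import tensorflow as tf

# Cosine similarity between one query vector and several candidate vectors,
# mirroring the l2_normalize + reduce_sum pattern used above.
query = tf.nn.l2_normalize(tf.random_normal([1, 128]), 1)
cands = tf.nn.l2_normalize(tf.random_normal([5, 128]), 1)
cos_sim = tf.reduce_sum(query * cands, 1)   # shape [5], one similarity per candidate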
Example #15
    def build(self, for_deploy=False):
        # Assumed: for_deploy is passed in and conf/dtype/name/job_type come from self,
        # as in the other build() examples; the snippet uses all of them below.
        conf = self.conf
        dtype = self.dtype
        name = self.name
        job_type = self.job_type
        # All possible inputs
        graphlg.info("Creating inputs and tables...")
        batch_size = None
        self.enc_querys = tf.placeholder(
            tf.string,
            shape=[batch_size, conf.input_max_len],
            name="enc_querys")
        self.query_lens = tf.placeholder(tf.int32,
                                         shape=[batch_size],
                                         name="query_lens")

        self.enc_posts = tf.placeholder(tf.string,
                                        shape=[batch_size, conf.input_max_len],
                                        name="enc_posts")
        self.post_lens = tf.placeholder(tf.int32,
                                        shape=[batch_size],
                                        name="post_lens")

        self.enc_resps = tf.placeholder(tf.string,
                                        shape=[batch_size, conf.input_max_len],
                                        name="enc_resps")
        self.resp_lens = tf.placeholder(tf.int32,
                                        shape=[batch_size],
                                        name="resp_lens")
        self.target = tf.placeholder(tf.float32,
                                     shape=[batch_size],
                                     name="target")

        #TODO table obj, lookup ops and embedding and its lookup op should be placed on the same device
        with tf.device("/cpu:0"):
            self.embedding = variable_scope.get_variable(
                "embedding", [conf.input_vocab_size, conf.embedding_size],
                initializer=tf.random_uniform_initializer(-0.08, 0.08))

            self.in_table = lookup.MutableHashTable(key_dtype=tf.string,
                                                    value_dtype=tf.int64,
                                                    default_value=UNK_ID,
                                                    shared_name="in_table",
                                                    name="in_table",
                                                    checkpoint=True)
            self.query_embs = embedding_lookup_unique(
                self.embedding, self.in_table.lookup(self.enc_querys))
            self.post_embs = embedding_lookup_unique(
                self.embedding, self.in_table.lookup(self.enc_posts))
            self.resp_embs = embedding_lookup_unique(
                self.embedding, self.in_table.lookup(self.enc_resps))

        # MultiRNNCell

        graphlg.info("Creating multi-layer cells...")

        # Bi-RNN encoder
        graphlg.info("Creating bi-rnn...")
        #q_out = self.query_embs
        with variable_scope.variable_scope("q_rnn", dtype=dtype,
                                           reuse=None) as scope:
            cell1 = MultiRNNCell(
                [CreateCell(conf) for _ in range(conf.num_layers)])
            cell2 = MultiRNNCell(
                [CreateCell(conf) for _ in range(conf.num_layers)])
            q_out, q_out_state = bidirectional_dynamic_rnn(
                cell_fw=cell1,
                cell_bw=cell2,
                inputs=self.query_embs,
                sequence_length=self.query_lens,
                initial_state_fw=None,
                initial_state_bw=None,
                dtype=dtype,
                parallel_iterations=16,
                swap_memory=False,
                time_major=False,
                scope=scope)
        with variable_scope.variable_scope("p_rnn", dtype=dtype,
                                           reuse=None) as scope:
            cell1 = MultiRNNCell(
                [CreateCell(conf) for _ in range(conf.num_layers)])
            cell2 = MultiRNNCell(
                [CreateCell(conf) for _ in range(conf.num_layers)])
            p_out, p_out_state = bidirectional_dynamic_rnn(
                cell_fw=cell1,
                cell_bw=cell2,
                inputs=self.post_embs,
                sequence_length=self.post_lens,
                initial_state_fw=None,
                initial_state_bw=None,
                dtype=dtype,
                parallel_iterations=16,
                swap_memory=False,
                time_major=False,
                scope=scope)
        with variable_scope.variable_scope("r_rnn", dtype=dtype,
                                           reuse=None) as scope:
            cell1 = MultiRNNCell(
                [CreateCell(conf) for _ in range(conf.num_layers)])
            cell2 = MultiRNNCell(
                [CreateCell(conf) for _ in range(conf.num_layers)])
            r_out, r_out_state = bidirectional_dynamic_rnn(
                cell_fw=cell1,
                cell_bw=cell2,
                inputs=self.resp_embs,
                sequence_length=self.resp_lens,
                initial_state_fw=None,
                initial_state_bw=None,
                dtype=dtype,
                parallel_iterations=16,
                swap_memory=False,
                time_major=False,
                scope=scope)

        #q_out_state = tf.concat(q_out_state, axis=1)
        #p_out_state = tf.concat(p_out_state, axis=1)
        #r_out_state = tf.concat(r_out_state, axis=1)

        q_out = tf.concat(q_out, axis=2)
        p_out = tf.concat(p_out, axis=2)
        r_out = tf.concat(r_out, axis=2)

        # Three feature matrices
        graphlg.info("Creating three cnn feature matrices and cos dist...")
        with variable_scope.variable_scope("q_cnn1", dtype=dtype,
                                           reuse=None) as scope:
            q_m = FeatureMatrix(conf, q_out, scope=scope, dtype=dtype)
        with variable_scope.variable_scope("p_cnn1", dtype=dtype,
                                           reuse=None) as scope:
            p_m = FeatureMatrix(conf, p_out, scope=scope, dtype=dtype)
        with variable_scope.variable_scope("r_cnn1", dtype=dtype,
                                           reuse=None) as scope:
            r_m = FeatureMatrix(conf, r_out, scope=scope, dtype=dtype)

        graphlg.info("Creating interactions...")
        # h becomes 1 after max pooling
        q_vec = tf.reshape(q_m, [-1, conf.num_units * 1 * 2 * conf.c1])
        #q_vec = tf.reshape(q_m, [-1, 1 * 1 * conf.c1])
        p_vec = tf.reshape(p_m, [-1, conf.num_units * 1 * 2 * conf.c1])
        #p_vec = tf.reshape(p_m, [-1, 1 * 1 * conf.c1])
        r_vec = tf.reshape(r_m, [-1, conf.num_units * 1 * 2 * conf.c1])
        #r_vec = tf.reshape(r_m, [-1, 1 * 1 * conf.c1])

        norm_q = tf.sqrt(tf.reduce_sum(tf.square(q_vec), 1, keep_dims=True))
        norm_p = tf.sqrt(tf.reduce_sum(tf.square(p_vec), 1, keep_dims=True))
        norm_r = tf.sqrt(tf.reduce_sum(tf.square(r_vec), 1, keep_dims=True))
        cos_q_p = tf.reduce_sum(q_vec * p_vec, 1,
                                keep_dims=True) / (norm_q * norm_p)
        cos_q_r = tf.reduce_sum(q_vec * r_vec, 1,
                                keep_dims=True) / (norm_q * norm_r)

        qpcos_vec = tf.concat([q_vec, p_vec, cos_q_p], axis=1)
        qrcos_vec = tf.concat([q_vec, r_vec, cos_q_r], axis=1)
        #qpcos_vec = tf.concat([q_vec, p_vec], axis=1)
        #qrcos_vec = tf.concat([q_vec, r_vec], axis=1)

        h_size = int(math.sqrt(conf.num_units * 2 * 1 * conf.c1 * 2 + 1))

        qp_fc1 = tf.contrib.layers.fully_connected(
            inputs=qpcos_vec,
            num_outputs=h_size,
            activation_fn=relu,
            weights_initializer=tf.random_uniform_initializer(-0.08, 0.08),
            biases_initializer=tf.random_uniform_initializer(-0.08, 0.08))
        qp_fc2 = tf.contrib.layers.fully_connected(
            inputs=qp_fc1,
            num_outputs=1,
            activation_fn=tf.nn.sigmoid,
            weights_initializer=tf.random_uniform_initializer(-0.2, 0.2),
            biases_initializer=tf.random_uniform_initializer(-0.4, 0.4))

        qr_fc1 = tf.contrib.layers.fully_connected(
            inputs=qrcos_vec,
            num_outputs=h_size,
            activation_fn=relu,
            weights_initializer=tf.random_uniform_initializer(-0.08, 0.08),
            biases_initializer=tf.random_uniform_initializer(-0.08, 0.08))

        qr_fc2 = tf.contrib.layers.fully_connected(
            inputs=qr_fc1,
            num_outputs=1,
            activation_fn=tf.nn.sigmoid,
            weights_initializer=tf.random_uniform_initializer(-0.2, 0.2),
            biases_initializer=tf.random_uniform_initializer(-0.4, 0.4))

        self.scores = tf.squeeze(qp_fc2 * qr_fc2)

        graphlg.info("Creating optimizer and backpropagation...")
        self.global_params = []
        self.trainable_params = tf.global_variables()
        self.optimizer_params = []

        if not for_deploy:
            with variable_scope.variable_scope("deepmatch",
                                               dtype=dtype) as scope:
                self.loss = tf.reduce_mean(tf.square(self.target -
                                                     self.scores))
                self.summary = tf.summary.scalar("%s/loss" % name, self.loss)

            self.learning_rate = tf.Variable(float(conf.learning_rate),
                                             trainable=False,
                                             name="learning_rate")
            self.learning_rate_decay_op = self.learning_rate.assign(
                self.learning_rate * conf.learning_rate_decay_factor)
            self.global_step = tf.Variable(0,
                                           trainable=False,
                                           name="global_step")
            self.data_idx = tf.Variable(0, trainable=False, name="data_idx")
            self.data_idx_inc_op = self.data_idx.assign(self.data_idx +
                                                        conf.batch_size)

            graphlg.info("Creating backpropagation graph and optimizers...")
            self.optimizers = {
                "SGD":
                tf.train.GradientDescentOptimizer(self.learning_rate),
                "Adadelta":
                tf.train.AdadeltaOptimizer(self.learning_rate),
                "Adagrad":
                tf.train.AdagradOptimizer(self.learning_rate),
                "AdagradDA":
                tf.train.AdagradDAOptimizer(self.learning_rate,
                                            self.global_step),
                "Moment":
                tf.train.MomentumOptimizer(self.learning_rate, 0.9),
                "Ftrl":
                tf.train.FtrlOptimizer(self.learning_rate),
                "RMSProp":
                tf.train.RMSPropOptimizer(self.learning_rate)
            }

            self.opt = self.optimizers[conf.opt_name]
            tmp = set(tf.global_variables())

            if job_type == "worker":
                self.opt = SyncReplicasOptimizer(self.opt,
                                                 conf.replicas_to_aggregate,
                                                 conf.total_num_replicas)
                grads_and_vars = self.opt.compute_gradients(loss=self.loss)
                gradients, variables = zip(*grads_and_vars)
            else:
                gradients = tf.gradients(self.loss, tf.trainable_variables())
                variables = tf.trainable_variables()

            clipped_gradients, self.grad_norm = tf.clip_by_global_norm(
                gradients, conf.max_gradient_norm)
            self.update = self.opt.apply_gradients(
                zip(clipped_gradients, variables), self.global_step)

            self.optimizer_params.append(self.learning_rate)
            self.optimizer_params.extend(
                list(set(tf.global_variables()) - tmp))
            self.global_params.extend([self.global_step, self.data_idx])
            self.saver = tf.train.Saver(max_to_keep=conf.max_to_keep)
Example #16
    def build(self):
        conf = self.conf
        dtype = self.dtype
        # All possible inputs
        graphlg.info("Creating inputs and tables...")
        batch_size = None
        self.enc_querys = tf.placeholder(
            tf.string,
            shape=[batch_size, conf.input_max_len],
            name="enc_querys")
        self.query_lens = tf.placeholder(tf.int32,
                                         shape=[batch_size],
                                         name="query_lens")

        self.enc_posts = tf.placeholder(tf.string,
                                        shape=[batch_size, conf.input_max_len],
                                        name="enc_posts")
        self.post_lens = tf.placeholder(tf.int32,
                                        shape=[batch_size],
                                        name="post_lens")

        self.enc_resps = tf.placeholder(tf.string,
                                        shape=[batch_size, conf.input_max_len],
                                        name="enc_resps")
        self.resp_lens = tf.placeholder(tf.int32,
                                        shape=[batch_size],
                                        name="resp_lens")

        self.enc_neg_resps = tf.placeholder(
            tf.string,
            shape=[batch_size, conf.input_max_len],
            name="enc_neg_resp")
        self.neg_resp_lens = tf.placeholder(tf.int32,
                                            shape=[batch_size],
                                            name="neg_resp_lens")

        #TODO table obj, lookup ops and embedding and its lookup op should be placed on the same device
        with tf.device("/cpu:0"):
            self.embedding = variable_scope.get_variable(
                "embedding", [conf.input_vocab_size, conf.embedding_size],
                initializer=tf.random_uniform_initializer(-0.08, 0.08))

            self.in_table = lookup.MutableHashTable(key_dtype=tf.string,
                                                    value_dtype=tf.int64,
                                                    default_value=UNK_ID,
                                                    shared_name="in_table",
                                                    name="in_table",
                                                    checkpoint=True)
            self.query_embs = embedding_lookup_unique(
                self.embedding, self.in_table.lookup(self.enc_querys))
            self.post_embs = embedding_lookup_unique(
                self.embedding, self.in_table.lookup(self.enc_posts))
            self.resp_embs = embedding_lookup_unique(
                self.embedding, self.in_table.lookup(self.enc_resps))
            self.neg_resp_embs = embedding_lookup_unique(
                self.embedding, self.in_table.lookup(self.enc_neg_resps))

        # MultiRNNCell

        graphlg.info("Creating multi-layer cells...")

        # Bi-RNN encoder
        graphlg.info("Creating bi-rnn...")

        with variable_scope.variable_scope("q_rnn", dtype=dtype,
                                           reuse=None) as scope:
            cell1 = CreateMultiRNNCell(conf.cell_model, conf.num_units,
                                       conf.num_layers, conf.output_keep_prob)
            cell2 = CreateMultiRNNCell(conf.cell_model, conf.num_units,
                                       conf.num_layers, conf.output_keep_prob)
            q_out, q_out_state = bidirectional_dynamic_rnn(
                cell_fw=cell1,
                cell_bw=cell2,
                inputs=self.query_embs,
                sequence_length=self.query_lens,
                initial_state_fw=None,
                initial_state_bw=None,
                dtype=dtype,
                parallel_iterations=16,
                swap_memory=False,
                time_major=False)
        with variable_scope.variable_scope("p_rnn", dtype=dtype,
                                           reuse=None) as scope:
            cell1 = CreateMultiRNNCell(conf.cell_model, conf.num_units,
                                       conf.num_layers, conf.output_keep_prob)
            cell2 = CreateMultiRNNCell(conf.cell_model, conf.num_units,
                                       conf.num_layers, conf.output_keep_prob)
            p_out, p_out_state = bidirectional_dynamic_rnn(
                cell_fw=cell1,
                cell_bw=cell2,
                inputs=self.post_embs,
                sequence_length=self.post_lens,
                initial_state_fw=None,
                initial_state_bw=None,
                dtype=dtype,
                parallel_iterations=16,
                swap_memory=False,
                time_major=False)
        with variable_scope.variable_scope("r_rnn", dtype=dtype,
                                           reuse=None) as scope:
            cell1 = CreateMultiRNNCell(conf.cell_model, conf.num_units,
                                       conf.num_layers, conf.output_keep_prob)
            cell2 = CreateMultiRNNCell(conf.cell_model, conf.num_units,
                                       conf.num_layers, conf.output_keep_prob)
            r_out, r_out_state = bidirectional_dynamic_rnn(
                cell_fw=cell1,
                cell_bw=cell2,
                inputs=self.resp_embs,
                sequence_length=self.resp_lens,
                initial_state_fw=None,
                initial_state_bw=None,
                dtype=dtype,
                parallel_iterations=16,
                swap_memory=False,
                time_major=False)

        with variable_scope.variable_scope("r_rnn", dtype=dtype,
                                           reuse=True) as scope:
            cell1 = CreateMultiRNNCell(conf.cell_model,
                                       conf.num_units,
                                       conf.num_layers,
                                       conf.output_keep_prob,
                                       reuse=True)
            cell2 = CreateMultiRNNCell(conf.cell_model,
                                       conf.num_units,
                                       conf.num_layers,
                                       conf.output_keep_prob,
                                       reuse=True)
            neg_r_out, neg_r_out_state = bidirectional_dynamic_rnn(
                cell_fw=cell1,
                cell_bw=cell2,
                inputs=self.neg_resp_embs,
                sequence_length=self.neg_resp_lens,
                initial_state_fw=None,
                initial_state_bw=None,
                dtype=dtype,
                parallel_iterations=16,
                swap_memory=False,
                time_major=False)
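        # Each *_out_state is a (forward, backward) tuple of per-layer LSTM states;
        # below, the last layer's hidden state of each direction is concatenated into
        # a single [batch, 2 * num_units] sentence vector.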
        fw, bw = q_out_state
        q_out_state = tf.concat([fw[-1].h, bw[-1].h], axis=1)
        fw, bw = p_out_state
        p_out_state = tf.concat([fw[-1].h, bw[-1].h], axis=1)
        fw, bw = r_out_state
        r_out_state = tf.concat([fw[-1].h, bw[-1].h], axis=1)
        fw, bw = neg_r_out_state
        neg_r_out_state = tf.concat([fw[-1].h, bw[-1].h], axis=1)

        q_out = tf.concat(q_out, axis=2)
        p_out = tf.concat(p_out, axis=2)
        r_out = tf.concat(r_out, axis=2)
        neg_r_out = tf.concat(neg_r_out, axis=2)

        # Cosine similarity between the final output states
        norm_q = tf.sqrt(
            tf.reduce_sum(tf.square(q_out_state), 1, keep_dims=True))
        norm_p = tf.sqrt(
            tf.reduce_sum(tf.square(p_out_state), 1, keep_dims=True))
        norm_r = tf.sqrt(
            tf.reduce_sum(tf.square(r_out_state), 1, keep_dims=True))
        norm_neg_r = tf.sqrt(
            tf.reduce_sum(tf.square(neg_r_out_state), 1, keep_dims=True))
        cos_qp = tf.reduce_sum(q_out_state * p_out_state, 1,
                               keep_dims=True) / (norm_q * norm_p)
        cos_qr = tf.reduce_sum(q_out_state * r_out_state, 1,
                               keep_dims=True) / (norm_q * norm_r)
        cos_qnegr = tf.reduce_sum(q_out_state * neg_r_out_state,
                                  1,
                                  keep_dims=True) / (norm_q * norm_neg_r)
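        # cos_qp, cos_qr and cos_qnegr are [batch, 1] cosine similarities between the
        # query state and the post / response / negative-response states.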

        # 2-D interaction (similarity) matrices between the RNN outputs
        graphlg.info("Creating cos dist...")
        qp_sim = tf.expand_dims(tf.matmul(q_out, p_out, transpose_b=True), -1)
        qr_sim = tf.expand_dims(tf.matmul(q_out, r_out, transpose_b=True), -1)
        qnegr_sim = tf.expand_dims(
            tf.matmul(q_out, neg_r_out, transpose_b=True), -1)

        # n-CNN max-pooling
        graphlg.info("Creating interactions...")
        with variable_scope.variable_scope("qp_cnn", dtype=dtype,
                                           reuse=None) as scope:
            qp_map = FeatureMatrix(conf.conv_conf,
                                   qp_sim,
                                   scope=scope,
                                   dtype=dtype)

        with variable_scope.variable_scope("qr_cnn", dtype=dtype,
                                           reuse=None) as scope:
            qr_map = FeatureMatrix(conf.conv_conf,
                                   qr_sim,
                                   scope=scope,
                                   dtype=dtype)

        with variable_scope.variable_scope("qr_cnn", dtype=dtype,
                                           reuse=True) as scope:
            qnegr_map = FeatureMatrix(conf.conv_conf,
                                      qnegr_sim,
                                      scope=scope,
                                      dtype=dtype)

        # h becomes 1 after max-pooling
        qp_vec = tf.concat([tf.contrib.layers.flatten(qp_map), cos_qp], 1)
        qr_vec = tf.concat([tf.contrib.layers.flatten(qr_map), cos_qr], 1)
        qnegr_vec = tf.concat(
            [tf.contrib.layers.flatten(qnegr_map), cos_qnegr], 1)
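        # Each pair vector = flattened CNN feature maps + the corresponding cosine similarity.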

        graphlg.info("Creating fully connected...")
        with variable_scope.variable_scope("qp_fc", dtype=dtype,
                                           reuse=None) as scope:
            qp_fc = FC(inputs=qp_vec,
                       h_size=conf.fc_h_size,
                       o_size=1,
                       act=tf.nn.sigmoid)

        with variable_scope.variable_scope("qr_fc", dtype=dtype,
                                           reuse=None) as scope:
            qr_fc = FC(inputs=qr_vec,
                       h_size=conf.fc_h_size,
                       o_size=1,
                       act=relu)

        with variable_scope.variable_scope("qr_fc", dtype=dtype,
                                           reuse=True) as scope:
            qnegr_fc = FC(inputs=qnegr_vec,
                          h_size=conf.fc_h_size,
                          o_size=1,
                          act=relu)

        self.scores = tf.squeeze(qp_fc * qr_fc)
        self.neg_scores = tf.squeeze(qp_fc * qnegr_fc)
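        # The (query, post) sigmoid output acts as a gate on the (query, response)
        # relevance, and likewise for the negative response.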

        graphlg.info("Creating optimizer and backpropagation...")
        self.global_params = []
        self.trainable_params = tf.trainable_variables()
        self.optimizer_params = []

        if not self.for_deploy:
            with variable_scope.variable_scope(self.model_kind,
                                               dtype=dtype) as scope:
                #self.loss = tf.losses.hinge_loss(self.neg_scores, self.scores)
                self.loss = tf.reduce_mean(
                    tf.nn.relu(1 + self.neg_scores - self.scores))
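                # i.e. a pairwise hinge ranking loss with margin 1: the positive
                # (query, response) score should exceed the negative one by at least 1.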
                self.summary = tf.summary.scalar("%s/loss" % name, self.loss)

            graphlg.info("Creating backpropagation graph and optimizers...")
            self.learning_rate = tf.Variable(float(conf.learning_rate),
                                             trainable=False,
                                             name="learning_rate")
            self.learning_rate_decay_op = self.learning_rate.assign(
                self.learning_rate * conf.learning_rate_decay_factor)
            self.global_step = tf.Variable(0,
                                           trainable=False,
                                           name="global_step")
            self.data_idx = tf.Variable(0, trainable=False, name="data_idx")
            self.data_idx_inc_op = self.data_idx.assign(self.data_idx +
                                                        conf.batch_size)

            self.optimizers = {
                "SGD":
                tf.train.GradientDescentOptimizer(self.learning_rate),
                "Adadelta":
                tf.train.AdadeltaOptimizer(self.learning_rate),
                "Adagrad":
                tf.train.AdagradOptimizer(self.learning_rate),
                "AdagradDA":
                tf.train.AdagradDAOptimizer(self.learning_rate,
                                            self.global_step),
                "Moment":
                tf.train.MomentumOptimizer(self.learning_rate, 0.9),
                "Ftrl":
                tf.train.FtrlOptimizer(self.learning_rate),
                "RMSProp":
                tf.train.RMSPropOptimizer(self.learning_rate)
            }

            self.opt = self.optimizers[conf.opt_name]
            tmp = set(tf.global_variables())

            if job_type == "worker":
                self.opt = SyncReplicasOptimizer(self.opt,
                                                 conf.replicas_to_aggregate,
                                                 conf.total_num_replicas)
                grads_and_vars = self.opt.compute_gradients(loss=self.loss)
                gradients, variables = zip(*grads_and_vars)
            else:
                gradients = tf.gradients(self.loss, tf.trainable_variables())
                variables = tf.trainable_variables()

            clipped_gradients, self.grad_norm = tf.clip_by_global_norm(
                gradients, conf.max_gradient_norm)
            self.update = self.opt.apply_gradients(
                zip(clipped_gradients, variables), self.global_step)

            self.optimizer_params.append(self.learning_rate)
            self.optimizer_params.extend(
                list(set(tf.global_variables()) - tmp))
            self.global_params.extend([self.global_step, self.data_idx])

        self.saver = tf.train.Saver(max_to_keep=conf.max_to_keep)
        self.model_exporter = exporter.Exporter(self.saver)
        inputs = {
            "enc_querys:0": self.enc_querys,
            "query_lens:0": self.query_lens,
            "enc_posts:0": self.enc_posts,
            "post_lens:0": self.post_lens,
            "enc_resps:0": self.enc_resps,
            "resp_lens:0": self.resp_lens
        }
        outputs = {"out": self.scores}
        self.model_exporter.init(tf.get_default_graph().as_graph_def(),
                                 named_graph_signatures={
                                     "inputs":
                                     exporter.generic_signature(inputs),
                                     "outputs":
                                     exporter.generic_signature(outputs)
                                 })
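
# A minimal standalone sketch (not part of the example above), assuming TF 1.x with
# tf.contrib available, of the string -> id -> embedding pattern used here: a
# MutableHashTable maps tokens to ids and embedding_lookup_unique embeds them,
# deduplicating repeated ids before the gather. All `demo_*` names are made up.
import tensorflow as tf
from tensorflow.contrib import lookup
from tensorflow.contrib.layers.python.layers import embedding_ops

UNK_ID = 0
demo_table = lookup.MutableHashTable(key_dtype=tf.string, value_dtype=tf.int64,
                                     default_value=UNK_ID, name="demo_in_table")
insert_op = demo_table.insert(tf.constant(["hello", "world"]),
                              tf.constant([1, 2], dtype=tf.int64))
demo_embedding = tf.get_variable("demo_embedding", [16, 4],
                                 initializer=tf.random_uniform_initializer(-0.08, 0.08))
demo_tokens = tf.constant([["hello", "world", "hello", "oov"]])   # [1, 4] strings
demo_ids = demo_table.lookup(demo_tokens)                         # -> [[1, 2, 1, 0]]
demo_embs = embedding_ops.embedding_lookup_unique(demo_embedding, demo_ids)  # [1, 4, 4]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(insert_op)
    print(sess.run(demo_embs).shape)  # (1, 4, 4)
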
Example #17
    def build(self):
        conf = self.conf
        name = self.name
        job_type = self.job_type
        dtype = self.dtype

        # Input maps
        self.in_table = lookup.MutableHashTable(key_dtype=tf.string,
                                                value_dtype=tf.int64,
                                                default_value=UNK_ID,
                                                shared_name="in_table",
                                                name="in_table",
                                                checkpoint=True)

        self.topic_in_table = lookup.MutableHashTable(
            key_dtype=tf.string,
            value_dtype=tf.int64,
            default_value=2,
            shared_name="topic_in_table",
            name="topic_in_table",
            checkpoint=True)

        self.out_table = lookup.MutableHashTable(key_dtype=tf.int64,
                                                 value_dtype=tf.string,
                                                 default_value="_UNK",
                                                 shared_name="out_table",
                                                 name="out_table",
                                                 checkpoint=True)

        graphlg.info("Creating placeholders...")
        self.enc_str_inps = tf.placeholder(tf.string,
                                           shape=(None, conf.input_max_len),
                                           name="enc_inps")
        self.enc_lens = tf.placeholder(tf.int32, shape=[None], name="enc_lens")

        self.enc_str_topics = tf.placeholder(tf.string,
                                             shape=(None, None),
                                             name="enc_topics")

        self.dec_str_inps = tf.placeholder(
            tf.string, shape=[None, conf.output_max_len + 2], name="dec_inps")
        self.dec_lens = tf.placeholder(tf.int32, shape=[None], name="dec_lens")

        # table lookup
        self.enc_inps = self.in_table.lookup(self.enc_str_inps)
        self.enc_topics = self.topic_in_table.lookup(self.enc_str_topics)
        self.dec_inps = self.in_table.lookup(self.dec_str_inps)

        batch_size = tf.shape(self.enc_inps)[0]

        with variable_scope.variable_scope(self.model_kind,
                                           dtype=dtype) as scope:
            # Create encode graph and get attn states
            graphlg.info("Creating embeddings and do lookup...")
            t_major_enc_inps = tf.transpose(self.enc_inps)
            with ops.device("/cpu:0"):
                self.embedding = variable_scope.get_variable(
                    "embedding", [conf.input_vocab_size, conf.embedding_size])
                self.emb_enc_inps = embedding_lookup_unique(
                    self.embedding, t_major_enc_inps)
                self.topic_embedding = variable_scope.get_variable(
                    "topic_embedding",
                    [conf.topic_vocab_size, conf.topic_embedding_size],
                    trainable=False)
                self.emb_enc_topics = embedding_lookup_unique(
                    self.topic_embedding, self.enc_topics)

            graphlg.info("Creating out projection weights...")
            if conf.out_layer_size is not None:
                w = tf.get_variable(
                    "proj_w", [conf.out_layer_size, conf.output_vocab_size],
                    dtype=dtype)
            else:
                w = tf.get_variable("proj_w",
                                    [conf.num_units, conf.output_vocab_size],
                                    dtype=dtype)
            b = tf.get_variable("proj_b", [conf.output_vocab_size],
                                dtype=dtype)
            self.out_proj = (w, b)

            graphlg.info("Creating encoding dynamic rnn...")
            with variable_scope.variable_scope("encoder",
                                               dtype=dtype) as scope:
                if conf.bidirectional:
                    cell_fw = CreateMultiRNNCell(conf.cell_model,
                                                 conf.num_units,
                                                 conf.num_layers,
                                                 conf.output_keep_prob)
                    cell_bw = CreateMultiRNNCell(conf.cell_model,
                                                 conf.num_units,
                                                 conf.num_layers,
                                                 conf.output_keep_prob)
                    self.enc_outs, self.enc_states = bidirectional_dynamic_rnn(
                        cell_fw=cell_fw,
                        cell_bw=cell_bw,
                        inputs=self.emb_enc_inps,
                        sequence_length=self.enc_lens,
                        dtype=dtype,
                        parallel_iterations=16,
                        time_major=True,
                        scope=scope)
                    fw_s, bw_s = self.enc_states
                    self.enc_states = tuple([
                        tf.concat([f, b], axis=1) for f, b in zip(fw_s, bw_s)
                    ])
                    self.enc_outs = tf.concat(
                        [self.enc_outs[0], self.enc_outs[1]], axis=2)
                else:
                    cell = CreateMultiRNNCell(conf.cell_model, conf.num_units,
                                              conf.num_layers,
                                              conf.output_keep_prob)
                    self.enc_outs, self.enc_states = dynamic_rnn(
                        cell=cell,
                        inputs=self.emb_enc_inps,
                        sequence_length=self.enc_lens,
                        parallel_iterations=16,
                        scope=scope,
                        dtype=dtype,
                        time_major=True)
            attn_len = tf.shape(self.enc_outs)[0]

            graphlg.info("Preparing init attention and states for decoder...")
            initial_state = self.enc_states
            attn_states = tf.transpose(self.enc_outs, perm=[1, 0, 2])
            attn_size = self.conf.num_units
            topic_attn_size = self.conf.num_units
            k = tf.get_variable(
                "topic_proj",
                [1, 1, self.conf.topic_embedding_size, topic_attn_size])
            topic_attn_states = nn_ops.conv2d(
                tf.expand_dims(self.emb_enc_topics, 2), k, [1, 1, 1, 1],
                "SAME")
            topic_attn_states = tf.squeeze(topic_attn_states, axis=2)

            graphlg.info("Creating decoder cell...")
            with variable_scope.variable_scope("decoder",
                                               dtype=dtype) as scope:
                cell = CreateMultiRNNCell(conf.cell_model, attn_size,
                                          conf.num_layers,
                                          conf.output_keep_prob)
                # topic
                if not self.for_deploy:
                    graphlg.info(
                        "Embedding decoder inps, tars and tar weights...")
                    t_major_dec_inps = tf.transpose(self.dec_inps)
                    t_major_tars = tf.slice(t_major_dec_inps, [1, 0],
                                            [conf.output_max_len + 1, -1])
                    t_major_dec_inps = tf.slice(t_major_dec_inps, [0, 0],
                                                [conf.output_max_len + 1, -1])
                    t_major_tar_wgts = tf.cumsum(tf.one_hot(
                        self.dec_lens - 1, conf.output_max_len + 1, axis=0),
                                                 axis=0,
                                                 reverse=True)
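                    # e.g. for one example with dec_len = 3 and output_max_len + 1 = 5:
                    #   one_hot(dec_len - 1) (time-major column) = [0, 0, 1, 0, 0]
                    #   reversed cumsum (target weights)         = [1, 1, 1, 0, 0]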
                    with ops.device("/cpu:0"):
                        emb_dec_inps = embedding_lookup_unique(
                            self.embedding, t_major_dec_inps)

                    hp_train = helper.ScheduledEmbeddingTrainingHelper(
                        inputs=emb_dec_inps,
                        sequence_length=self.enc_lens,
                        embedding=self.embedding,
                        sampling_probability=0.0,
                        out_proj=self.out_proj,
                        except_ids=None,
                        time_major=True)

                    output_layer = None
                    my_decoder = AttnTopicDecoder(
                        cell=cell,
                        helper=hp_train,
                        initial_state=initial_state,
                        attn_states=attn_states,
                        attn_size=attn_size,
                        topic_attn_states=topic_attn_states,
                        topic_attn_size=topic_attn_size,
                        output_layer=output_layer)
                    t_major_cell_outs, final_state = decoder.dynamic_decode(
                        decoder=my_decoder,
                        output_time_major=True,
                        maximum_iterations=conf.output_max_len + 1,
                        scope=scope)
                    t_major_outs = t_major_cell_outs.rnn_output

                    # Branch 1 for debugging, doesn't have to be called
                    self.outputs = tf.transpose(t_major_outs, perm=[1, 0, 2])
                    L = tf.shape(self.outputs)[1]
                    w, b = self.out_proj
                    self.outputs = tf.reshape(self.outputs,
                                              [-1, int(w.shape[0])])
                    self.outputs = tf.matmul(self.outputs, w) + b

                    # For masking the except_ids when debugging
                    #m = tf.shape(self.outputs)[0]
                    #self.mask = tf.zeros([m, int(w.shape[1])])
                    #for i in [3]:
                    #    self.mask = self.mask + tf.one_hot(indices=tf.ones([m], dtype=tf.int32) * i, on_value=100.0, depth=int(w.shape[1]))
                    #self.outputs = self.outputs - self.mask

                    self.outputs = tf.argmax(self.outputs, axis=1)
                    self.outputs = tf.reshape(self.outputs, [-1, L])
                    self.outputs = self.out_table.lookup(
                        tf.cast(self.outputs, tf.int64))

                    # Branch 2 for loss
                    self.loss = dyn_sequence_loss(self.conf, t_major_outs,
                                                  self.out_proj, t_major_tars,
                                                  t_major_tar_wgts)
                    self.summary = tf.summary.scalar("%s/loss" % self.name,
                                                     self.loss)

                    # backpropagation
                    self.build_backprop(self.loss, conf, dtype)

                    #saver
                    self.trainable_params.extend(tf.trainable_variables() +
                                                 [self.topic_embedding])
                    need_to_save = self.global_params + self.trainable_params + self.optimizer_params + tf.get_default_graph(
                    ).get_collection("saveable_objects") + [
                        self.topic_embedding
                    ]
                    self.saver = tf.train.Saver(need_to_save,
                                                max_to_keep=conf.max_to_keep)
                else:
                    hp_infer = helper.GreedyEmbeddingHelper(
                        embedding=self.embedding,
                        start_tokens=tf.ones(shape=[batch_size],
                                             dtype=tf.int32),
                        end_token=EOS_ID,
                        out_proj=self.out_proj)

                    output_layer = None  #layers_core.Dense(self.conf.outproj_from_size, use_bias=True)
                    my_decoder = AttnTopicDecoder(
                        cell=cell,
                        helper=hp_infer,
                        initial_state=initial_state,
                        attn_states=attn_states,
                        attn_size=attn_size,
                        topic_attn_states=topic_attn_states,
                        topic_attn_size=topic_attn_size,
                        output_layer=output_layer)
                    cell_outs, final_state = decoder.dynamic_decode(
                        decoder=my_decoder, scope=scope, maximum_iterations=40)
                    self.outputs = cell_outs.sample_id
                    #lookup
                    self.outputs = self.out_table.lookup(
                        tf.cast(self.outputs, tf.int64))

                    #saver
                    self.trainable_params.extend(tf.trainable_variables())
                    self.saver = tf.train.Saver(max_to_keep=conf.max_to_keep)

                    # Exporter for serving
                    self.model_exporter = exporter.Exporter(self.saver)
                    inputs = {
                        "enc_inps": self.enc_str_inps,
                        "enc_lens": self.enc_lens
                    }
                    outputs = {"out": self.outputs}
                    self.model_exporter.init(
                        tf.get_default_graph().as_graph_def(),
                        named_graph_signatures={
                            "inputs": exporter.generic_signature(inputs),
                            "outputs": exporter.generic_signature(outputs)
                        })
                    graphlg.info("Graph done")
                    graphlg.info("")

                self.dec_states = final_state
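
# A minimal standalone sketch (not part of the example above) of the topic-projection
# trick used for topic_attn_states: a 1x1 conv2d applies the same linear map to every
# topic embedding. Shapes and `demo_*` names are made up; assumes TF 1.x.
import tensorflow as tf

demo_topic_embs = tf.random_uniform([2, 5, 8])               # [batch, num_topics, emb_size]
demo_k = tf.get_variable("demo_topic_proj", [1, 1, 8, 16])   # [1, 1, emb_size, attn_size]
demo_proj = tf.nn.conv2d(tf.expand_dims(demo_topic_embs, 2),  # -> [batch, num_topics, 1, emb_size]
                         demo_k, [1, 1, 1, 1], "SAME")
demo_proj = tf.squeeze(demo_proj, axis=2)                    # [batch, num_topics, attn_size]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(demo_proj).shape)  # (2, 5, 16)
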
Example #18
	def build(self, inputs, for_deploy):
		scope = ""
		conf = self.conf
		dtype = self.dtype
		beam_size = 1 if not for_deploy else sum(conf.beam_splits)

		with tf.name_scope("WordEmbedding"):
			# Input maps
			self.in_table = lookup.MutableHashTable(key_dtype=tf.string,
													value_dtype=tf.int64,
													default_value=UNK_ID,
													shared_name="in_table",
													name="in_table",
													checkpoint=True)

			self.out_table = lookup.MutableHashTable(key_dtype=tf.int64,
													 value_dtype=tf.string,
													 default_value="_UNK",
													 shared_name="out_table",
													 name="out_table",
													 checkpoint=True)
			enc_inps = self.in_table.lookup(inputs["enc_inps:0"])
			dec_inps = self.in_table.lookup(inputs["dec_inps:0"])

			graphlg.info("Creating embeddings and embedding enc_inps.")
			with tf.device("/cpu:0"):
				self.embedding = variable_scope.get_variable("embedding", [conf.output_vocab_size, conf.embedding_size])
				emb_inps = embedding_lookup_unique(self.embedding, enc_inps)
				emb_dec_inps = embedding_lookup_unique(self.embedding, dec_inps)
			emb_dec_next_inps = tf.slice(emb_dec_inps, [0, 0, 0], [-1, conf.output_max_len + 1, -1])

		
		batch_size = tf.shape(enc_inps)[0]

		# Create encode graph and get attn states
		graphlg.info("Creating dynamic x rnn...")
		enc_outs, enc_states, mem_size, enc_state_size = DynEncode(conf.cell_model, conf.num_units, conf.num_layers,
																emb_inps, inputs["enc_lens:0"], keep_prob=1.0,
																bidi=conf.bidirectional, name_scope="DynEncodeX")
		
		with tf.variable_scope("AttnEncState") as scope2:
			mechanism = Luong1_2(num_units=conf.num_units, memory=enc_outs, max_mem_size=conf.input_max_len, memory_sequence_length=inputs["enc_lens:0"], name=scope2.original_name_scope)
			if isinstance(enc_states[-1], LSTMStateTuple):
				#score = tf.expand_dims(tf.nn.softmax(mechanism(enc_states[-1].h)), 1)
				score = tf.expand_dims(mechanism(enc_states[-1].h, ()), 1)
				attention_h = tf.squeeze(tf.matmul(score, enc_outs), 1)
				enc_state = LSTMStateTuple(enc_states[-1].c, attention_h) 
			else:
				#score = tf.expand_dims(tf.nn.softmax(mechanism(enc_states[-1])), 1)
				score = tf.expand_dims(mechanism(enc_states[-1], ()), 1)
				enc_state = tf.squeeze(tf.matmul(score, enc_outs), 1)

		hidden_units = int(math.sqrt(mem_size * conf.enc_latent_dim))
		z, mu_prior, logvar_prior = Ptheta([enc_state], hidden_units, conf.enc_latent_dim, stddev=1, prior_type=conf.prior_type, name_scope="EncToPtheta")

		KLD = 0.0
		# Y inputs for posterior z when training
		if not for_deploy:
			#with tf.name_scope("variational_distribution") as scope:
			y_emb_inps = tf.slice(emb_dec_inps, [0, 1, 0], [-1, -1, -1])
			y_enc_outs, y_enc_states, y_mem_size, y_enc_state_size = DynEncode(conf.cell_model, conf.num_units, conf.num_layers, y_emb_inps, inputs["dec_lens:0"],
																					keep_prob=conf.keep_prob, bidi=False, name_scope="DynEncodeY")
			z, KLD, l2 = VAE([enc_state, y_enc_states[-1]], conf.enc_latent_dim, mu_prior, logvar_prior, name_scope="VAE")

		# project z + x_thinking_state to decoder state
		with tf.name_scope("GatedZState"):
			if isinstance(enc_state, LSTMStateTuple):
				h_gate = tf.layers.dense(z, int(enc_state.h.get_shape()[1]), use_bias=True, name="z_gate_h", activation=tf.sigmoid)
				c_gate = tf.layers.dense(z, int(enc_state.c.get_shape()[1]), use_bias=True, name="z_gate_c", activation=tf.sigmoid)
				raw_dec_states = tf.concat([c_gate * enc_state.c, h_gate * enc_state.h, z], 1)
				#raw_dec_states = LSTMStateTuple(tf.concat([c_gate * enc_state.c, z], 1), tf.concat([h_gate * enc_state.h, z], 1))
			else:
				gate = tf.layers.dense(z, int(enc_state.get_shape()[1]), use_bias=True, name="z_gate", activation=tf.sigmoid)
				raw_dec_states = tf.concat([gate * enc_state, z], 1)

		# optional BOW (bag-of-words) auxiliary loss, left commented out
		#num_hidden_units = int(math.sqrt(conf.output_vocab_size * int(decision_state.shape[1])))
		#bow_l1 = layers_core.Dense(num_hidden_units, use_bias=True, name="bow_hidden", activation=tf.tanh)
		#bow_l2 = layers_core.Dense(conf.output_vocab_size, use_bias=True, name="bow_out", activation=None)
		#bow = bow_l2(bow_l1(decision_state)) 

		#y_dec_inps = tf.slice(self.dec_inps, [0, 1], [-1, -1])
		#bow_y = tf.reduce_sum(tf.one_hot(y_dec_inps, on_value=1.0, off_value=0.0, axis=-1, depth=conf.output_vocab_size), axis=1)
		#batch_bow_losses = tf.reduce_sum(bow_y * (-1.0) * tf.nn.log_softmax(bow), axis=1)

		max_mem_size = conf.input_max_len + conf.output_max_len + 2
		with tf.name_scope("ShapeToBeam"):
			beam_raw_dec_states = nest.map_structure(lambda x:tile_batch(x, beam_size), raw_dec_states)
			beam_memory = nest.map_structure(lambda x:tile_batch(x, beam_size), enc_outs)
			beam_memory_lens = tf.squeeze(nest.map_structure(lambda x:tile_batch(x, beam_size), tf.expand_dims(inputs["enc_lens:0"], 1)), 1)
			beam_z = nest.map_structure(lambda x:tile_batch(x, beam_size), z)

		#def _to_beam(t):
		#	beam_t = tf.reshape(tf.tile(t, [1, beam_size]), [-1, int(t.get_shape()[1])])
		#	return beam_t 
		#with tf.name_scope("ShapeToBeam") as scope: 
		#	beam_raw_dec_states = tf.contrib.framework.nest.map_structure(_to_beam, raw_dec_states) 
		#	beam_memory = tf.reshape(tf.tile(self.enc_outs, [1, 1, beam_size]), [-1, conf.input_max_len, mem_size])
		#	beam_memory_lens = tf.squeeze(tf.reshape(tf.tile(tf.expand_dims(inputs["enc_lens:0"], 1), [1, beam_size]), [-1, 1]), 1)
		#	beam_z = tf.contrib.framework.nest.map_structure(_to_beam, z)
			
		#cell = AttnCell(cell_model=conf.cell_model, num_units=mem_size, num_layers=conf.num_layers,
		#				attn_type=conf.attention, memory=beam_memory, mem_lens=beam_memory_lens,
		#				max_mem_size=max_mem_size, addmem=conf.addmem, z=beam_z, keep_prob=conf.keep_prob,
		#				dtype=tf.float32)
		#with tf.variable_scope("DynDecode/AttnCell") as dyn_scope:
		decoder_multi_rnn_cells = CreateMultiRNNCell(conf.cell_model, num_units=mem_size, num_layers=conf.num_layers, output_keep_prob=conf.keep_prob)
		zero_cell_states = DecCellStateInit(beam_raw_dec_states, decoder_multi_rnn_cells, name="InitCell")

		attn_cell = AttnCellWrapper(cell=decoder_multi_rnn_cells, cell_init_states=zero_cell_states, attn_type=conf.attention,
									attn_size=mem_size, memory=beam_memory, mem_lens=beam_memory_lens, max_mem_size=max_mem_size,
									addmem=conf.addmem, z=beam_z, dtype=tf.float32, name="AttnWrapper")
			
		if self.conf.attention:
			dec_init_state = None 
		else:
			dec_init_state = beam_decoder.BeamState(tf.zeros_like(beam_memory_lens, tf.float32), zero_cell_states, tf.zeros_like(beam_memory_lens))
		with tf.variable_scope("OutProj"):
			graphlg.info("Creating out_proj...") 
			if conf.out_layer_size:
				w = tf.get_variable("proj_w", [conf.out_layer_size, conf.output_vocab_size], dtype=dtype)
			else:
				w = tf.get_variable("proj_w", [mem_size, conf.output_vocab_size], dtype=dtype)
			b = tf.get_variable("proj_b", [conf.output_vocab_size], dtype=dtype)
			out_proj = (w, b)

		if not for_deploy: 
			hp_train = helper1_2.ScheduledEmbeddingTrainingHelper(inputs=emb_dec_next_inps, sequence_length=inputs["dec_lens:0"], embedding=self.embedding,
																sampling_probability=0.0, out_proj=out_proj)
			output_layer = layers_core.Dense(conf.out_layer_size, use_bias=True) if conf.out_layer_size else None
			my_decoder = basic_decoder1_2.BasicDecoder(cell=attn_cell, helper=hp_train, initial_state=dec_init_state, output_layer=output_layer)
			cell_outs, final_state, seq_len = decoder1_2.dynamic_decode(decoder=my_decoder, impute_finished=True, maximum_iterations=conf.output_max_len + 1)

			#cell_outs = tf.Print(cell_outs, [tf.shape(cell_outs)], message="cell_outs_shape")
			with tf.name_scope("Logits"):
				L = tf.shape(cell_outs.rnn_output)[1]
				rnn_output = tf.reshape(cell_outs.rnn_output, [-1, int(out_proj[0].shape[0])])
				rnn_output = tf.matmul(rnn_output, out_proj[0]) + out_proj[1] 
				logits = tf.reshape(rnn_output, [-1, L, int(out_proj[0].shape[1])])

			with tf.name_scope("DebugOutputs") as scope:
				outputs = tf.argmax(logits, axis=2)
				outputs = tf.reshape(outputs, [-1, L])
				outputs = self.out_table.lookup(tf.cast(outputs, tf.int64))

			# branch 2 for loss
			with tf.name_scope("Loss") as scope:
				tars = tf.slice(dec_inps, [0, 1], [-1, L])
				# wgts could take a more complicated form, e.g. partially down-weighting a sequence,
				# but here we simply use a weight of 1.0 for every non-padding label
				wgts = tf.cumsum(tf.one_hot(inputs["dec_lens:0"], L), axis=1, reverse=True)
				#wgts = wgts * tf.expand_dims(self.down_wgts, 1)
				loss_matrix = loss.sequence_loss(logits=logits, targets=tars, weights=wgts, average_across_timesteps=False, average_across_batch=False)
				#bow_loss = tf.reduce_sum(batch_bow_losses * self.down_wgts) / batch_wgt
				example_total_wgts = tf.reduce_sum(wgts, 1)
				total_wgts = tf.reduce_sum(example_total_wgts) 

				example_losses = tf.reduce_sum(loss_matrix, 1)
				see_loss = tf.reduce_sum(example_losses) / total_wgts

				KLD = tf.reduce_sum(KLD * example_total_wgts) / total_wgts 
				self.loss = tf.reduce_sum(example_losses + conf.kld_ratio * KLD) / total_wgts 

			with tf.name_scope(self.model_kind):
				tf.summary.scalar("loss", see_loss)
				tf.summary.scalar("kld", KLD) 
				#tf.summary.scalar("bow", bow_loss)
				for each in tf.trainable_variables():
					tf.summary.histogram(each.name, each)
			graph_nodes = {
				"loss":self.loss,
				"inputs":inputs,
				"debug_outputs":outputs,
				"outputs":{},
				"visualize":None
			}
			return graph_nodes
		else:
			beam_batch_size = tf.shape(beam_memory_lens)[0]
			hp_infer = helper1_2.GreedyEmbeddingHelper(embedding=self.embedding, start_tokens=tf.ones([beam_batch_size], dtype=tf.int32),
														end_token=EOS_ID, out_proj=out_proj)
			output_layer = layers_core.Dense(conf.out_layer_size, use_bias=True) if conf.out_layer_size else None

				

			my_decoder = beam_decoder.BeamDecoder(cell=attn_cell, helper=hp_infer, out_proj=out_proj, initial_state=dec_init_state, beam_splits=conf.beam_splits,
													max_res_num=conf.max_res_num, output_layer=output_layer)
			#cell_outs, final_state = decoder.dynamic_decode(decoder=my_decoder, scope=scope, maximum_iterations=conf.output_max_len)
			cell_outs, final_state, seq_len = decoder1_2.dynamic_decode(decoder=my_decoder, impute_finished=True, maximum_iterations=conf.output_max_len + 1)

			L = tf.shape(cell_outs.beam_ends)[1]
			beam_symbols = cell_outs.beam_symbols
			beam_parents = cell_outs.beam_parents

			beam_ends = cell_outs.beam_ends
			beam_end_parents = cell_outs.beam_end_parents
			beam_end_probs = cell_outs.beam_end_probs
			alignments = cell_outs.alignments

			beam_ends = tf.reshape(tf.transpose(beam_ends, [0, 2, 1]), [-1, L])
			beam_end_parents = tf.reshape(tf.transpose(beam_end_parents, [0, 2, 1]), [-1, L])
			beam_end_probs = tf.reshape(tf.transpose(beam_end_probs, [0, 2, 1]), [-1, L])

			# Creating tail_ids 
			batch_size = beam_batch_size // beam_size  # integer division; `/` would yield a float tensor
			batch_size = tf.Print(batch_size, [batch_size], message="BATCH")

			#beam_symbols = tf.Print(cell_outs.beam_symbols, [tf.shape(cell_outs.beam_symbols)], message="beam_symbols")
			#beam_parents = tf.Print(cell_outs.beam_parents, [tf.shape(cell_outs.beam_parents)], message="beam_parents")
			#beam_ends = tf.Print(cell_outs.beam_ends, [tf.shape(cell_outs.beam_ends)], message="beam_ends") 
			#beam_end_parents = tf.Print(cell_outs.beam_end_parents, [tf.shape(cell_outs.beam_end_parents)], message="beam_end_parents") 
			#beam_end_probs = tf.Print(cell_outs.beam_end_probs, [tf.shape(cell_outs.beam_end_probs)], message="beam_end_probs") 
			#alignments = tf.Print(cell_outs.alignments, [tf.shape(cell_outs.alignments)], message="beam_attns")

			batch_offset = tf.expand_dims(tf.cumsum(tf.ones([batch_size, beam_size], dtype=tf.int32) * beam_size, axis=0, exclusive=True), 2)
			offset2 = tf.expand_dims(tf.cumsum(tf.ones([batch_size, beam_size * 2], dtype=tf.int32) * beam_size, axis=0, exclusive=True), 2)

			out_len = tf.shape(beam_symbols)[1]
			self.beam_symbol_strs = tf.reshape(self.out_table.lookup(tf.cast(beam_symbols, tf.int64)), [batch_size, beam_size, -1])
			self.beam_parents = tf.reshape(beam_parents, [batch_size, beam_size, -1]) - batch_offset

			self.beam_ends = tf.reshape(beam_ends, [batch_size, beam_size * 2, -1])
			self.beam_end_parents = tf.reshape(beam_end_parents, [batch_size, beam_size * 2, -1]) - offset2
			self.beam_end_probs = tf.reshape(beam_end_probs, [batch_size, beam_size * 2, -1])
			self.beam_attns = tf.reshape(alignments, [batch_size, beam_size, out_len, -1])

			#cell_outs.alignments
			#self.outputs = tf.concat([outputs_str, tf.cast(cell_outs.beam_parents, tf.string)], 1)

			#ones = tf.ones([batch_size, self.beam_size], dtype=tf.int32)
			#aux_matrix = tf.cumsum(ones * self.beam_size, axis=0, exclusive=True)

			#tm_beam_parents_reverse = tf.reverse(tf.transpose(cell_outs.beam_parents), axis=[0])
			#beam_probs = final_state[1] 

			#def traceback(prev_out, curr_input):
			#	return tf.gather(curr_input, prev_out) 
			#	
			#tail_ids = tf.reshape(tf.cumsum(ones, axis=1, exclusive=True) + aux_matrix, [-1])
			#tm_symbol_index_reverse = tf.scan(traceback, tm_beam_parents_reverse, initializer=tail_ids)
			## Create beam index for symbols, and other info  
			#tm_symbol_index = tf.concat([tf.expand_dims(tail_ids, 0), tm_symbol_index_reverse], axis=0)
			#tm_symbol_index = tf.reverse(tm_symbol_index, axis=[0])
			#tm_symbol_index = tf.slice(tm_symbol_index, [1, 0], [-1, -1])
			#symbol_index = tf.expand_dims(tf.transpose(tm_symbol_index), axis=2)
			#symbol_index = tf.concat([symbol_index, tf.cumsum(tf.ones_like(symbol_index), exclusive=True, axis=1)], axis=2)

			## index alignments and output symbols
			#alignments = tf.gather_nd(cell_outs.alignments, symbol_index)
			#symbol_ids = tf.gather_nd(cell_outs.beam_symbols, symbol_index)

			## outputs and other info
			#self.others = [alignments, beam_probs]
			#self.outputs = self.out_table.lookup(tf.cast(symbol_ids, tf.int64))

			outputs = {
				"beam_symbols":self.beam_symbol_strs,
				"beam_parents":self.beam_parents,
				"beam_ends":self.beam_ends,
				"beam_end_parents":self.beam_end_parents,
				"beam_end_probs":self.beam_end_probs,
				"beam_attns":self.beam_attns
			}
			
			infer_inputs = {} 
			infer_inputs["enc_inps:0"] = inputs["enc_inps:0"]
			infer_inputs["enc_lens:0"] = inputs["enc_lens:0"]
			graph_nodes = {
				"loss":None,
				"inputs":infer_inputs,
				"outputs":outputs,
				"visualize":{"z":z}
			}

			return graph_nodes
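
# A minimal standalone sketch (not part of the example above) of the beam tiling done in
# the "ShapeToBeam" scope: tile_batch repeats every batch entry beam_size times along the
# batch axis so all beams can be decoded in a single batch. Assumes TF 1.x contrib;
# `demo_*` names are made up.
import tensorflow as tf
from tensorflow.contrib.seq2seq import tile_batch

demo_x = tf.constant([[1, 2], [3, 4]])          # [batch=2, d=2]
demo_tiled = tile_batch(demo_x, multiplier=3)   # [6, 2]: each row repeated 3 times

with tf.Session() as sess:
    print(sess.run(demo_tiled))
    # [[1 2]
    #  [1 2]
    #  [1 2]
    #  [3 4]
    #  [3 4]
    #  [3 4]]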