Ejemplo n.º 1
0
 def _build(self):
     """Create the forward and backward GRU cells for the bidirectional RNN.

     Both cells share the same input/state sizes; only their names differ.
     """
     shared = dict(input_size=self._input_size, state_size=self._state_size)
     self._cell_forward = ph.GRUCell('cell_forward', **shared)
     self._cell_backward = ph.GRUCell('cell_backward', **shared)
Ejemplo n.º 2
0
 def _build(self):
     """Build a TF1-style RNN language-model graph.

     Creates embedding, GRU and output-projection modules, wires the
     unrolled RNN with ``tf.scan``, and registers three slots:
     'train' (loss + Adam update), 'evaluate' (generated outputs) and
     'embedding' (word -> embedding lookup).
     """
     # Network module definitions --- build
     self._emb = photinia.Linear('EMB', self._voc_size, self._emb_size)
     self._cell = photinia.GRUCell('CELL', self._emb_size, self._state_size)
     self._lin = photinia.Linear('LIN', self._state_size, self._voc_size)
     # Input definition
     # assumes seq is a one-hot sequence: (batch, time, voc_size) — TODO confirm
     seq = tf.placeholder(
         shape=(None, None, self._voc_size),
         dtype=photinia.dtype
     )
     seq_0 = seq[:, :-1, :]  # model inputs: every step except the last
     seq_1 = seq[:, 1:, :]   # prediction targets: inputs shifted by one step
     batch_size = tf.shape(seq)[0]
     # RNN structure
     init_state = tf.zeros(
         shape=(batch_size, self._state_size),
         dtype=photinia.dtype
     )
     # scan expects time-major input, hence the (1, 0, 2) transpose
     states = tf.scan(
         fn=self._rnn_step,
         elems=tf.transpose(seq_0, (1, 0, 2)),
         initializer=init_state
     )
     probs = tf.map_fn(
         fn=self._state_to_prob,
         elems=states
     )
     outputs = tf.map_fn(
         fn=self._prob_to_output,
         elems=probs
     )
     # back to batch-major layout
     probs = tf.transpose(probs, (1, 0, 2))
     outputs = tf.transpose(outputs, (1, 0, 2))
     # prepend the first input step so outputs cover the full sequence length
     outputs = tf.concat((seq[:, 0:1, :], outputs), 1)
     # negative log-likelihood of the target one-hots; 1e-5 guards log(0)
     loss = tf.reduce_mean(-tf.log(1e-5 + tf.reduce_sum(seq_1 * probs, 2)), 1)
     loss = tf.reduce_mean(loss)
     self._add_slot(
         'train',
         outputs=loss,
         inputs=seq,
         updates=tf.train.AdamOptimizer(1e-3).minimize(loss)
     )
     self._add_slot(
         'evaluate',
         outputs=outputs,
         inputs=seq
     )
     #
     # standalone embedding lookup for a single batch of words
     word = tf.placeholder(
         shape=(None, self._voc_size),
         dtype=photinia.dtype
     )
     emb = self._emb.setup(word)
     emb = photinia.lrelu(emb)
     self._add_slot(
         'embedding',
         outputs=emb,
         inputs=word
     )
Ejemplo n.º 3
0
 def _build(self):
 def _build(self):
     """Construct the GRU cell for this module.

     NOTE(review): the constructed cell is not bound to any attribute —
     presumably the ph framework registers widgets by name at
     construction time; confirm this is intentional (sibling examples
     assign the cell to ``self._cell``).
     """
     ph.GRUCell('cell',
                self._wemb_size,
                self._state_size,
                with_bias=False,
                activation=self._activation,
                w_init=ph.TruncatedNormal(0, 1e-3),
                u_init=ph.TruncatedNormal(0, 1e-3))
Ejemplo n.º 4
0
 def _build(self):
     """Create the embedding layer (if absent), the GRU cell and the
     output projection.

     When an embedding layer was injected from outside, the embedding
     size is taken from it instead of building a new layer.
     """
     if self._emb_layer is not None:
         self._emb_size = self._emb_layer.output_size
     else:
         self._emb_layer = ph.Linear(
             'emb_layer', self._voc_size, self._emb_size)
     self._cell = ph.GRUCell('cell', self._emb_size, self._state_size)
     self._out_layer = ph.Linear(
         'out_layer', self._state_size, self._voc_size)
Ejemplo n.º 5
0
    def _build(self):
        """Build a recurrent visual-attention (RAM-style) model graph.

        A GRU is run for ``_num_steps`` glimpses over the input; the final
        state predicts a class. Training combines a cross-entropy loss, a
        REINFORCE term on the location policy (with advantages against a
        learned per-step baseline) and a baseline regression loss.
        """
        # replicate each input _num_mc_samples times — presumably Monte Carlo
        # samples of the stochastic location policy; TODO confirm
        input_x = tf.tile(self._input_x, [self._num_mc_samples] + [1] * (len(self._input_x.shape) - 1))
        g_net = self._glimpse_network
        l_net = self._location_network

        # scalar std-dev of the location sampling noise, fed per Step
        input_stddev = tf.placeholder(
            shape=(),
            dtype=ph.dtype,
            name='input_stddev'
        )

        cell = self._cell = ph.GRUCell(
            'cell',
            g_net.output_size,
            self._state_size,
            w_init=ph.init.GlorotUniform()
        )
        batch_size = tf.shape(input_x)[0]
        init_state = tf.zeros(shape=(batch_size, self._state_size), dtype=ph.dtype)
        # initial glimpse location drawn uniformly from [-1, 1]^2
        init_loc = tf.random_uniform((batch_size, 2), minval=-1, maxval=1)

        def _loop(acc, _):
            # one attention step: glimpse at current location, update the
            # GRU state, then sample the next location from the policy
            prev_state, loc, _ = acc
            g = g_net.setup(input_x, loc)
            state = cell.setup(g, prev_state)
            next_loc, next_mean = l_net.setup(state, input_stddev)
            return state, next_loc, next_mean

        # elems only fixes the number of iterations; its values are unused
        states, locs, means = tf.scan(
            fn=_loop,
            elems=tf.zeros(shape=(self._num_steps,), dtype=tf.int8),
            initializer=(init_state, init_loc, init_loc)
        )  # (num_steps, batch_size, *)

        baseline_layer = self._baseline_layer = ph.Linear('baseline_layer', self._state_size, 1)

        def _make_baseline(state):
            # scalar value estimate of the expected reward for one step
            baseline = baseline_layer.setup(state)  # (batch_size, 1)
            baseline = tf.reshape(baseline, (-1,))  # (batch_size,)
            return baseline

        baselines = tf.map_fn(_make_baseline, states)  # (num_steps, batch_size)
        baselines = tf.transpose(baselines)  # (batch_size, num_steps)

        # classification head on the final recurrent state
        predict_layer = self._predict_layer = ph.Linear('predict_layer', self._state_size, self._num_classes)
        last_state = states[-1]  # (batch_size, state_size)
        prob = predict_layer.setup(last_state)
        prob = tf.nn.softmax(prob)  # (batch_size, num_classes)
        label = tf.argmax(prob, 1)  # (batch_size,)
        # prediction uses a near-zero stddev, i.e. (almost) deterministic locations
        self._step_predict = ph.Step(
            inputs=input_x,
            outputs=label,
            givens={input_stddev: 1e-3}
        )

        self._input_label = ph.placeholder('input_label', (None,), tf.int64)
        # labels are tiled to match the MC-replicated batch
        input_label = tf.tile(self._input_label, (self._num_mc_samples,))
        prob_ = tf.one_hot(input_label, self._num_classes)  # (batch_size, num_classes)
        predict_loss = self._predict_loss = -tf.reduce_mean(ph.ops.log_likelihood(prob_, prob))

        # reward is 1 for a correct prediction, 0 otherwise, broadcast to all steps
        reward = tf.cast(tf.equal(label, input_label), tf.float32)  # (batch_size,)
        rewards = tf.reshape(reward, (-1, 1))  # (batch_size, 1)
        rewards = tf.tile(rewards, (1, self._num_steps))  # (batch_size, num_steps)
        # rewards must not propagate gradients into the classifier
        rewards = tf.stop_gradient(rewards)
        baseline_loss = self._baseline_loss = tf.reduce_mean(ph.ops.mean_square_error(rewards, baselines))

        # REINFORCE: advantage-weighted log-likelihood of the sampled locations;
        # baselines are detached so this term only trains the policy
        advantages = rewards - tf.stop_gradient(baselines)
        logll = self._log_gaussian(locs, means, input_stddev)
        logll = tf.reduce_sum(logll, 2)  # (num_steps, batch_size)
        logll = tf.transpose(logll)  # (batch_size, num_steps)
        logll_ratio = self._logll_ratio = tf.reduce_mean(logll * advantages)

        # hybrid loss: minimize prediction/baseline error, maximize logll_ratio
        loss = self._loss = predict_loss - logll_ratio + baseline_loss
        if self._reg is not None:
            self._reg.setup(self.get_trainable_variables())
            update = self._optimizer.minimize(loss + self._reg.get_loss())
        else:
            update = self._optimizer.minimize(loss)
        self._step_train = ph.Step(
            inputs=(self._input_x, self._input_label),
            outputs=(loss, tf.reduce_mean(rewards)),
            updates=update,
            givens={input_stddev: self._stddev}
        )
Ejemplo n.º 6
0
 def _build(self):
     """Create the optional embedding layer and the GRU cell.

     When no embedding size is configured, the embedding layer is left
     as ``None`` and inputs are expected to feed the cell directly.
     """
     if self._emb_size is None:
         self._emb_layer = None
     else:
         self._emb_layer = ph.Linear(
             'emb_layer', self._voc_size, self._emb_size)
     self._cell = ph.GRUCell('cell', self._emb_size, self._state_size)
Ejemplo n.º 7
0
 def _build(self):
     self._cell = ph.GRUCell('cell',
                             input_size=self._input_size,
                             state_size=self._state_size)
Ejemplo n.º 8
0
 def _build(self):
     """Create the embedding layer and the GRU cell that consumes it."""
     voc_size, emb_size, state_size = (
         self._voc_size, self._emb_size, self._state_size)
     self._emb_layer = ph.Linear('emb_layer', voc_size, emb_size)
     self._cell = ph.GRUCell('cell', emb_size, state_size)