Example #1
0
    def _build(self):
        """Build an image classifier on top of a VGG16 encoder.

        Defines three steps:
          * ``self.predict``   -- predicted label and class distribution
            (dropout disabled).
          * ``self.train``     -- updates only the new ``dense`` head.
          * ``self.fine_tune`` -- updates every trainable variable,
            encoder included, with a smaller learning rate.
        """
        # Batch of RGB images at the VGG16 input resolution.
        input_image = ph.placeholder('input_image',
                                     (None, vgg.HEIGHT, vgg.WIDTH, 3),
                                     ph.float)
        encoder = vgg.VGG16('encoder')
        encoder.setup(input_image)
        h = encoder['h7']  # fc7 feature vector of the encoder

        dropout = ph.Dropout('dropout')
        h = dropout.setup(h)

        # Softmax classification head over the encoder feature.
        dense = ph.Linear('dense', encoder.fc7.output_size, self._num_classes)
        y = dense.setup(h)
        y = tf.nn.softmax(y)
        label = tf.argmax(y, axis=1)

        # Inference: dropout is a no-op (keep_prob fixed to 1.0).
        self.predict = ph.Step(inputs=input_image,
                               outputs=(label, y),
                               givens={dropout.keep_prob: 1.0})

        # Cross-entropy loss against one-hot encoded integer labels.
        input_label = ph.placeholder('input_label', (None, ), ph.int)
        y_target = tf.one_hot(input_label, self._num_classes)
        loss = ph.ops.cross_entropy(y_target, y)
        loss = tf.reduce_mean(loss)

        # Phase 1: train only the dense head; L2-regularized gradients
        # clipped element-wise to [-10, 10].
        var_list = dense.get_trainable_variables()
        reg = ph.reg.L2Regularizer(1e-6)
        reg.setup(var_list)
        grad_list = [
            tf.clip_by_value(grad, -10, 10)
            for grad in tf.gradients(loss + reg.get_loss(), var_list)
        ]
        lr = ph.train.ExponentialDecayedValue('lr_train',
                                              1e-4,
                                              num_loops=1e4,
                                              min_value=1e-5)
        update = tf.train.AdamOptimizer(lr.value).apply_gradients(
            zip(grad_list, var_list))
        # lr.update_op advances the learning-rate decay on every step.
        self.train = ph.Step(inputs=(input_image, input_label),
                             outputs=loss,
                             updates=(update, lr.update_op),
                             givens={dropout.keep_prob: self._keep_prob})

        # Phase 2: fine-tune all variables with a smaller learning rate and
        # weaker L2 regularization.
        var_list = self.get_trainable_variables()
        reg = ph.reg.L2Regularizer(1e-7)
        reg.setup(var_list)
        grad_list = [
            tf.clip_by_value(grad, -10, 10)
            for grad in tf.gradients(loss + reg.get_loss(), var_list)
        ]
        lr = ph.train.ExponentialDecayedValue('lr_fine_tune',
                                              2e-5,
                                              num_loops=3e4,
                                              min_value=1e-6)
        update = tf.train.AdamOptimizer(lr.value).apply_gradients(
            zip(grad_list, var_list))
        self.fine_tune = ph.Step(inputs=(input_image, input_label),
                                 outputs=loss,
                                 updates=(update, lr.update_op),
                                 givens={dropout.keep_prob: self._keep_prob})
Example #2
0
    def _build(self):
        """Build a multi-label image classifier on a VGG16 encoder.

        The output layer is sigmoid-activated (independent per-class
        probabilities) rather than softmax.

        Defines:
          * ``self.predict``   -- per-class probabilities, dropout disabled.
          * ``self.train``     -- updates only the new ``dense`` head.
          * ``self.fine_tune`` -- updates every trainable variable.
        """
        image = ph.placeholder('input_image', (None, vgg.HEIGHT, vgg.WIDTH, 3),
                               ph.float)
        encoder = vgg.VGG16('encoder')
        encoder.setup(image)
        h = encoder['h7']  # fc7 feature vector of the encoder

        dropout = ph.Dropout('dropout')
        h = dropout.setup(h)

        dense = ph.Linear('dense', encoder.fc7.output_size, NUM_CLASSES)
        y = dense.setup(h)
        y = tf.nn.sigmoid(y)

        # Inference: dropout is a no-op (keep_prob fixed to 1.0).
        self.predict = ph.Step(inputs=image,
                               outputs=y,
                               givens={dropout.keep_prob: 1.0})

        # Element-wise cross-entropy against a float multi-hot target.
        target = ph.placeholder('target', (None, NUM_CLASSES), ph.float)
        loss = ph.ops.cross_entropy(target, y)
        loss = tf.reduce_mean(loss)

        # Phase 1: train only the dense head; L2-regularized gradients
        # clipped element-wise to [-10, 10].
        var_list = dense.get_trainable_variables()
        reg = ph.reg.L2Regularizer(1e-6)
        reg.setup(var_list)
        grad_list = [
            tf.clip_by_value(grad, -10, 10)
            for grad in tf.gradients(loss + reg.get_loss(), var_list)
        ]
        lr = ph.train.ExponentialDecayedValue('lr_train',
                                              1e-4,
                                              num_loops=3e3,
                                              min_value=1e-5)
        update = tf.train.AdamOptimizer(lr.value).apply_gradients(
            zip(grad_list, var_list))
        # FIX: run lr.update_op with each step -- without it the
        # exponentially decayed learning rate never advances.  The sibling
        # builders that use ExponentialDecayedValue all include it.
        self.train = ph.Step(inputs=(image, target),
                             outputs=loss,
                             updates=(update, lr.update_op),
                             givens={dropout.keep_prob: self._keep_prob})

        # Phase 2: fine-tune all variables with a smaller learning rate and
        # weaker L2 regularization.
        var_list = self.get_trainable_variables()
        reg = ph.reg.L2Regularizer(1e-7)
        reg.setup(var_list)
        grad_list = [
            tf.clip_by_value(grad, -10, 10)
            for grad in tf.gradients(loss + reg.get_loss(), var_list)
        ]
        lr = ph.train.ExponentialDecayedValue('lr_fine_tune',
                                              2e-5,
                                              num_loops=2e4,
                                              min_value=1e-6)
        update = tf.train.AdamOptimizer(lr.value).apply_gradients(
            zip(grad_list, var_list))
        self.fine_tune = ph.Step(inputs=(image, target),
                                 outputs=loss,
                                 updates=(update, lr.update_op),
                                 givens={dropout.keep_prob: self._keep_prob})
Example #3
0
    def _build(self):
        """Build steps for training with manually accumulated gradients
        (truncated-backprop style, one transition at a time).

        Steps created:
          * ``self._step_get_state``   -- run one transition, return the new
            state.
          * ``self._step_reset_grad``  -- zero the gradient accumulators.
          * ``self._step_update_grad`` -- back-propagate an incoming state
            gradient through one transition, add the (weighted) variable
            gradients into the accumulators, and return the gradient w.r.t.
            the previous state plus its L1 magnitude.
          * ``self._step_apply_grad``  -- apply the accumulated gradients
            with the configured optimizer.
        """
        self._step_get_state = ph.Step(inputs=(*self._input_list,
                                               self._prev_state),
                                       outputs=self._state)

        #
        # reset gradient
        # One zero tensor per trainable variable, matching shape and dtype.
        grad_zeros_list = [
            tf.zeros(shape=var_.shape,
                     dtype=var_.dtype,
                     name='grad_' + ph.utils.get_basename(var_.name) + '_init')
            for var_ in self._var_list
        ]
        # Non-trainable accumulator variables that hold the running gradient
        # sums between calls.
        self._grad_acc_list = [
            ph.variable(name='grad_' + ph.utils.get_basename(grad_zero.name),
                        initial_value=grad_zero,
                        trainable=False) for grad_zero in grad_zeros_list
        ]
        self._step_reset_grad = ph.Step(updates=tf.group(*[
            tf.assign(grad, value)
            for grad, value in zip(self._grad_acc_list, grad_zeros_list)
        ]))

        #
        # update gradient
        # grad_state: gradient of the overall objective w.r.t. the state this
        # transition produced (fed back in from the subsequent step).
        grad_state = ph.placeholder(
            name='grad_' + ph.utils.get_basename(self._prev_state.name),
            shape=self._prev_state.shape,
            dtype=self._prev_state.dtype)
        grad_weight = ph.placeholder('grad_weight', ())
        # Chain rule: propagate grad_state through the transition to both the
        # variables and the previous state.
        grad_list = tf.gradients(self._state, self._var_list, grad_state)
        grad_prev_state = tf.gradients(self._state, [self._prev_state],
                                       grad_state)[0]
        self._step_update_grad = ph.Step(
            inputs=(*self._input_list, self._prev_state, grad_state,
                    grad_weight),
            outputs=(grad_prev_state, tf.reduce_sum(tf.abs(grad_prev_state))),
            updates=tf.group(*[
                tf.assign_add(grad_acc, grad * grad_weight)
                for grad_acc, grad in zip(self._grad_acc_list, grad_list)
            ]))

        #
        # apply gradient
        self._step_apply_grad = ph.Step(
            updates=self._optimizer.apply_gradients(
                zip(self._grad_acc_list, self._var_list)))
Example #4
0
    def _build(self):
        """Expose VGG16 'h7' (fc7) activations as an image-feature step."""
        net = vgg.VGG16('encoder')
        img = ph.placeholder('image', (None, vgg.HEIGHT, vgg.WIDTH, 3),
                             ph.float)
        net.setup(img)
        feature = net['h7']

        self.step = ph.Step(inputs=img, outputs=feature)
        # Prediction is simply feature extraction here; keep both names.
        self.predict = self.step
Example #5
0
    def _build(self):
        """Build a sequence autoencoder (encoder -> embedding -> decoder).

        ``self.train`` minimizes the length-masked negative log-likelihood of
        the reconstruction; ``self.test`` additionally exposes the latent
        code ``h``.
        """
        encoder = Encoder('seq_encoder', self._voc_size, self._emb_size,
                          self._state_size)
        # The decoder reuses the encoder's embedding layer (tied weights).
        decoder = Decoder('seq_decoder', self._voc_size, self._emb_size,
                          self._state_size, encoder.emb_layer)
        self._encoder = encoder
        self._decoder = decoder

        # Sequence batch: (batch_size, seq_length, voc_size).
        seq = ph.placeholder('seq', (None, None, self._voc_size))
        max_len = tf.shape(seq)[1]
        h = encoder.setup(seq)
        seq_ = decoder.setup(h, max_len)
        self._seq = seq
        self._h = h
        self._seq_ = seq_

        # Per-position reconstruction loss, masked to the true sequence
        # lengths so padding positions contribute nothing.
        loss = -ph.ops.log_likelihood(seq, seq_,
                                      reduce=False)  # (batch_size, seq_length)
        seq_len = ph.ops.sequence_length(seq)
        mask = tf.sequence_mask(seq_len,
                                dtype=ph.dtype)  # (batch_size, seq_length)
        loss = ph.ops.reduce_sum_loss(loss * mask)
        self._loss = loss

        reg = ph.reg.Regularizer()
        reg.add_l1_l2(self.get_trainable_variables())

        # Regularization is only added when a positive weight is configured.
        update = self._optimizer.minimize(
            loss + reg.get_loss(self._reg) if self._reg > 0 else loss)
        self.train = ph.Step(inputs=seq,
                             outputs={
                                 'seq_': seq_,
                                 'loss': loss
                             },
                             updates=update)
        self.test = ph.Step(inputs=seq,
                            outputs={
                                'h': h,
                                'seq_': seq_,
                                'loss': loss
                            })
Example #6
0
    def _build(self):
        """Build a one-hidden-layer softmax classifier.

        ``self.predict`` returns (label, probabilities) with dropout off;
        ``self.train`` minimizes cross-entropy with clipped, L2-regularized
        Adam updates on an exponentially decaying learning rate.
        """
        x = ph.placeholder('x', shape=(None, self._input_size), dtype=ph.float)

        fc_hidden = ph.Linear('hidden_layer',
                              input_size=self._input_size,
                              output_size=self._hidden_size)
        fc_out = ph.Linear('out_layer',
                           input_size=self._hidden_size,
                           output_size=self._num_classes)
        dropout = ph.Dropout('dropout')

        # Pipeline: x -> hidden -> lrelu -> dropout -> output -> softmax.
        y = ph.setup(
            x, [fc_hidden, ph.ops.lrelu, dropout, fc_out, tf.nn.softmax])
        label = tf.argmax(y, axis=1)

        self.predict = ph.Step(inputs=x,
                               outputs=(label, y),
                               givens={dropout.keep_prob: 1.0})

        true_label = ph.placeholder('true_label', shape=(None, ), dtype=ph.int)
        target = tf.one_hot(true_label, self._num_classes)
        loss = tf.reduce_mean(ph.ops.cross_entropy(target, y))

        var_list = self.get_trainable_variables()
        reg = ph.reg.L2Regularizer(1e-6)
        reg.setup(var_list)
        # Clip each L2-regularized gradient element-wise to [-10, 10].
        raw_grads = tf.gradients(loss + reg.get_loss(), var_list)
        grad_list = []
        for g in raw_grads:
            grad_list.append(tf.clip_by_value(g, -10, 10))
        lr = ph.train.ExponentialDecayedValue('lr_train',
                                              1e-4,
                                              num_loops=2e4,
                                              min_value=1e-6)
        optimizer = tf.train.AdamOptimizer(lr.value)
        update = optimizer.apply_gradients(zip(grad_list, var_list))
        # lr.update_op advances the learning-rate decay on every step.
        self.train = ph.Step(inputs=(x, true_label),
                             outputs=loss,
                             updates=(update, lr.update_op),
                             givens={dropout.keep_prob: self._keep_prob})
Example #7
0
    def _build(self):
        """Build a minimal MLP for 784-dim inputs with 10 output classes."""
        input_image = tf.placeholder(shape=(None, 784),
                                     dtype=tf.float32,
                                     name='input_image')
        fc1 = ph.Linear('hidden_layer', 784, self._hidden_size)
        fc2 = ph.Linear('output_layer', self._hidden_size, 10)
        # Pipeline: input -> hidden -> lrelu -> output -> softmax.
        y = ph.setup(input_image, [fc1, ph.ops.lrelu, fc2, tf.nn.softmax])
        label = tf.argmax(y, 1)
        input_label = tf.placeholder(shape=(None, ),
                                     dtype=tf.int64,
                                     name='input_label')
        one_hot_target = tf.one_hot(input_label, 10, dtype=tf.float32)
        loss = tf.reduce_mean(ph.ops.cross_entropy(one_hot_target, y))

        optimizer = tf.train.RMSPropOptimizer(1e-4, 0.9, 0.9)
        self.train = ph.Step(inputs=(input_image, input_label),
                             outputs=loss,
                             updates=optimizer.minimize(loss))
        self.predict = ph.Step(inputs=input_image, outputs=label)
Example #8
0
    def _build(self):
        """Build a recurrent visual-attention classifier trained with
        REINFORCE plus a learned baseline.

        The input batch is tiled ``self._num_mc_samples`` times along axis 0
        so each example is evaluated over several Monte-Carlo glimpse
        trajectories.
        """
        # Tile the batch for Monte-Carlo sampling (all trailing dims kept).
        input_x = tf.tile(self._input_x, [self._num_mc_samples] + [1] * (len(self._input_x.shape) - 1))
        g_net = self._glimpse_network
        l_net = self._location_network

        # Stddev of the location sampling noise; near-zero at predict time.
        input_stddev = tf.placeholder(
            shape=(),
            dtype=ph.dtype,
            name='input_stddev'
        )

        cell = self._cell = ph.GRUCell(
            'cell',
            g_net.output_size,
            self._state_size,
            w_init=ph.init.GlorotUniform()
        )
        batch_size = tf.shape(input_x)[0]
        init_state = tf.zeros(shape=(batch_size, self._state_size), dtype=ph.dtype)
        # Initial glimpse location, uniform in [-1, 1]^2.
        init_loc = tf.random_uniform((batch_size, 2), minval=-1, maxval=1)

        def _loop(acc, _):
            # One glimpse: attend at `loc`, update the GRU state, then sample
            # the next location (and its mean) from the location network.
            prev_state, loc, _ = acc
            g = g_net.setup(input_x, loc)
            state = cell.setup(g, prev_state)
            next_loc, next_mean = l_net.setup(state, input_stddev)
            return state, next_loc, next_mean

        # Unroll num_steps glimpses; `elems` only drives the iteration count.
        states, locs, means = tf.scan(
            fn=_loop,
            elems=tf.zeros(shape=(self._num_steps,), dtype=tf.int8),
            initializer=(init_state, init_loc, init_loc)
        )  # (num_steps, batch_size, *)

        # Baseline head: scalar reward estimate per state, used to reduce the
        # variance of the REINFORCE gradient.
        baseline_layer = self._baseline_layer = ph.Linear('baseline_layer', self._state_size, 1)

        def _make_baseline(state):
            baseline = baseline_layer.setup(state)  # (batch_size, 1)
            baseline = tf.reshape(baseline, (-1,))  # (batch_size,)
            return baseline

        baselines = tf.map_fn(_make_baseline, states)  # (num_steps, batch_size)
        baselines = tf.transpose(baselines)  # (batch_size, num_steps)

        # Classification head on the final recurrent state.
        predict_layer = self._predict_layer = ph.Linear('predict_layer', self._state_size, self._num_classes)
        last_state = states[-1]  # (batch_size, state_size)
        prob = predict_layer.setup(last_state)
        prob = tf.nn.softmax(prob)  # (batch_size, num_classes)
        label = tf.argmax(prob, 1)  # (batch_size,)
        # Predict with near-deterministic glimpse locations (tiny stddev).
        self._step_predict = ph.Step(
            inputs=input_x,
            outputs=label,
            givens={input_stddev: 1e-3}
        )

        self._input_label = ph.placeholder('input_label', (None,), tf.int64)
        # Labels tiled to match the MC-tiled inputs.
        input_label = tf.tile(self._input_label, (self._num_mc_samples,))
        prob_ = tf.one_hot(input_label, self._num_classes)  # (batch_size, num_classes)
        predict_loss = self._predict_loss = -tf.reduce_mean(ph.ops.log_likelihood(prob_, prob))

        # Reward: 1 for a correct final classification else 0, broadcast to
        # every glimpse step; detached so it acts as a constant.
        reward = tf.cast(tf.equal(label, input_label), tf.float32)  # (batch_size,)
        rewards = tf.reshape(reward, (-1, 1))  # (batch_size, 1)
        rewards = tf.tile(rewards, (1, self._num_steps))  # (batch_size, num_steps)
        rewards = tf.stop_gradient(rewards)
        baseline_loss = self._baseline_loss = tf.reduce_mean(ph.ops.mean_square_error(rewards, baselines))

        # REINFORCE term: Gaussian log-prob of the sampled locations weighted
        # by the advantage (reward minus detached baseline).
        advantages = rewards - tf.stop_gradient(baselines)
        logll = self._log_gaussian(locs, means, input_stddev)
        logll = tf.reduce_sum(logll, 2)  # (num_steps, batch_size)
        logll = tf.transpose(logll)  # (batch_size, num_steps)
        logll_ratio = self._logll_ratio = tf.reduce_mean(logll * advantages)

        # Total: classification loss - REINFORCE objective + baseline fit.
        loss = self._loss = predict_loss - logll_ratio + baseline_loss
        if self._reg is not None:
            self._reg.setup(self.get_trainable_variables())
            update = self._optimizer.minimize(loss + self._reg.get_loss())
        else:
            update = self._optimizer.minimize(loss)
        self._step_train = ph.Step(
            inputs=(self._input_x, self._input_label),
            outputs=(loss, tf.reduce_mean(rewards)),
            updates=update,
            givens={input_stddev: self._stddev}
        )
Example #9
0
    def _build(self):
        """Build an AlexNet-based classifier with pre-train and fine-tune
        steps.

        The output layer has ``self._num_classes + 1`` units -- presumably an
        extra "background/unknown" class; verify against the callers.
        """
        input_image = ph.placeholder('input_image',
                                     (None, alexnet.HEIGHT, alexnet.WIDTH, 3),
                                     ph.float)
        encoder = alexnet.AlexNet('encoder', ph.ops.swish)
        dropout = ph.Dropout('dropout')
        dense = ph.Linear('dense', encoder['dense_7'].output_size,
                          self._hidden_size)
        output_layer = ph.Linear('output_layer', dense.output_size,
                                 self._num_classes + 1)

        encoder.setup(input_image)
        # Pipeline: feature_7 -> dense -> swish -> dropout -> out -> softmax.
        y = ph.setup(
            encoder['feature_7'],
            [dense, ph.ops.swish, dropout, output_layer, tf.nn.softmax])
        label = tf.argmax(y, axis=1)

        # Inference: dropout disabled.
        self.predict = ph.Step(inputs=input_image,
                               outputs=(label, y),
                               givens={dropout.keep_prob: 1.0})

        input_label = ph.placeholder('input_label', (None, ), ph.int)
        y_target = tf.one_hot(input_label, self._num_classes + 1)
        loss = -ph.ops.log_likelihood(y_target, y)
        loss = tf.reduce_mean(loss)

        ################################################################################
        # pre-train
        ################################################################################
        # Only the newly added head layers are updated in this phase.
        vars_new = [
            *dense.get_trainable_variables(),
            *output_layer.get_trainable_variables()
        ]
        reg = ph.reg.L2Regularizer(self._reg)
        reg.setup(vars_new)
        lr = ph.train.ExponentialDecayedValue('lr_1',
                                              init_value=self._learning_rate_1,
                                              num_loops=self._num_loops_1,
                                              min_value=self._learning_rate_1 /
                                              10)
        # Clip each gradient to [-grad_clip, grad_clip]; variables with no
        # gradient path (g is None) are skipped.
        update_1 = tf.train.AdamOptimizer(lr.value).apply_gradients([
            (tf.clip_by_value(g, -self._grad_clip, self._grad_clip), v)
            for g, v in zip(tf.gradients(loss +
                                         reg.get_loss(), vars_new), vars_new)
            if g is not None
        ])
        # with tf.control_dependencies([update_1]):
        #     update_2 = ph.train.L2Regularizer(self._reg).apply(vars_new)
        self.train = ph.Step(inputs=(input_image, input_label),
                             outputs=(loss, lr.variable),
                             updates=update_1,
                             givens={dropout.keep_prob: self._keep_prob})

        ################################################################################
        # fine tune
        ################################################################################
        # All trainable variables (encoder included) in this phase.
        vars_all = self.get_trainable_variables()
        reg = ph.reg.L2Regularizer(self._reg)
        reg.setup(vars_all)
        lr = ph.train.ExponentialDecayedValue('lr_2',
                                              init_value=self._learning_rate_2,
                                              num_loops=self._num_loops_2,
                                              min_value=self._learning_rate_2 /
                                              10)
        update_1 = tf.train.AdamOptimizer(lr.value).apply_gradients([
            (tf.clip_by_value(g, -self._grad_clip, self._grad_clip), v)
            for g, v in zip(tf.gradients(loss +
                                         reg.get_loss(), vars_all), vars_all)
            if g is not None
        ])
        # with tf.control_dependencies([update_1]):
        #     update_2 = ph.train.L2Regularizer(self._reg).apply(vars_all)
        self.fine_tune = ph.Step(inputs=(input_image, input_label),
                                 outputs=(loss, lr.variable),
                                 updates=update_1,
                                 givens={dropout.keep_prob: self._keep_prob})
Example #10
0
    def _build(self):
        """Build a multi-GPU VGG16 classifier with device-averaged gradients.

        The batch is split across GPUs 1..num_gpus (GPU 0 is left free).
        NOTE(review): ``self._num_gpus -= 1`` permanently mutates the field,
        so calling _build twice would shrink it again -- confirm intended.
        """
        encoder = vgg.VGG16('encoder')
        dense1 = ph.Linear('dense1',
                           encoder.fc7.output_size,
                           4096,
                           w_init=ph.init.TruncatedNormal(0, 1e-3))
        dense2 = ph.Linear('dense2',
                           4096,
                           self._num_classes,
                           w_init=ph.init.TruncatedNormal(0, 1e-3))
        input_image = ph.placeholder('input_image',
                                     (None, vgg.HEIGHT, vgg.WIDTH, 3),
                                     ph.float)
        input_label = ph.placeholder('input_label', (None, ), ph.int)

        # Reserve one device; shards run on /gpu:1 .. /gpu:num_gpus.
        self._num_gpus -= 1
        batch_size = tf.shape(input_image)[0]
        # Ceil so the final shard absorbs any remainder of the batch.
        num_per_device = tf.cast(tf.ceil(batch_size / self._num_gpus),
                                 tf.int32)

        # Head-only variables (train) vs. all variables (fine-tune).
        var_list1 = [
            *dense1.get_trainable_variables(),
            *dense2.get_trainable_variables()
        ]
        var_list2 = self.get_trainable_variables()

        y_list = []
        loss_list = []
        grad_list_list1 = []
        grad_list_list2 = []
        for i in range(self._num_gpus):
            with tf.device(f'/gpu:{i + 1}'):
                # This device's slice of the batch.
                input_image_i = input_image[i * num_per_device:(i + 1) *
                                            num_per_device]
                encoder.setup(input_image_i)
                # Presumably repeated setup() registers suffixed outputs
                # ('h7_1', 'h7_2', ...) -- confirm against vgg.VGG16.
                h = encoder['h7'] if i == 0 else encoder[f'h7_{i}']
                # Residual connection around dense1 (fc7 must be 4096-dim
                # for the addition to broadcast correctly).
                y = ph.ops.lrelu(dense1.setup(h) + h)
                y = tf.nn.softmax(dense2.setup(y))
                y_list.append(y)

                input_label_i = input_label[i * num_per_device:(i + 1) *
                                            num_per_device]
                y_target = tf.one_hot(input_label_i, self._num_classes)
                loss = ph.ops.cross_entropy(y_target, y)
                loss = tf.reduce_mean(loss)
                loss_list.append(loss)

                # Per-device gradients for both variable sets.
                reg1 = ph.reg.L2Regularizer(1e-6)
                reg1.setup(var_list1)
                grad_list1 = tf.gradients(loss + reg1.get_loss(), var_list1)
                grad_list_list1.append(grad_list1)

                reg2 = ph.reg.L2Regularizer(1e-6)
                reg2.setup(var_list2)
                grad_list2 = tf.gradients(loss + reg2.get_loss(), var_list2)
                grad_list_list2.append(grad_list2)

        # Merge per-device outputs and losses.
        y = tf.concat(y_list, axis=0)
        loss = tf.reduce_mean(loss_list)

        # Average each variable's gradient across devices.
        grad_list1 = [
            tf.reduce_mean(grads, axis=0) for grads in zip(*grad_list_list1)
        ]
        self.train = ph.Step(inputs=(input_image, input_label),
                             outputs=loss,
                             updates=tf.train.RMSPropOptimizer(
                                 1e-5, 0.9, 0.9).apply_gradients(
                                     zip(grad_list1, var_list1)))

        grad_list2 = [
            tf.reduce_mean(grads, axis=0) for grads in zip(*grad_list_list2)
        ]
        self.fine_tune = ph.Step(inputs=(input_image, input_label),
                                 outputs=loss,
                                 updates=tf.train.RMSPropOptimizer(
                                     1e-6, 0.9, 0.9).apply_gradients(
                                         zip(grad_list2, var_list2)))

        label = tf.argmax(y, axis=1)
        self.predict = ph.Step(inputs=input_image, outputs=(label, y))
Example #11
0
    def _build(self):
        """Build a Siamese sequence autoencoder.

        Two sequences share one encoder and one decoder; the loss combines
        both reconstruction losses with a squared-distance "semantic" term
        that pulls the two embeddings together.
        """
        input_seq0 = self._input_seq0 = ph.placeholder(
            'input_seq0', (None, None, self._voc_size))
        input_seq1 = self._input_seq1 = ph.placeholder(
            'input_seq1', (None, None, self._voc_size))
        encoder = self._encoder = seqae.Encoder('encoder',
                                                voc_size=self._voc_size,
                                                emb_size=self._emb_size,
                                                state_size=self._state_size)
        # The same encoder instance embeds both sequences (shared weights).
        emb0 = self._emb0 = encoder.setup(input_seq0)
        emb1 = self._emb1 = encoder.setup(input_seq1)

        self.predict = ph.Step(inputs=input_seq0, outputs=emb0)

        # The decoder reuses the encoder's embedding layer (tied weights).
        decoder = self._decoder = seqae.Decoder('seq_decoder', self._voc_size,
                                                self._emb_size,
                                                self._state_size,
                                                encoder.emb_layer)
        max_len0 = tf.shape(input_seq0)[1]
        max_len1 = tf.shape(input_seq1)[1]
        rec0 = self._rec0 = decoder.setup(emb0, max_len0)
        rec1 = self._rec1 = decoder.setup(emb1, max_len1)

        #
        # reconstruction loss of the two sequences
        # (length-masked negative log-likelihood; padding contributes nothing)
        loss_rec0 = -ph.ops.log_likelihood(input_seq0, rec0, reduce=False)
        loss_rec0 *= tf.sequence_mask(ph.ops.sequence_length(input_seq0),
                                      dtype=ph.dtype)
        loss_rec0 = ph.ops.reduce_sum_loss(loss_rec0)
        loss_rec0 = tf.reduce_mean(loss_rec0)
        self._loss_rec0 = loss_rec0

        loss_rec1 = -ph.ops.log_likelihood(input_seq1, rec1, reduce=False)
        loss_rec1 *= tf.sequence_mask(ph.ops.sequence_length(input_seq1),
                                      dtype=ph.dtype)
        loss_rec1 = ph.ops.reduce_sum_loss(loss_rec1)
        loss_rec1 = tf.reduce_mean(loss_rec1)
        self._loss_rec1 = loss_rec1

        loss_rec = self._loss_rec = loss_rec0 + loss_rec1

        #
        # semantic loss
        # (an earlier cosine-similarity variant is kept below for reference)
        # norm0 = tf.norm(emb0, axis=1, keepdims=True)
        # norm1 = tf.norm(emb1, axis=1, keepdims=True)
        # s = tf.reduce_sum(emb0 * emb1, axis=1) / (norm0 * norm1 + 1e-6)
        # s = tf.reduce_mean(s)
        #
        # loss_semantic = self._loss_semantic = 1.0 - s

        # Mean squared Euclidean distance between the two embeddings.
        loss_semantic = tf.reduce_sum(tf.square(emb0 - emb1), axis=1)
        loss_semantic = tf.reduce_mean(loss_semantic)
        self._loss_semantic = loss_semantic

        #
        # train step
        loss = self._loss = loss_rec + self._semantic_weight * loss_semantic
        reg = ph.reg.Regularizer()
        reg.add_l1_l2(self.get_trainable_variables())
        # Regularization only applies when a positive weight is configured.
        update = self._optimizer.minimize(
            loss +
            reg.get_loss(self._reg_weight) if self._reg_weight > 0 else loss)
        self.train = ph.Step(inputs=(input_seq0, input_seq1),
                             outputs={
                                 'loss': loss,
                                 'loss_rec': loss_rec,
                                 'loss_semantic': loss_semantic,
                                 'rec0': rec0,
                                 'rec1': rec1
                             },
                             updates=update)
Example #12
0
    def _build(self):
        """Wire up an actor-critic graph with separate source/target nets.

        Builds predict / critic-train / actor-train steps for the source
        networks, plus a soft-update step and a hard-init step that copy
        source weights into the target networks.
        """
        source_actor = self._source_actor
        target_actor = self._target_actor
        source_critic = self._source_critic
        target_critic = self._target_critic

        # Source networks: action and predicted reward for the current state.
        input_source_state = self._input_source_state
        source_action = source_actor.setup(input_source_state)
        source_reward = source_critic.setup(input_source_state, source_action)

        # Target networks: evaluate the next state to form the TD target.
        input_target_state = self._input_target_state
        target_action = target_actor.setup(input_target_state)
        target_reward = target_critic.setup(input_target_state, target_action)

        # Acting: the policy is the source actor.
        self._step_predict = ph.Step(inputs=input_source_state,
                                     outputs=source_action)

        # Critic training: minimize the squared TD error against
        # r + gamma * Q'(s', a').
        input_reward = self._input_reward
        td_target = input_reward + self._gamma * target_reward
        critic_loss = tf.reduce_mean(tf.square(td_target - source_reward))
        critic_vars = source_critic.get_trainable_variables()
        critic_reg = ph.reg.Regularizer().add_l1_l2(critic_vars).get_loss(
            self._reg_weight)
        self._step_train_critic = ph.Step(
            inputs=(input_source_state, source_action, input_reward,
                    input_target_state),
            outputs=critic_loss,
            updates=self._optimizer.minimize(critic_loss + critic_reg,
                                             var_list=critic_vars))

        # Actor training: maximize the critic's reward estimate.
        actor_vars = source_actor.get_trainable_variables()
        actor_loss = -tf.reduce_mean(source_reward)
        actor_reg = ph.reg.Regularizer().add_l1_l2(actor_vars).get_loss(
            self._reg_weight)
        self._step_train_actor = ph.Step(inputs=input_source_state,
                                         outputs=actor_loss,
                                         updates=self._optimizer.minimize(
                                             actor_loss + actor_reg,
                                             var_list=actor_vars))

        # Soft update: target <- tao * source + (1 - tao) * target.
        source_var_list = (source_critic.get_trainable_variables() +
                           source_actor.get_trainable_variables())
        target_var_list = (target_critic.get_trainable_variables() +
                           target_actor.get_trainable_variables())
        soft_assigns = [
            tf.assign(v_target, self._tao * v_source +
                      (1.0 - self._tao) * v_target)
            for v_source, v_target in zip(source_var_list, target_var_list)
        ]
        self._step_update_target = ph.Step(updates=tf.group(*soft_assigns))

        # Hard init: target <- source (run once at startup).
        init_assigns = [
            tf.assign(v_target, v_source)
            for v_source, v_target in zip(source_var_list, target_var_list)
        ]
        self._step_init_target = ph.Step(updates=tf.group(*init_assigns))