Beispiel #1
0
    def _create_graph(self):
        self.x = Tensor(shape=[None, self.img_channels, self.img_height, self.img_width]).Variable()
        self.y_r = Tensor(shape=[None], name='Yr').Variable()

        # As implemented in A3C paper
        self.n1 = ops.Relu(ops.Conv2D([self.x] + self.weight_bias(), kernel_size=8, stride=4, num_output=16))
        self.n2 = ops.Relu(ops.Conv2D([self.n1] + self.weight_bias(), kernel_size=4, stride=2, num_output=32))

        self.action_index = Tensor(shape=[None, self.num_actions]).Variable()

        self.d1 = ops.Relu(ops.InnerProduct([self.n2] + self.weight_bias(), num_output=256))

        self.logits_v = ops.InnerProduct([self.d1] + self.weight_bias(), num_output=1)
        self.cost_v = ops.L2Loss([self.y_r, self.logits_v])

        self.logits_p = ops.InnerProduct([self.d1] + self.weight_bias(), num_output=self.num_actions)

        if Config.USE_LOG_SOFTMAX: raise NotImplementedError()
        else:
            self.softmax_p = ops.Softmax(self.logits_p)
            self.selected_action_prob = ops.Sum(self.softmax_p * self.action_index, axis=1)
            self.cost_p_1 = ops.Log(ops.Clip(self.selected_action_prob, self.log_epsilon, None)) * \
                            (self.y_r - ops.StopGradient(self.logits_v))
            self.cost_p_2 = ops.Sum(ops.Log(ops.Clip(self.softmax_p, self.log_epsilon, None)) *
                                      self.softmax_p, axis=1) * (-self.beta)
        self.cost_p_1_agg = ops.Sum(self.cost_p_1)
        self.cost_p_2_agg = ops.Sum(self.cost_p_2)
        self.cost_p = -(self.cost_p_1_agg + self.cost_p_2_agg)
        self.cost_all = self.cost_p + self.cost_v
        
        if Config.DUAL_RMSPROP: raise NotImplementedError()
        else:
            if Config.USE_GRAD_CLIP:
                self.opt = updaters.RMSPropUpdater(decay=Config.RMSPROP_DECAY,
                                                   eps=Config.RMSPROP_EPSILON,
                                                   clip_gradient=Config.GRAD_CLIP_NORM)
            else:
                self.opt = updaters.RMSPropUpdater(decay=Config.RMSPROP_DECAY,
                                                   eps=Config.RMSPROP_EPSILON)

        grads = T.grad(self.cost_all, self.network_params)
        for p, g in zip(self.network_params, grads):
            self.opt.append((p, g), lr_mult=1.0)
Beispiel #2
0
 def Setup(self, bottom):
     super(L2LossLayer, self).Setup(bottom)
     loss = ops.L2Loss(bottom, **self._param)
     if self._loss_weight is not None: loss *= self._loss_weight
     return loss
Beispiel #3
0
 def LayerSetup(self, bottom):
     loss = _ops.L2Loss(bottom, **self.arguments)
     if self._loss_weight is not None: loss *= self._loss_weight
     return loss
Beispiel #4
0
def l2_loss(t, name=None):
    return _ops.L2Loss(t, normalization='NONE', name=name)
Beispiel #5
0
 def Setup(self, bottom):
     super(L2LossLayer, self).Setup(bottom)
     return ops.L2Loss(bottom, **self._param)