Example #1
def grid_coord(guide, xx, yy, sz, small_sz, sigma_r, bs):
    # Map full-resolution pixel coordinates (xx, yy) into the coarse grid.
    gx = ((xx + 0.5) / sz) * small_sz
    gy = ((yy + 0.5) / sz) * small_sz
    # The guide image selects the grid's third (intensity) coordinate.
    expanded_guide = C.reshape(guide, [bs, 1, sz, sz])
    gz = expanded_guide * sigma_r
    # Floor coordinates of the enclosing grid cells.
    fx = C.floor(gx - 0.5)
    fy = C.floor(gy - 0.5)
    fz = C.clip(C.floor(gz - 0.5), 0, sigma_r - 1)
    # Ceil coordinates, clamped to stay inside the grid.
    cx = C.element_min(fx + 1, small_sz - 1)
    cy = C.element_min(fy + 1, small_sz - 1)
    cz = C.clip(fz + 1, 0, sigma_r - 1)
    return gx, gy, gz, fx, fy, fz, cx, cy, cz
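A minimal usage sketch for the snippet above (hypothetical sizes; assumes `import numpy as np` and `import cntk as C`, and mirrors the index-grid construction from the later examples on this page):

sz, small_sz, sigma_r, bs = 256, 16, 8, 4
# Pixel-coordinate grids, built the same way as in the later examples.
xx = np.arange(0, sz).reshape(1, -1).repeat(sz, 0).astype(np.float32)
yy = np.arange(0, sz).reshape(-1, 1).repeat(sz, 1).astype(np.float32)
xx, yy = C.Constant(xx, xx.shape), C.Constant(yy, yy.shape)
guide = C.input_variable([bs, sz, sz], dynamic_axes=[])
gx, gy, gz, fx, fy, fz, cx, cy, cz = grid_coord(
    guide, xx, yy, sz, small_sz, sigma_r, bs)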
Example #2
def test_Clip(tmpdir):
    data = np.asarray([0.2, 1.3, 4., 5.5, 0.0], np.float32)
    min_v = 2
    max_v = 4
    model = C.clip(data, min_v, max_v)

    verify_no_input(model, tmpdir, 'clip_0')

    x = C.input_variable(data.shape)

    model = C.clip(x, min_v, max_v)

    verify_one_input(model, data, tmpdir, 'clip_1')
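For reference, a quick check of what this test pins down (a sketch, assuming `import cntk as C` and `import numpy as np` as above):

data = np.asarray([0.2, 1.3, 4., 5.5, 0.0], np.float32)
print(C.clip(data, 2, 4).eval())  # -> [2.  2.  4.  4.  2.]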
Example #3
def test_Clip(tmpdir, dtype):
    if dtype == np.float16:
        pytest.skip("TO BE FIXED")
    with C.default_options(dtype=dtype):
        data = np.asarray([0.2, 1.3, 4., 5.5, 0.0], dtype)
        min_v = 2
        max_v = 4
        model = C.clip(data, min_v, max_v)

        verify_no_input(model, tmpdir, 'clip_0')

        x = C.input_variable(data.shape)

        model = C.clip(x, min_v, max_v)

        verify_one_input(model, data, tmpdir, 'clip_1')
Example #4
def gaussian_mdn_loss(output_vector, target_vector, nmix: int, ndim: int):
    """
    Loss function for a Gaussian mixture density network, typically used for regression problems.
    Mixture density networks are useful for representing arbitrary conditional probabilities,
    in the same way a conventional neural network can represent arbitrary functions.

    Example:
        ndim, nmix = 1, 3
        input_tensor = C.input_variable(1, name="input_tensor")
        target_tensor = C.input_variable(1, name="target_tensor")

        # model
        inner = Dense(50, activation=C.relu)(input_tensor)
        inner = Dense(50, activation=C.relu)(inner)
        prediction_tensor = Dense((ndim + 2) * nmix, activation=None)(inner)

        loss = gaussian_mdn_loss(prediction_tensor, target_tensor, nmix=nmix, ndim=ndim)

    Arguments:
        output_vector: network output
        target_vector: ground truths (typically a continuous variable)
        nmix (int): number of mixtures
        ndim (int): number of dimensions in a gaussian kernel

    Returns:
        :class:`~cntk.ops.functions.Function`
    """

    @C.typemap
    def gaussian_mdn_phi(target, mu, sigma, ndim: int):
        """
        Calculates phi between the target tensor and the network prediction.
        Does not assume independence between the components of the target.

        Arguments:
            target: target tensor with shape (ndim, )
            mu: means of gaussian mdn with shape (nmix, ndim)
            sigma: sigma of gaussian mdn
            ndim (int): number of dimensions in gaussian

        Returns:
            :class:`~cntk.ops.functions.Function`
        """
        if not len(mu.shape) == 2:
            raise ValueError("mu {0} must have shape (nmix, ndim)".format(mu.shape))

        t = C.expand_dims(target, axis=0)

        exp_term = C.exp(C.negate(C.square(C.reduce_l2(t - mu, axis=-1)) / (2 * C.square(sigma))))
        factor = C.reciprocal((2 * pi) ** (ndim / 2) * C.pow(sigma, ndim))
        return factor * exp_term

    alpha, mu, sigma = gaussian_mdn_coeff(output_vector, nmix=nmix, ndim=ndim)
    phi = gaussian_mdn_phi(target_vector, mu, sigma, ndim=ndim)
    loss = C.negate(C.log(C.clip(C.reduce_sum(alpha * phi, axis=0), 1e-10, 1e10)))
    return loss
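As a sanity check on the density formula above, here is the same factor/exp_term arithmetic for a single 1-D component in plain Python (hypothetical values):

from math import pi, exp
mu, sigma, t, ndim = 0.0, 1.0, 0.5, 1
phi = (1.0 / ((2 * pi) ** (ndim / 2) * sigma ** ndim)) * \
      exp(-((t - mu) ** 2) / (2 * sigma ** 2))
print(phi)  # ~0.3521, the standard normal density at 0.5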
Example #5
def clip(x, min_value, max_value):
    """Element-wise value clipping.
    If min_value > max_value, clipping range is [min_value,min_value].
    # Arguments
        x: Tensor or variable.
        min_value: Tensor, float, int, or None.
            If min_value is None, defaults to -infinity.
        max_value: Tensor, float, int, or None.
            If max_value is None, defaults to infinity.
    # Returns
        A tensor.
    """
    if max_value is None:
        max_value = np.inf
    if min_value is None:
        min_value = -np.inf
    max_value = C.maximum(min_value, max_value)

    return C.clip(x, min_value, max_value)
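The None handling above mirrors NumPy's clip; a NumPy-only sketch of the same contract (assumes `import numpy as np`):

x = np.array([-3.0, 0.5, 7.0], dtype=np.float32)
print(np.clip(x, None, 4.0))  # upper bound only -> [-3.   0.5  4. ]
print(np.clip(x, 0.0, None))  # lower bound only -> [0.  0.5 7. ]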
Example #6
    def build(self):
        input_kernel = C.Parameter(shape=(self._input_size, self._hidden_dim),
                                   init=self._input_initializer)
        recur_kernel = C.Parameter(shape=(self._hidden_dim, ),
                                   init=self._recurrent_initializer)
        bias = C.Parameter(shape=(self._hidden_dim,), init=0)
        if self._recurrent_min_abs > 0:
            abs_kernel = C.abs(recur_kernel)
            min_abs_kernel = C.element_max(abs_kernel, self._recurrent_min_abs)
            recur_kernel = min_abs_kernel * C.element_select(
                C.greater_equal(recur_kernel, C.constant(0)), C.constant(1),
                C.constant(-1))
        if self._recurrent_max_abs:
            recur_kernel = C.clip(recur_kernel, -self._recurrent_max_abs,
                                  self._recurrent_max_abs)

        @C.Function
        def runit(h, x):
            h_t = C.times(x, input_kernel) + bias + recur_kernel * h
            return h_t

        return runit
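The two clamps above keep each recurrent weight's magnitude inside [recurrent_min_abs, recurrent_max_abs] while preserving its sign; a NumPy sketch of the same arithmetic with hypothetical bounds (assumes `import numpy as np`):

w = np.array([-0.01, -0.5, 0.02, 3.0], dtype=np.float32)
w = np.maximum(np.abs(w), 0.1) * np.where(w >= 0, 1.0, -1.0)  # min abs 0.1
w = np.clip(w, -2.0, 2.0)                                     # max abs 2.0
print(w)  # -> [-0.1 -0.5  0.1  2. ]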
Example #7
    def Loss(self):
        # Evaluate old actions and values
        logprobs, state_value, dist_entropy = self.policy.evaluate()

        # Importance-sampling ratio: pi_theta / pi_theta_old
        c_old_logprobs = C.input_variable(logprobs.shape, name='old_log_probs')
        ratios = C.exp(logprobs - C.stop_gradient(c_old_logprobs))

        c_rewards = C.input_variable(1, name='rewards')
        advantages = c_rewards - C.stop_gradient(state_value)

        # Finding Surrogate Loss:
        surr1 = ratios * advantages
        surr2 = C.clip(ratios, 1 - self.eps_clip,
                       1 + self.eps_clip) * advantages
        neglog_loss = -C.element_min(surr1, surr2)
        entropy_loss = -0.01 * dist_entropy
        actor_loss = C.reduce_mean(neglog_loss + entropy_loss)
        critic_loss = 0.5 * C.reduce_mean(C.square(state_value - c_rewards))
        loss = actor_loss + critic_loss

        chunk = {
            'neglog_loss': neglog_loss,
            'entropy_loss': entropy_loss,
            'actor_loss': actor_loss,
            'critic_loss': critic_loss
        }

        trainer = C.Trainer(
            loss, (loss, None),
            C.adam(loss.parameters,
                   C.learning_parameter_schedule_per_sample(self.lr),
                   C.momentum_schedule_per_sample(self.betas[0]),
                   variance_momentum=C.momentum_schedule_per_sample(
                       self.betas[1])))
        # trainer = C.Trainer(loss, (loss, None), C.adam(loss.parameters, C.learning_parameter_schedule(10), C.momentum_schedule(0.9), variance_momentum=C.momentum_schedule(0.999))) # higher learning rate

        return loss, chunk, trainer
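A NumPy sketch of the clipped surrogate computed above (hypothetical numbers, eps_clip = 0.2 assumed): clipping only bites when the ratio drifts outside [1 - eps, 1 + eps].

ratios = np.array([0.5, 1.0, 1.5], dtype=np.float32)
advantages = np.array([1.0, -1.0, 1.0], dtype=np.float32)
surr1 = ratios * advantages
surr2 = np.clip(ratios, 0.8, 1.2) * advantages
print(-np.minimum(surr1, surr2))  # -> [-0.5  1.  -1.2]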
Example #8
def main():
    show_image = False
    if show_image:
        bs = 1
        ci = 3
        co = 3
        cg = co * (ci + 1)
        gd = 8
        gh = 64
        gw = 64
        h = 256
        w = 256
    else:
        bs = 1
        ci = 3
        co = 3
        cg = co * (ci + 1)
        gd = 8
        gh = 64
        gw = 64
        h = 1024
        w = 1024

    im = C.input_variable([bs, ci, h, w], needs_gradient=True, dynamic_axes=[])
    guide = C.input_variable([bs, h, w], needs_gradient=True, dynamic_axes=[])
    guide_no_grad = C.input_variable([bs, h, w],
                                     needs_gradient=False,
                                     dynamic_axes=[])
    grid = C.input_variable([bs, cg, gd, gh, gw],
                            needs_gradient=True,
                            dynamic_axes=[])
    # Create indices
    xx = np.arange(0, w).reshape(1, -1).repeat(h, 0).astype(np.float32)
    yy = np.arange(0, h).reshape(-1, 1).repeat(w, 1).astype(np.float32)
    xx = C.Constant(xx, xx.shape)
    yy = C.Constant(yy, yy.shape)
    gx = ((xx + 0.5) / w) * gw
    gy = ((yy + 0.5) / h) * gh
    gz = C.clip(guide, 0.0, 1.0) * gd
    gz_no_grad = C.clip(guide_no_grad, 0.0, 1.0) * gd
    fx = C.element_max(C.floor(gx - 0.5), 0.0)
    fy = C.element_max(C.floor(gy - 0.5), 0.0)
    fz = C.element_max(C.floor(gz - 0.5), 0.0)
    fz_no_grad = C.element_max(C.floor(gz_no_grad - 0.5), 0.0)
    wx = gx - 0.5 - fx
    wy = gy - 0.5 - fy
    wx = C.expand_dims(C.expand_dims(wx, -1 - len(wx.shape)),
                       -1 - len(wx.shape))
    wy = C.expand_dims(C.expand_dims(wy, -1 - len(wy.shape)),
                       -1 - len(wy.shape))
    wz = C.abs(gz - 0.5 - fz)
    wz = C.expand_dims(wz, 0)
    fx = C.expand_dims(C.expand_dims(fx, -1 - len(fx.shape)),
                       -1 - len(fx.shape))
    fy = C.expand_dims(C.expand_dims(fy, -1 - len(fy.shape)),
                       -1 - len(fy.shape))
    cx = C.element_min(fx + 1, gw - 1)
    cy = C.element_min(fy + 1, gh - 1)
    cz = C.element_min(fz_no_grad + 1, gd - 1)
    batch_idx = np.arange(bs).reshape(bs, 1, 1, 1).astype(np.float32)
    batch_idx = C.Constant(batch_idx, batch_idx.shape)
    out = []
    flat_grid = C.reshape(grid, [-1])
    for c_ in range(co):
        c_idx = np.arange((ci + 1) * c_,
                          (ci + 1) * (c_ + 1)).reshape(1, ci + 1, 1,
                                                       1).astype(np.float32)
        c_idx = C.Constant(c_idx, c_idx.shape)

        def flatten_and_gather(x, y, z):
            linear_idx = x + gw * y + gw * gh * z + c_idx * gw * gh * gd + batch_idx * gw * gh * gd * cg
            flat_linear_idx = C.reshape(linear_idx, [-1])
            return C.reshape(C.gather(flat_grid, flat_linear_idx),
                             linear_idx.shape)

        gather_fff = flatten_and_gather(fx, fy, fz_no_grad)
        gather_ffc = flatten_and_gather(fx, fy, cz)
        gather_fcf = flatten_and_gather(fx, cy, fz_no_grad)
        gather_fcc = flatten_and_gather(fx, cy, cz)
        gather_cff = flatten_and_gather(cx, fy, fz_no_grad)
        gather_cfc = flatten_and_gather(cx, fy, cz)
        gather_ccf = flatten_and_gather(cx, cy, fz_no_grad)
        gather_ccc = flatten_and_gather(cx, cy, cz)
        a = gather_fff*(1-wx)*(1-wy)*(1-wz) + \
            gather_ffc*(1-wx)*(1-wy)*(  wz) + \
            gather_fcf*(1-wx)*(  wy)*(1-wz) + \
            gather_fcc*(1-wx)*(  wy)*(  wz) + \
            gather_cff*(  wx)*(1-wy)*(1-wz) + \
            gather_cfc*(  wx)*(1-wy)*(  wz) + \
            gather_ccf*(  wx)*(  wy)*(1-wz) + \
            gather_ccc*(  wx)*(  wy)*(  wz)
        o = C.reduce_sum(a[:, :-1, ...] * im, 1) + a[:, -1, ...]
        print(o.shape)
        out.append(C.expand_dims(o, 0))
    out = C.splice(*out, axis=1)
    loss = C.reduce_l2(out)

    grid_val = np.random.rand(bs, cg, gd, gh, gw).astype(np.float32)
    if show_image:
        guide_val = skio.imread("/data/rgb.png").mean(2)[:h, :w].astype(
            np.float32)
        guide_val = np.expand_dims(guide_val / 255.0, 0)
        im_val = np.tile(np.expand_dims(guide_val, 1), [1, 3, 1, 1])
        out_val = out.eval({
            im: im_val,
            guide: guide_val,
            guide_no_grad: guide_val,
            grid: grid_val
        })
        out_val = np.clip(np.transpose(np.squeeze(out_val), [1, 2, 0]), 0, 1)
        skio.imsave("/output/imout.png", out_val)
    else:
        im_val = np.random.randn(bs, ci, h, w).astype(np.float32)
        guide_val = np.random.rand(bs, h, w).astype(np.float32)
        # warm-up ("burning") iterations, excluded from timing
        for it in range(5):
            print('burning (', it, ')')
            g = loss.grad({
                im: im_val,
                guide: guide_val,
                guide_no_grad: guide_val,
                grid: grid_val
            })
        # actual iterations
        start = time.time()
        for it in range(50):
            print('profiling (', it, ')')
            g = loss.grad({
                im: im_val,
                guide: guide_val,
                guide_no_grad: guide_val,
                grid: grid_val
            })
        end = time.time()
        runtime = (end - start) * 1000.0 / 50.0
        print('Runtime:', runtime)
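The flatten_and_gather helper above relies on standard row-major flattening; a NumPy sketch of the same linear-index arithmetic (assumes `import numpy as np`):

g = np.arange(2 * 3 * 4 * 5, dtype=np.float32).reshape(2, 3, 4, 5)
b, ch, y, x = 1, 2, 3, 4
flat_idx = x + 5 * y + 5 * 4 * ch + 5 * 4 * 3 * b
assert g.ravel()[flat_idx] == g[b, ch, y, x]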
Example #9
def main():
    bs = 4
    c = 64
    h = 512
    w = 512

    im = C.input_variable([bs, c, h, w], needs_gradient=True, dynamic_axes=[])
    warp = C.input_variable([bs, 2, h, w],
                            needs_gradient=True,
                            dynamic_axes=[])
    warp_ng = C.input_variable([bs, 2, h, w],
                               needs_gradient=False,
                               dynamic_axes=[])
    # Create indices
    dx = 0.5 * (warp[:, 0, :, :] + 1.0)
    dy = 0.5 * (warp[:, 1, :, :] + 1.0)
    new_x = C.clip(dx * w, 0, w)
    new_y = C.clip(dy * h, 0, h)
    fx = C.clip(C.floor(new_x), 0, w - 2)
    fy = C.clip(C.floor(new_y), 0, h - 2)
    wx = new_x - fx
    wy = new_y - fy
    dx_ng = 0.5 * (warp_ng[:, 0, :, :] + 1.0)
    dy_ng = 0.5 * (warp_ng[:, 1, :, :] + 1.0)
    new_x_ng = C.clip(dx_ng * w, 0, w)
    new_y_ng = C.clip(dy_ng * h, 0, h)
    fx_ng = C.clip(C.floor(new_x_ng), 0, w - 2)
    fy_ng = C.clip(C.floor(new_y_ng), 0, h - 2)

    chan_idx = np.arange(c).reshape(1, c, 1, 1)
    chan_idx = C.Constant(chan_idx, chan_idx.shape)
    batch_idx = np.arange(bs).reshape(bs, 1, 1, 1)
    batch_idx = C.Constant(batch_idx, batch_idx.shape)
    flat_im = C.reshape(im, [-1])

    def flatten_and_gather(x, y):
        linear_idx = x + w * y + w * h * chan_idx + w * h * c * batch_idx
        flat_linear_idx = C.reshape(linear_idx, [-1])
        return C.reshape(C.gather(flat_im, flat_linear_idx), linear_idx.shape)

    gather_ff = flatten_and_gather(fx_ng, fy_ng)
    gather_fc = flatten_and_gather(fx_ng, fy_ng + 1)
    gather_cf = flatten_and_gather(fx_ng + 1, fy_ng)
    gather_cc = flatten_and_gather(fx_ng + 1, fy_ng + 1)
    out = gather_ff*(1-wx)*(1-wy) + \
          gather_fc*(1-wx)*(  wy) + \
          gather_cf*(  wx)*(1-wy) + \
          gather_cc*(  wx)*(  wy)
    loss = C.reduce_l2(out)

    im_val = np.random.randn(bs, c, h, w).astype(np.float32)
    warp_val = np.random.rand(bs, 2, h, w).astype(np.float32)
    # warm-up ("burning") iterations, excluded from timing
    for it in range(5):
        print('burning (', it, ')')
        g = loss.grad({im: im_val, warp: warp_val, warp_ng: warp_val})
    # actual iterations
    start = time.time()
    for it in range(50):
        print('profiling (', it, ')')
        g = loss.grad({im: im_val, warp: warp_val, warp_ng: warp_val})
    end = time.time()
    runtime = (end - start) * 1000.0 / 50.0
    print('Runtime:', runtime)
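The four gathers above implement standard bilinear interpolation; a quick NumPy check that the corner weights always sum to one (assumes `import numpy as np`):

new_x, new_y = 10.3, 20.8
fx, fy = np.floor(new_x), np.floor(new_y)
wx, wy = new_x - fx, new_y - fy
weights = [(1 - wx) * (1 - wy), (1 - wx) * wy, wx * (1 - wy), wx * wy]
print(sum(weights))  # -> 1.0 (up to float rounding)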
Example #10
def main():
  bs = 4
  c = 16
  h = 512
  w = 512

  im = C.input_variable([bs, c, h, w], needs_gradient=True, dynamic_axes=[])
  affine_mtx = C.input_variable([bs, 2, 3], needs_gradient=True, dynamic_axes=[])
  affine_mtx_ng = C.input_variable([bs, 2, 3], needs_gradient=False, dynamic_axes=[])
  xx = np.arange(0, w).reshape(1, -1).repeat(h, 0).astype(np.float32)
  yy = np.arange(0, h).reshape(-1, 1).repeat(w, 1).astype(np.float32)
  xx = C.Constant(xx, xx.shape)
  yy = C.Constant(yy, yy.shape) 
  nrm_x = 2.0 * (xx / w) - 1.0
  nrm_y = 2.0 * (yy / h) - 1.0
  nrm_x = C.expand_dims(nrm_x, -1 - len(nrm_x.shape))
  nrm_y = C.expand_dims(nrm_y, -1 - len(nrm_y.shape))
  xformed_x = affine_mtx[:, 0, 0] * nrm_x + \
              affine_mtx[:, 0, 1] * nrm_y + \
              affine_mtx[:, 0, 2]
  xformed_y = affine_mtx[:, 1, 0] * nrm_x + \
              affine_mtx[:, 1, 1] * nrm_y + \
              affine_mtx[:, 1, 2]
  xformed_x = 0.5 * (xformed_x + 1.0)
  xformed_y = 0.5 * (xformed_y + 1.0)
  xformed_x = C.expand_dims(xformed_x, 0)
  xformed_y = C.expand_dims(xformed_y, 0)
  xformed_x_ng = affine_mtx_ng[:, 0, 0] * nrm_x + \
                 affine_mtx_ng[:, 0, 1] * nrm_y + \
                 affine_mtx_ng[:, 0, 2]
  xformed_y_ng = affine_mtx_ng[:, 1, 0] * nrm_x + \
                 affine_mtx_ng[:, 1, 1] * nrm_y + \
                 affine_mtx_ng[:, 1, 2]
  xformed_x_ng = 0.5 * (xformed_x_ng + 1.0)
  xformed_y_ng = 0.5 * (xformed_y_ng + 1.0)
  xformed_x_ng = C.expand_dims(xformed_x_ng, 0)
  xformed_y_ng = C.expand_dims(xformed_y_ng, 0)

  fx = C.clip(C.floor(w * xformed_x), 0, w - 2)
  fy = C.clip(C.floor(h * xformed_y), 0, h - 2)
  wx = w * xformed_x - fx
  wy = h * xformed_y - fy
  fx_ng = C.clip(C.floor(w * xformed_x_ng), 0, w - 2)
  fy_ng = C.clip(C.floor(h * xformed_y_ng), 0, h - 2)

  chan_idx = np.arange(c).reshape(1, c, 1, 1)
  chan_idx = C.Constant(chan_idx, chan_idx.shape)
  batch_idx = np.arange(bs).reshape(bs, 1, 1, 1)
  batch_idx = C.Constant(batch_idx, batch_idx.shape)
  flat_im = C.reshape(im, [-1])
  def flatten_and_gather(x, y):
    linear_idx = x + w*y
    linear_idx = linear_idx + w*h*chan_idx + w*h*c*batch_idx
    flat_linear_idx = C.reshape(linear_idx, [-1])
    return C.reshape(C.gather(flat_im, flat_linear_idx),linear_idx.shape)
  gather_ff = flatten_and_gather(fx_ng    , fy_ng    )
  gather_fc = flatten_and_gather(fx_ng    , fy_ng + 1)
  gather_cf = flatten_and_gather(fx_ng + 1, fy_ng    )
  gather_cc = flatten_and_gather(fx_ng + 1, fy_ng + 1)
  out = gather_ff*(1-wx)*(1-wy) + \
        gather_fc*(1-wx)*(  wy) + \
        gather_cf*(  wx)*(1-wy) + \
        gather_cc*(  wx)*(  wy)
  loss = C.reduce_l2(out)

  im_val = np.random.randn(bs, c, h, w).astype(np.float32)
  affine_mtx_val = np.zeros([bs, 2, 3], dtype=np.float32)
  affine_mtx_val[:, 0, 1] = 1.0
  affine_mtx_val[:, 1, 0] = 1.0
  # warm-up ("burning") iterations, excluded from timing
  for it in range(5):
    print('burning (', it, ')')
    g = loss.grad({im : im_val, affine_mtx : affine_mtx_val, affine_mtx_ng : affine_mtx_val})
  # actual iterations
  start = time.time()
  for it in range(50):
    print('profiling (', it, ')')
    g = loss.grad({im : im_val, affine_mtx : affine_mtx_val, affine_mtx_ng : affine_mtx_val})
  end = time.time()
  runtime = (end-start)*1000.0/50.0
  print('Runtime:', runtime)
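The two matrix rows above apply a 2x3 affine transform to homogeneous normalized coordinates; a NumPy sketch with a hypothetical identity matrix (assumes `import numpy as np`):

A = np.array([[1.0, 0.0, 0.0],
              [0.0, 1.0, 0.0]], dtype=np.float32)
p = np.array([0.25, -0.5, 1.0], dtype=np.float32)  # (nrm_x, nrm_y, 1)
print(A @ p)  # identity transform -> [ 0.25 -0.5 ]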
Example #11
    def run(self):
        while self.episode < EPISODES:
            obs, action, pred, reward = self.get_batch()
            obs, action, pred, reward = (obs[:BUFFER_SIZE],
                                         action[:BUFFER_SIZE],
                                         pred[:BUFFER_SIZE],
                                         reward[:BUFFER_SIZE])
            old_prediction = pred

            # pred_values = self.critic.predict(obs)

            # advantage = reward - pred_values

            # actor_loss = self.actor.fit([obs, advantage, old_prediction], [action], batch_size=BATCH_SIZE, shuffle=True, epochs=EPOCHS, verbose=False)
            # critic_loss = self.critic.fit([obs], [reward], batch_size=BATCH_SIZE, shuffle=True, epochs=EPOCHS, verbose=False)
            # self.writer.add_scalar('Actor loss', actor_loss.history['loss'][-1], self.gradient_steps)
            # self.writer.add_scalar('Critic loss', critic_loss.history['loss'][-1], self.gradient_steps)

            #region actor training
            pred_values = self.critic.eval({self.critic.arguments[0]: obs})

            advantage = reward - pred_values

            c_action = C.input_variable(action.shape[-1], name='action')
            c_prediction = C.input_variable(old_prediction.shape[-1],
                                            name='old_prediction')
            c_advantage = C.input_variable(1, name='advantage')

            prob = C.reduce_sum(c_action * self.actor)
            old_prob = C.reduce_sum(c_action * c_prediction)
            ratio = prob / (old_prob + 1e-10)
            surr1 = c_advantage * ratio
            surr2 = c_advantage * C.clip(ratio, 1 - LOSS_CLIPPING,
                                         1 + LOSS_CLIPPING)
            # loss = -C.reduce_mean(C.element_min(surr1, surr2) + ENTROPY_LOSS * -(prob * C.log(prob + 1e-10))) # from keras
            neglog_loss = -C.element_min(surr1, surr2)
            entropy_loss = -ENTROPY_LOSS * -(prob * C.log(prob + 1e-10))
            # loss = -C.element_min(surr1, surr2) - ENTROPY_LOSS * -(prob * C.log(prob + 1e-10)) # from keras
            # loss = -C.element_min(surr1, surr2) + ENTROPY_LOSS * -(prob * C.log(prob + 1e-10)) # from pytorch ???
            loss = C.reduce_mean(neglog_loss + entropy_loss)
            actor_loss = loss

            trainer = C.Trainer(
                actor_loss, (actor_loss, None),
                C.adam(actor_loss.parameters,
                       C.learning_parameter_schedule_per_sample(LR),
                       C.learning_parameter_schedule_per_sample(0.99)))

            avg = 0
            avg_out = {neglog_loss.output: 0, entropy_loss.output: 0}
            for epoch in range(EPOCHS):
                data_size = action.shape[0]
                shuffle_idx = random.sample(list(range(data_size)), data_size)

                mb_action = action[shuffle_idx]
                mb_obs = obs[shuffle_idx]
                mb_old_prediction = old_prediction[shuffle_idx]
                mb_advantage = advantage[shuffle_idx]

                updated, out = trainer.train_minibatch(
                    dict(zip(actor_loss.arguments,
                             [mb_advantage, mb_action, mb_obs, mb_old_prediction])),
                    outputs=[neglog_loss.output, entropy_loss.output])
                # print(trainer.previous_minibatch_loss_average)
                avg += trainer.previous_minibatch_loss_average
                avg_out[neglog_loss.output] += out[neglog_loss.output].mean()
                avg_out[entropy_loss.output] += out[entropy_loss.output].mean()
            #endregion
            self.writer.add_scalar('Actor loss', avg / EPOCHS,
                                   self.gradient_steps)
            self.writer.add_scalar('neglog loss',
                                   avg_out[neglog_loss.output] / EPOCHS,
                                   self.gradient_steps)
            self.writer.add_scalar('entropy loss',
                                   avg_out[entropy_loss.output] / EPOCHS,
                                   self.gradient_steps)

            #region critic training
            c_reward = C.input_variable(1, name='reward')
            loss = C.reduce_mean(C.square(self.critic - c_reward))
            critic_loss = loss

            trainer = C.Trainer(
                critic_loss, (critic_loss, None),
                C.adam(critic_loss.parameters,
                       C.learning_parameter_schedule_per_sample(LR),
                       C.learning_parameter_schedule_per_sample(0.99)))

            avg = 0
            for epoch in range(EPOCHS):
                data_size = action.shape[0]
                shuffle_idx = random.sample(list(range(data_size)), data_size)

                mb_obs = obs[shuffle_idx]
                mb_reward = reward[shuffle_idx]

                trainer.train_minibatch(
                    dict(zip(critic_loss.arguments, [mb_obs, mb_reward])))
                # print(trainer.previous_minibatch_loss_average)
                avg += trainer.previous_minibatch_loss_average


            #endregion
            self.writer.add_scalar('Critic loss', avg / EPOCHS,
                                   self.gradient_steps)

            self.gradient_steps += 1
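A NumPy sketch of the probability ratio computed above (hypothetical one-hot action and probability vectors; the 1e-10 term guards against division by zero; assumes `import numpy as np`):

action = np.array([0.0, 1.0, 0.0], dtype=np.float32)  # one-hot picked action
new_p = np.array([0.2, 0.5, 0.3], dtype=np.float32)
old_p = np.array([0.25, 0.4, 0.35], dtype=np.float32)
ratio = (action * new_p).sum() / ((action * old_p).sum() + 1e-10)
print(ratio)  # -> ~1.25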