Example #1
 def p_p(cur_z):
     p_p_2_linear = self.bn(F.relu(F.dropout(self.p_p_2_lin(cur_z))))
     p_p_2_mean = self.p_p_2_m(p_p_2_linear)
     p_p_2_var = F.exp(self.p_p_2_v(p_p_2_linear))
     p_p_2 = D.GaussianDistribution(p_p_2_mean, p_p_2_var)
     z_2 = p_p_2.sample()
     return z_2, p_p_2
Example #2
    def __init__(self,
                 n_input_channels,
                 action_size,
                 var,
                 n_hidden_layers=0,
                 n_hidden_channels=None,
                 min_action=None,
                 max_action=None,
                 bound_mean=False,
                 nonlinearity=F.relu):

        self.n_input_channels = n_input_channels
        self.action_size = action_size
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_channels = n_hidden_channels
        self.min_action = min_action
        self.max_action = max_action
        self.bound_mean = bound_mean
        self.nonlinearity = nonlinearity
        self.var = var
        layers = []
        layers.append(L.Linear(n_input_channels, n_hidden_channels))
        for _ in range(n_hidden_layers - 1):
            layers.append(self.nonlinearity)
            layers.append(L.Linear(n_hidden_channels, n_hidden_channels))
        layers.append(L.Linear(n_hidden_channels, action_size))
        if self.bound_mean:
            layers.append(
                lambda x: bound_by_tanh(x, self.min_action, self.max_action))
        layers.append(lambda x: distribution.GaussianDistribution(
            x, self.xp.broadcast_to(self.var, x.shape)))
        super().__init__(*layers)
Example #3
 def p_b(state):
     p_b_1_linear = self.bn(F.relu(F.dropout(self.p_b_1_lin(state))))
     p_b_1_mean = self.p_b_1_m(p_b_1_linear)
     p_b_1_var = F.exp(self.p_b_1_v(p_b_1_linear))
     p_b_1 = D.GaussianDistribution(p_b_1_mean, p_b_1_var)
     z_1 = p_b_1.sample()
     return z_1, p_b_1
Example #4
 def setUp(self):
     self.mean = np.random.normal(size=(self.batch_size,
                                        self.ndim)).astype(np.float32)
     self.var = np.random.uniform(low=0.5,
                                  high=2.0,
                                  size=(self.batch_size,
                                        self.ndim)).astype(np.float32)
     self.distrib = distribution.GaussianDistribution(self.mean, self.var)
Example #5
 def test_kl(self):
     # Compare it to chainer.functions.gaussian_kl_divergence
     standard = distribution.GaussianDistribution(
         mean=np.zeros((self.batch_size, self.ndim), dtype=np.float32),
         var=np.ones((self.batch_size, self.ndim), dtype=np.float32))
     kl = self.distrib.kl(standard)
     chainer_kl = chainer.functions.gaussian_kl_divergence(
         self.distrib.mean, self.distrib.ln_var)
     np.testing.assert_allclose(kl.data.sum(), chainer_kl.data, rtol=1e-5)
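For reference, chainer.functions.gaussian_kl_divergence(mean, ln_var) returns KL(N(mean, var) || N(0, I)) summed over all elements, which is exactly the quantity test_kl checks. A minimal NumPy sketch of that closed form (the function and variable names here are illustrative, not taken from the test suite):

import numpy as np

def kl_to_standard_normal(mean, var):
    # Elementwise KL( N(mean, var) || N(0, 1) ) for a diagonal Gaussian:
    # 0.5 * (mean^2 + var - log(var) - 1)
    return 0.5 * (mean ** 2 + var - np.log(var) - 1.0)

# Summing over all elements should match the values compared in test_kl above.
mean = np.random.normal(size=(2, 3)).astype(np.float32)
var = np.random.uniform(0.5, 2.0, size=(2, 3)).astype(np.float32)
total_kl = kl_to_standard_normal(mean, var).sum()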
Example #6
    def __init__(self,
                 n_input_channels,
                 action_size,
                 var,
                 n_hidden_layers=0,
                 n_hidden_channels=None,
                 min_action=None,
                 max_action=None,
                 bound_mean=False,
                 nonlinearity=F.relu,
                 mean_wscale=1):

        self.n_input_channels = n_input_channels
        self.action_size = action_size
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_channels = n_hidden_channels
        self.min_action = min_action
        self.max_action = max_action
        self.bound_mean = bound_mean
        self.nonlinearity = nonlinearity
        if np.isscalar(var):
            self.var = np.full(action_size, var, dtype=np.float32)
        else:
            self.var = var
        layers = []
        if n_hidden_layers > 0:
            # Input to hidden
            layers.append(L.Linear(n_input_channels, n_hidden_channels))
            layers.append(self.nonlinearity)
            for _ in range(n_hidden_layers - 1):
                # Hidden to hidden
                layers.append(L.Linear(n_hidden_channels, n_hidden_channels))
                layers.append(self.nonlinearity)
            # The last layer is used to compute the mean
            layers.append(
                L.Linear(n_hidden_channels,
                         action_size,
                         initialW=LeCunNormal(mean_wscale)))
        else:
            # There's only one layer for computing the mean
            layers.append(
                L.Linear(n_input_channels,
                         action_size,
                         initialW=LeCunNormal(mean_wscale)))

        if self.bound_mean:
            layers.append(
                lambda x: bound_by_tanh(x, self.min_action, self.max_action))

        def get_var_array(shape):
            self.var = self.xp.asarray(self.var)
            return self.xp.broadcast_to(self.var, shape)

        layers.append(lambda x: distribution.GaussianDistribution(
            x, get_var_array(x.shape)))
        super().__init__(*layers)
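A hedged usage sketch for the constructor above. Assuming it belongs to a fixed-covariance Gaussian policy class such as chainerrl.policies.FCGaussianPolicyWithFixedCovariance (an assumption; the class name is not shown in the snippet), the resulting chain maps a batch of observations to a GaussianDistribution:

import numpy as np
from chainerrl import policies

# Hypothetical construction; the class name and argument values are assumptions.
policy = policies.FCGaussianPolicyWithFixedCovariance(
    n_input_channels=4,
    action_size=2,
    var=0.5,
    n_hidden_layers=2,
    n_hidden_channels=64)

obs = np.zeros((1, 4), dtype=np.float32)
action_distrib = policy(obs)      # chainerrl.distribution.GaussianDistribution
action = action_distrib.sample()  # stochastic action drawn from the policy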
Example #7
    def __call__(self, mean):
        """Return a Gaussian with given mean.

        Args:
            mean (chainer.Variable or ndarray): Mean of Gaussian.

        Returns:
            chainerrl.distribution.Distribution: Gaussian whose mean is the
                mean argument and whose variance is computed from the parameter
                of this link.
        """
        var = F.broadcast_to(self.var_func(self.var_param), mean.shape)
        return distribution.GaussianDistribution(mean, var)
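The link that owns var_param and var_func is not shown above. A minimal sketch of such a head, assuming a learned, state-independent variance kept positive by var_func (the class name, defaults, and initializer are illustrative assumptions, not the library's definition):

import chainer
import chainer.functions as F
from chainer import initializers
from chainerrl import distribution

class StateIndependentGaussianHead(chainer.Link):
    # Illustrative sketch: one raw variance parameter per action dimension,
    # squashed through var_func (e.g. softplus) so the variance stays positive.
    def __init__(self, action_size, var_func=F.softplus, var_param_init=0.0):
        super().__init__()
        self.var_func = var_func
        with self.init_scope():
            self.var_param = chainer.Parameter(
                initializer=initializers.Constant(var_param_init),
                shape=(action_size,))

    def __call__(self, mean):
        # Broadcast the learned, state-independent variance to the mean's shape.
        var = F.broadcast_to(self.var_func(self.var_param), mean.shape)
        return distribution.GaussianDistribution(mean, var)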
Example #8
    def __call__(self, x, t):
        """
        x: (batch_size, how_many_prev, H, W)
        t: (batch_size, how_many_out, H, W)
        """

        #should be passed by __call__
        jmp_idx = random.randint(1, 4)  #step 3
        start_idx = random.randint(4, 10)  #step 2
        end_idx = jmp_idx + start_idx

        loss = None

        #Reshape to (784) from (28, 28)
        new_x = F.reshape(
            x.astype(np.float32),
            (x.shape[0], x.shape[1], x.shape[2] * x.shape[3])).data
        new_t = F.reshape(
            t.astype(np.float32),
            (t.shape[0], t.shape[1], t.shape[2] * t.shape[3])).data

        batch_size = x.shape[0]
        num_prev = x.shape[1]
        num_pred = t.shape[1]

        self.encoder.reset_state()
        """
        ==============================================
                        Training Loop
        ==============================================
        """
        for i in range(start_idx):
            inp = t[:, i, :, :].reshape(
                (batch_size, 1, 28, 28)).astype(np.float32)
            self.encoder(self.bn(self.lin1(F.dropout(F.relu(
                self.conv1(inp))))))

        state_at_t = self.encoder.h

        for i in range(start_idx, start_idx + jmp_idx):

            inp = t[:, i, :, :].reshape(
                (batch_size, 1, 28, 28)).astype(np.float32)
            self.encoder(self.bn(self.lin1(F.dropout(F.relu(
                self.conv1(inp))))))

        observation_at_jmp = new_t[:, start_idx + jmp_idx - 1, :]
        observation_at_start = new_t[:, start_idx, :]
        state_at_jmp = self.encoder.h
        """
        Get a sample of z_2_target
        """
        p_b_2_linear = self.bn(F.relu(F.dropout(self.p_b_2_lin(state_at_jmp))))
        p_b_2_mean = self.p_b_2_m(p_b_2_linear)
        p_b_2_var = F.exp(self.p_b_2_v(p_b_2_linear))
        p_b_2 = D.GaussianDistribution(p_b_2_mean, p_b_2_var)
        z_2_target = p_b_2.sample()
        """
        Get a sample of z_1_target
        """
        q_I_input = F.concat((z_2_target, state_at_jmp, state_at_t))
        q_I_linear = self.bn(F.relu(F.dropout(self.q_I_lin(q_I_input))))
        q_I_mean = self.q_I_m(q_I_linear)
        q_I_var = F.exp(self.q_I_v(q_I_linear))
        q_I = D.GaussianDistribution(q_I_mean, q_I_var)
        z_1_target = q_I.sample()
        """
        Get a sample of z_1 (sample state_at_t only)
        """
        def p_b(state):
            p_b_1_linear = self.bn(F.relu(F.dropout(self.p_b_1_lin(state))))
            p_b_1_mean = self.p_b_1_m(p_b_1_linear)
            p_b_1_var = F.exp(self.p_b_1_v(p_b_1_linear))
            p_b_1 = D.GaussianDistribution(p_b_1_mean, p_b_1_var)
            z_1 = p_b_1.sample()
            return z_1, p_b_1

        z_1, p_b_1 = p_b(state_at_t)
        """
        Get sample of z_2(from z_1)
        """

        def p_p(cur_z):
            p_p_2_linear = self.bn(F.relu(F.dropout(self.p_p_2_lin(cur_z))))
            p_p_2_mean = self.p_p_2_m(p_p_2_linear)
            p_p_2_var = F.exp(self.p_p_2_v(p_p_2_linear))
            p_p_2 = D.GaussianDistribution(p_p_2_mean, p_p_2_var)
            z_2 = p_p_2.sample()
            return z_2, p_p_2

        z_2, p_p_2 = p_p(z_1)
        """
        Get Reconstruction from z_2
        """

        def p_d(z):
            p_d_1_lin = F.relu(F.dropout(self.p_d_1(z)))
            conv_input = F.reshape(p_d_1_lin, (p_d_1_lin.shape[0], 20, 20, 20))
            recon = F.sigmoid(self.p_d_conv(conv_input))
            return F.reshape(recon, (z.shape[0], 784))

        recon = p_d(z_2_target)
        """
        ==============================================
                        Losses
        ==============================================
        """
        l_3 = F.mean_squared_error(p_d(z_1), observation_at_start)
        l_4 = F.mean_squared_error(p_d(z_1_target), observation_at_start)

        l_x = F.mean_squared_error(recon, observation_at_jmp)
        l_1 = p_b_2.log_prob(z_2_target) - p_p_2.log_prob(z_2_target)
        #switch?
        l_2 = q_I.kl(p_b_1)
        loss = 0
        #loss = l_x + F.sum(l_1) + F.sum(l_2) + l_3 + l_4
        loss += l_x
        #diverges with the log prob loss in its current form
        #  loss += .01 * F.sum(l_1) / batch_size
        #loss += .01 * F.sum(l_2) / batch_size
        loss += l_3
        loss += l_4
        """
        ==============================================
                        Testing Loop
        ==============================================
        """
        test_int = 100

        if self.it % test_int == 0:

            num_left = 5
            samples = np.zeros((batch_size, num_left + 1, 784))
            init_state, _ = p_b(state_at_jmp)
            local_recon = p_d(init_state)
            samples[:, 0, :] = chainer.backends.cuda.to_cpu(local_recon.data)
            for i in range(1, num_left):

                update, _ = p_p(init_state)
                recon = p_d(update)
                samples[:, i, :] = chainer.backends.cuda.to_cpu(recon.data)
                init_state = update
            true_in = chainer.backends.cuda.to_cpu(new_t[:, end_idx, :])
            samples[:, num_left, :] = true_in
            fn = self.directory + str(self.it) + '.png'
            g_vis(samples[4, :, :].reshape((num_left + 1, 28, 28)),
                  save_file=fn)

        if (self.it % 4000 == 0) and (self.it != 0):
            #import pdb; pdb.set_trace()
            pass
        reporter.report(
            {
                'loss': loss,
                'KL': F.sum(l_2),
                'avg_KL': F.sum(l_2) / batch_size,
                'mse': l_x
            }, self)
        self.it += 1
        return loss
Example #9
 def setUp(self):
     self.mean = np.random.rand(self.batch_size,
                                self.ndim).astype(np.float32)
     self.var = np.random.rand(self.batch_size,
                               self.ndim).astype(np.float32)
     self.distrib = distribution.GaussianDistribution(self.mean, self.var)
Example #10
 def __call__(self, x):
     mean = self.hidden_layers(x)
     var = F.broadcast_to(self.var_func(self.var_param), mean.shape)
     return distribution.GaussianDistribution(mean, var)
Example #11
 def __call__(self, x):
     mean, var = self.compute_mean_and_var(x)
     return distribution.GaussianDistribution(mean, var=var)
Example #12
 def __call__(self, x, test=False):
     mean, var = self.compute_mean_and_var(x, test=test)
     return distribution.GaussianDistribution(mean, var=var)