def forward(self, x, eps=1.e-6, noise=None):
        nb = len(x)
        # `noise` is either a rate in [0, 1] or a (rate, sigma) tuple controlling
        # how much of the real batch is perturbed before discrimination
        noise_rate, noise_sigma = 0, 1
        if noise:
            if isinstance(noise, tuple):
                noise_rate, noise_sigma = noise
            else:
                noise_rate = noise
        noise_rate = min(max(noise_rate, 0), 1)
        noise_sigma = abs(noise_sigma)

        # sample latent vectors from a standard normal prior
        # (a uniform(-1, 1) prior is a possible alternative)
        size = (nb, self.latent_dim)
        z = np.random.randn(*size).astype('float32')
        self.xz = self.gen(z)

        if noise_rate > 0:
            # perturb a random subset of the real batch with Gaussian noise
            # before showing it to the discriminator
            noised = x.copy()
            _noise_idx = np.random.choice(nb, int(nb * noise_rate), replace=False)
            _noise = np.random.randn(len(_noise_idx)) * noise_sigma
            noised[_noise_idx] += _noise.reshape(-1, 1)
            self.Disx = self.dis(noised)
        else:
            self.Disx = self.dis(x)
        self.Disxz = self.dis(self.xz)

        # discriminator loss: -E[log D(x)] - E[log(1 - D(G(z)))]
        self.real_cost = - rm.sum(rm.log(self.Disx + eps))/nb
        self.fake_cost = - rm.sum(rm.log(1 - self.Disxz + eps))/nb
        self.GAN_loss = self.real_cost + self.fake_cost

        # fraction of real/fake samples the discriminator classifies correctly
        self.real_count = (self.Disx >= 0.5).sum()/nb
        self.fake_count = (self.Disxz < 0.5).sum()/nb

        # generator loss: min-max or the non-saturating variant -E[log D(G(z))]
        if self.gan_mode == 'minmax':
            self.gen_loss = - self.GAN_loss
        elif self.gan_mode == 'non-saturating':
            self.gen_loss = - rm.sum(rm.log(self.Disxz + eps))/nb

        return self.xz
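
A minimal usage sketch (not part of the original example), assuming `gan` is an instance of the model above, `rm` is ReNom, and `dis_opt` / `gen_opt` are hypothetical optimizers; it alternates discriminator and generator updates from the stored `GAN_loss` and `gen_loss`, following the same train()/grad()/update() pattern used elsewhere on this page.

def train_step(gan, x_batch, dis_opt, gen_opt):
    # discriminator update: minimize -(log D(x) + log(1 - D(G(z))))
    with gan.dis.train():
        gan.forward(x_batch)
    gan.GAN_loss.grad().update(dis_opt)

    # generator update: minimize the selected gen_loss
    # (e.g. the non-saturating -log D(G(z)))
    with gan.gen.train():
        gan.forward(x_batch)
    gan.gen_loss.grad().update(gen_opt)

    return float(gan.GAN_loss.as_ndarray()), float(gan.gen_loss.as_ndarray())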
Example #2
 def predict(self, src_seq, beam_width=10):
     src_seq = src_seq[::-1]  # reverse the source sequence (common seq2seq trick)
     xi = [self.src_w2i.get(word, self.src_w2i['<unk>']) for word in src_seq]  # input words to indices
     xi = np.array(xi).reshape(len(xi), 1)
     xe = self.l1(xi)  # indices to embedding vectors
     # encode: feed the embedded tokens one at a time; keep the final hidden state
     for x in xe:
         h = self.encode(x.reshape(1, -1))
         
     # decode
     cnt = 1
     limit = 100
     L = 0
     H = {}
     H['z'] = h
     H['state'] = self.l2._state
     word = '<bos>'
     sentence = [word]
     t = (L, sentence, H)
     Q = [t]
     is_all_eos = False
     while not is_all_eos and cnt <= limit + 1:  # limit + 1 leaves room for '<eos>'
         cand = list()
         is_all_eos = True
         for L, sentence, H in Q:
             self.l4._z = H['z']
             self.l4._state = H['state']
             word = sentence[-1]
             
             if word == '<eos>':
                 t = (L, sentence, H)
                 cand.append(t)
             else:
                 is_all_eos = False
                 yi = [self.tar_w2i[word]]
                 yi = np.array(yi).reshape(len(yi),-1)
                 ye = self.l3(yi)
                 y = ye.reshape(1,-1)
                 yy = self.decode(y)
                 p = rm.softmax(yy)
                 p = rm.log(p).as_ndarray()
                 p = p[0]
                 z = {}
                 z['z'] = self.l4._z
                 z['state'] = self.l4._state
                 # score every vocabulary entry; pruning to the beam width
                 # happens below when the candidate list is truncated
                 for i in range(self.tar_vocab_size):
                     w = self.tar_i2w[i]
                     s = sentence + [w]
                     l = L + p[i]  # accumulated log-probability of the partial sentence
                     t = (l, s, z)
                     cand.append(t)
                     
         # keep only the beam_width highest-scoring candidates
         cand = sorted(cand, key=lambda tup: tup[0], reverse=True)
         Q = cand[:beam_width]
         cnt += 1
     self.truncate()
     _, sentence, _ = Q[0]
     return sentence
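
A hypothetical call of the method above, assuming `model` is the trained seq2seq model and the source sentence has already been tokenized into words; the decoded list starts with '<bos>' and usually ends with '<eos>'.

src = "this is a pen".split()
decoded = model.predict(src, beam_width=5)
print(' '.join(decoded[1:-1]))  # strip '<bos>' and '<eos>' for display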
    def forward(self, x, eps=1e-3):
        nb = len(x)
        size = (nb, self.latent_dim)
        # sample latent vectors and generate fake samples
        zp = np.random.randn(*size).astype('float32')
        self.xp = self.gen(zp)
        self.Dis_xp = self.dis(self.xp)
        self.Dis_xp_is = self.dis.raw_output  # raw discriminator output, used by the 'max-likelihood' mode below
        self.Dis_x = self.dis(x)

        self.real_cost = - rm.sum(rm.log(self.Dis_x + eps))/nb
        self.fake_cost = - rm.sum(rm.log(1 - self.Dis_xp + eps))/nb
        self.GAN_loss = self.real_cost + self.fake_cost

        gan_mode = 'non-saturating'
        if gan_mode == 'minmax':
            self.gen_loss = - self.GAN_loss
        elif gan_mode == 'non-saturating':
            self.gen_loss = - rm.sum(rm.log(self.Dis_xp + eps))/nb
        elif gan_mode == 'max-likelihood':
            # maximum-likelihood style generator cost based on the raw discriminator output
            self.gen_loss = - rm.sum(rm.exp(self.Dis_xp_is))/nb

        return self.GAN_loss
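
A small numerical aside (not from the original code) showing why the non-saturating objective is usually preferred over the min-max one: when the discriminator confidently rejects fakes (D(G(z)) near 0), log(1 - D) is nearly flat, while -log D still yields a strong learning signal.

import numpy as np

d = np.array([0.01, 0.5, 0.99])      # D(G(z)) for three fake samples
eps = 1e-3
minmax_term = np.log(1 - d + eps)    # the term the min-max generator works against
non_saturating = -np.log(d + eps)    # the non-saturating generator loss
print(minmax_term)      # ~[-0.01, -0.69, -4.5]  (flat near d = 0)
print(non_saturating)   # ~[ 4.5,   0.69,  0.01] (steep near d = 0)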
Example #4
 def func(node):
     return sum(rm.log(node))
    def forward(self, x, y=None, eps=1e-3):
        # x : input data
        # y : one-hot label data for the categorical dist. / supporting discriminator
        #     (may be omitted, i.e. None)
        # self.qzx : style latent z
        # self.rep : input data for decoding
        nb = len(x)

        # --- encoding phase ---
        # (Gaussian noise could optionally be added to the input here for denoising)
        _x = x
        if self.mode=='clustering' or self.mode=='reduction':
            self.qzx, self.qyx = self.enc(_x)
        else:
            self.qzx = self.enc(_x)

        # --- decoding/reconstruction phase ---
        if self.mode=='clustering' or self.mode=='reduction':
            self.recon = self.dec(rm.concat(self.qzx, self.qyx))
        else:
            self.recon = self.dec(self.qzx)

        # --- regularization phase ---
        if self.mode == 'incorp_label':
            self._set_incorpdist(x)
        else:
            # 'clustering' relies on the categorical dist. set up here;
            # 'supervised' and 'dim_reduction' need no extra handling
            self._set_distribution(x)

        if self.mode == 'incorp_label':
            self._incorp_label(x, y, eps=eps)
        else:
            # discriminate samples from the prior p(z) against encoded q(z|x)
            self.Dpz = self.dis(self.pz)
            self.Dqzx = self.dis(self.qzx)
            self.real = -rm.sum(rm.log(self.Dpz + eps))/nb
            self.fake = -rm.sum(rm.log(1 - self.Dqzx + eps))/nb
            # generator-style term used to push q(z|x) towards fooling the discriminator
            self.fake2pos = -rm.sum(rm.log(self.Dqzx + eps))/nb
        if self.mode=='clustering' or self.mode=='reduction':
            _idx = np.where(y.sum(1) == 1)[0]   # labelled samples
            idx_ = np.where(y.sum(1) == 0)[0]   # unlabelled samples
            if len(_idx) > 0:
                self.Cy = self.cds(y)
                self.Cqyx = self.cds(self.qyx)
                self.Creal = -rm.sum(rm.log(
                    self.Cy[_idx] + eps
                ))/len(_idx)
                # the fake term is averaged over the whole batch
                # (restricting it to the labelled subset is a possible alternative)
                self.Cfake = -rm.sum(rm.log(1 - self.Cqyx + eps))/nb
                self.Cfake2 = -rm.sum(rm.log(self.Cqyx[_idx] + eps))/len(_idx)
            else:
                self.Cfake = rm.Variable(0)
                self.Creal = rm.Variable(0)
                self.Cfake2 = rm.Variable(0)

        # --- summarizing losses ---
        self.gan_loss = self.real + self.fake
        if self.mode=='clustering':
            if len(_idx) > 0:
                self.reconE = rm.mean_squared_error(
                    self.recon[idx_], x[idx_])
            else:
                self.reconE = rm.mean_squared_error(self.recon, x)
        else:
            self.reconE = rm.mean_squared_error(self.recon, x)
        self.real_count = (self.Dpz >= 0.5).sum()/nb
        self.fake_count = (self.Dqzx < 0.5).sum()/nb
        self.enc_loss = self.fake2pos
        if self.mode=='clustering' or self.mode=='reduction':
            if len(_idx) > 0:
                self.Creal_count = (self.Cy[_idx] >= 0.5).sum()/len(_idx)
                self.Cfake_count = (self.Cqyx[_idx] < 0.5).sum()/len(_idx)
            else:
                self.Creal_count = 0
                self.Cfake_count = 0
            self.CganE = self.Creal + self.Cfake
            self.CgenE = self.Cfake2

        return self.recon
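
A minimal training-step sketch, assuming `aae` is the model above, `rm` is ReNom, and the three optimizers are hypothetical; it follows the usual adversarial-autoencoder schedule of reconstruction, discriminator, and encoder (generator) updates, reusing the losses computed in forward(). In clustering mode the categorical discriminator terms (CganE, CgenE) would get analogous updates.

def train_step(aae, x_batch, y_batch, opt_ae, opt_dis, opt_enc):
    # 1) reconstruction phase: update encoder and decoder on the reconstruction error
    with aae.enc.train(), aae.dec.train():
        aae.forward(x_batch, y_batch)
    aae.reconE.grad().update(opt_ae)

    # 2) regularization phase: update the discriminator on the GAN loss
    with aae.dis.train():
        aae.forward(x_batch, y_batch)
    aae.gan_loss.grad().update(opt_dis)

    # 3) generator phase: update the encoder so q(z|x) fools the discriminator
    with aae.enc.train():
        aae.forward(x_batch, y_batch)
    aae.enc_loss.grad().update(opt_enc)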
 def func(_dir, x):
     # `eps` is expected to be defined in the enclosing scope
     return -rm.sum(rm.log(
             x + eps if _dir == 'pos' else 1 - x + eps))
Example #7
    def fit(self, epoch=1, epoch_step=250000, test_step=None):
        """
        This method executes training of actor critic.
        Test will be runned after each epoch is done.

        Args:
            epoch (int): Number of epoch for training.
            epoch_step (int): Number of step of one epoch.
            test_step (int): Number steps during test.
        """

        # check
        assert isinstance(self.logger, Logger), "logger must be Logger class"
        self.logger._key_check(log_key=_a2c_keys,
                               log_key_epoch=_a2c_keys_epoch)

        # creating local variables
        envs = self.envs
        test_env = self.test_env
        advantage = self._advantage
        threads = self._num_worker
        gamma = self.gamma
        gradient_clipping = self.gradient_clipping
        value_coef = self.value_coef
        entropy_coef = self.entropy_coef

        # start each environment (after reset)
        [self.envs[_t].start() for _t in range(threads)]

        # logging
        step_counts_log = np.zeros((
            advantage,
            threads,
        ))
        step_count = 0
        episode_counts_log = np.zeros((
            advantage,
            threads,
        ))
        episode_counts = np.zeros((threads, ))

        # epoch
        for e in range(1, epoch + 1):

            # buffers for s, a, r, terminal, s+1
            states = np.zeros((advantage, threads, *test_env.state_shape))
            actions = np.zeros((advantage, threads, 1))
            rewards = np.zeros((advantage, threads, 1))
            dones = np.zeros((advantage, threads, 1))
            states_next = np.zeros((advantage, threads, *test_env.state_shape))

            # value, target value function
            values = np.zeros((advantage, threads, 1))
            target_rewards = np.zeros((advantage + 1, threads, 1))

            # logging
            sum_rewards_log = np.zeros((
                advantage,
                threads,
            ))
            sum_rewards = np.zeros((threads, ))
            continuous_steps_log = np.zeros((
                advantage,
                threads,
            ))
            continuous_steps = np.zeros((threads, ))
            epoch_steps_log = np.zeros((
                advantage,
                threads,
            ))
            epoch_steps_j = 0
            nth_episode_counts_log = np.zeros((
                advantage,
                threads,
            ))
            nth_episode_counts = np.zeros((threads, ))

            # env epoch
            _ = [self.envs[_t].epoch() for _t in range(threads)]

            # initialize: reset every environment and collect the initial states
            states[0] = np.array([envs[i].reset() for i in range(threads)
                                  ]).reshape(-1, *test_env.state_shape)

            # action size
            a_, _ = self._network(states[0])
            a_len = len(a_[0].as_ndarray())

            loss = 0

            max_step = epoch_step / advantage

            self.logger._rollout = True
            self.logger.start(epoch_step)

            for j in range(int(max_step)):

                # for each step
                for step in range(advantage):

                    # choose actions for the current states
                    actions[step] = self._action(states[step])

                    # for each thread
                    for thr in range(threads):

                        # next state,reward,done
                        states_n, rewards_n, dones_n = envs[thr].step(
                            int(actions[step][thr]))

                        states_next[step][thr] = np.copy(states_n)
                        rewards[step][thr] = np.copy(rewards_n)
                        dones[step][thr] = np.copy(dones_n)

                        # summing rewards / append steps
                        sum_rewards[thr] += rewards[step][thr]
                        sum_rewards_log[step][thr] = sum_rewards[thr].copy()
                        continuous_steps[thr] += 1
                        continuous_steps_log[step][thr] = continuous_steps[
                            thr].copy()
                        episode_counts_log[step][thr] = episode_counts[
                            thr].copy()
                        nth_episode_counts_log[step][thr] = nth_episode_counts[
                            thr].copy()

                        # if the episode ended, reset the env and use its initial state as the next state
                        if dones[step][thr]:
                            states_next[step][thr] = envs[thr].reset()
                            sum_rewards[thr] = 0
                            continuous_steps[thr] = 0
                            episode_counts[thr] += 1
                            nth_episode_counts[thr] += 1

                    epoch_steps_log[step] = epoch_steps_j
                    step_counts_log[step] = step_count

                    # append 1 step
                    epoch_steps_j += 1
                    step_count += 1

                    # carry the next state over to the following advantage step
                    if step + 1 < advantage:
                        states[step + 1] = states_next[step].copy()

                    # estimate the state values for this step
                    values[step] = self._value(states[step])

                    # env epoch step
                    _ = [self.envs[_t].epoch_step() for _t in range(threads)]

                # bootstrap the target return with the value of the final next states
                target_rewards[-1] = self._value(states_next[-1])

                # compute n-step discounted targets backwards, masking across episode ends
                for i in reversed(range(advantage)):
                    mask = np.where(dones[i], 0.0, 1.0)
                    target_rewards[
                        i] = rewards[i] + target_rewards[i + 1] * gamma * mask

                # ------- calculating gradients -------

                # reshaping states, target
                reshaped_state = states.reshape(-1, *test_env.state_shape)
                reshaped_target_rewards = target_rewards[:-1].reshape(-1, 1)
                advantage_reward = reshaped_target_rewards - self._value(
                    reshaped_state)

                total_n = advantage * threads

                # reshape index variables for action
                action_index = actions.reshape(-1, )

                # forward pass inside the computational graph (training mode)
                self._network.set_models(inference=False)
                with self._network.train():
                    act, val, entropy = self._calc_forward(reshaped_state)
                    act_log = rm.log(act + 1e-5)

                    # build a one-hot mask selecting the action taken in each state
                    action_coefs = np.zeros_like(act.as_ndarray())
                    action_coefs[range(action_index.shape[0]),
                                 action_index.astype("int")] = 1

                    # policy (actor) loss with entropy bonus, and value (critic) loss
                    act_loss = - rm.sum(advantage_reward * action_coefs * act_log) / total_n \
                        - rm.sum(entropy) * entropy_coef / total_n
                    val_loss = self.loss_func(
                        val, reshaped_target_rewards) * value_coef * 2

                    # total loss
                    total_loss = val_loss + act_loss

                grad = total_loss.grad()

                if gradient_clipping is not None:
                    gradient_clipping(grad)

                grad.update(self._optimizer)

                val_loss_nd = float(val_loss.as_ndarray())
                total_loss_nd = float(total_loss.as_ndarray())
                entropy_np = float(entropy.as_ndarray().mean())

                singular_list = [
                    epoch_step, e, epoch, val_loss_nd, entropy_np,
                    total_loss_nd, advantage, threads
                ]
                log1_key = [
                    "max_step", "epoch", "max_epoch", "loss", "entropy",
                    "total_loss", "advantage", "num_worker"
                ]

                log1_value = [[data] * advantage for data in singular_list]

                thread_step_reverse_list = [
                    states, actions, rewards, dones, states_next,
                    step_counts_log, epoch_steps_log, episode_counts_log,
                    nth_episode_counts_log, continuous_steps_log,
                    sum_rewards_log, values
                ]

                log2_key = [
                    "state", "action", "reward", "terminal", "next_state",
                    "total_step", "epoch_step", "total_episode",
                    "epoch_episode", "steps_per_episode", "sum_reward",
                    "values"
                ]

                log2_value = [
                    data.swapaxes(1, 0)[0] for data in thread_step_reverse_list
                ]

                log_dic = {
                    **dict(zip(log1_key, log1_value)),
                    **dict(zip(log2_key, log2_value))
                }

                self.logger.logger(**log_dic)

                self.logger.update(advantage)

                states[0] = states_next[-1].copy()

                if any([self.envs[_t].terminate() for _t in range(threads)]):
                    print("terminated")
                    break

            else:

                summed_test_reward = self.test(test_step)
                self.logger.logger_epoch(
                    total_episode=episode_counts_log[-1],
                    epoch_episode=nth_episode_counts_log[-1],
                    epoch=e,
                    max_epoch=epoch,
                    test_reward=summed_test_reward,
                    entropy=entropy_np,
                    total_loss=total_loss_nd,
                    advantage=advantage,
                    num_worker=threads)
                self.logger.close()
                continue

            break
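
The target computation above can be read in isolation; the sketch below (a hypothetical helper, plain NumPy) reproduces the backward recursion: bootstrap from the value of the state after the last advantage step, then sweep backwards, zeroing the propagated return wherever an episode terminated.

import numpy as np

def n_step_targets(rewards, dones, bootstrap_value, gamma=0.99):
    # rewards, dones: (advantage, threads, 1); bootstrap_value: (threads, 1)
    advantage = rewards.shape[0]
    targets = np.zeros((advantage + 1,) + rewards.shape[1:])
    targets[-1] = bootstrap_value
    for i in reversed(range(advantage)):
        mask = np.where(dones[i], 0.0, 1.0)   # stop bootstrapping across episode ends
        targets[i] = rewards[i] + gamma * targets[i + 1] * mask
    return targets[:-1]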
Example #8
 def _calc_forward(self, x):
     act, val = self._network(x)
     # policy entropy per sample: -sum_a pi(a|s) * log pi(a|s)
     e = -rm.sum(act * rm.log(act + 1e-5), axis=1)
     entropy = e.reshape(-1, 1)
     return act, val, entropy
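
A quick check (not in the original) of the entropy bonus computed above: it is largest for a uniform policy and shrinks as the policy becomes deterministic, which is why adding it to the loss encourages exploration.

import numpy as np

probs = np.array([[0.25, 0.25, 0.25, 0.25],    # uniform policy
                  [0.97, 0.01, 0.01, 0.01]])   # nearly deterministic policy
entropy = -np.sum(probs * np.log(probs + 1e-5), axis=1)
print(entropy)   # ~[1.386, 0.168]: log(4) for the uniform row, much smaller for the peaked one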