def forward(self, x, eps=1.e-6, noise=None):
    nb = len(x)
    # `noise` may be a rate, or a (rate, sigma) tuple, controlling how much of the
    # real batch is perturbed before being shown to the discriminator
    noise_rate, noise_sigma = 0, 1
    if noise:
        if isinstance(noise, tuple):
            noise_rate, noise_sigma = noise
        else:
            noise_rate = noise
        # clamp the rate to [0, 1] and force sigma to be positive
        noise_rate = noise_rate if noise_rate <= 1 else 1
        noise_rate = noise_rate if 0 <= noise_rate else 0
        noise_sigma = noise_sigma if 0 < noise_sigma else np.abs(noise_sigma)
    # sample latent vectors; a standard-normal prior is used
    # (flip the switch below to sample from a uniform prior instead)
    size = (nb, self.latent_dim)
    if 1:
        z = np.random.randn(np.array(size).prod()).reshape(size).astype('float32')
    else:
        z = np.random.uniform(-1, 1, np.array(size).prod()).reshape(size).astype('float32')
    self.xz = self.gen(z)
    # discriminator output for the (optionally noised) real batch
    if noise_rate > 0:
        noised = x.copy()
        # pick a random subset of rows and perturb them with Gaussian noise
        _noise_idx = np.random.permutation(nb)
        _noise_idx = _noise_idx[np.random.permutation(int(nb*noise_rate))]
        _noise = np.random.randn(len(_noise_idx))*noise_sigma
        noised[_noise_idx] += _noise.reshape(-1, 1)
        self.Disx = self.dis(noised)
    else:
        self.Disx = self.dis(x)
    # discriminator output for the generated batch
    self.Disxz = self.dis(self.xz)
    # discriminator loss (binary cross-entropy on real/fake)
    self.real_cost = - rm.sum(rm.log(self.Disx + eps))/nb
    self.fake_cost = - rm.sum(rm.log(1 - self.Disxz + eps))/nb
    self.GAN_loss = self.real_cost + self.fake_cost
    # fraction of real/fake samples the discriminator classifies correctly
    self.real_count = (self.Disx >= 0.5).sum()/nb
    self.fake_count = (self.Disxz < 0.5).sum()/nb
    # generator loss, depending on the chosen objective
    if self.gan_mode == 'minmax':
        self.gen_loss = - self.GAN_loss
    elif self.gan_mode == 'non-saturating':
        self.gen_loss = - rm.sum(rm.log(self.Disxz + eps))/nb
    return self.xz
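# The forward pass above only builds the losses; a minimal training-step sketch is shown
# below, assuming an instance `gan` of the model above (with `gen`/`dis` sub-models) and
# two caller-supplied ReNom optimizers. All names here are illustrative, not part of the
# original class.
def gan_train_step(gan, x_batch, dis_opt, gen_opt):
    # discriminator update: minimize the real/fake cross-entropy
    with gan.dis.train():
        gan.forward(x_batch)
        gan.GAN_loss.grad().update(dis_opt)
    # generator update: minimize the minmax or non-saturating generator loss
    with gan.gen.train():
        gan.forward(x_batch)
        gan.gen_loss.grad().update(gen_opt)
    return float(gan.GAN_loss.as_ndarray()), float(gan.gen_loss.as_ndarray())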
def predict(self, src_seq, beam_width=10):
    # the source sequence is fed to the encoder in reverse order
    src_seq = src_seq[::-1]
    xi = [self.src_w2i.get(word, self.src_w2i['<unk>']) for word in src_seq]  # word to index
    xi = np.array(xi).reshape(len(xi), 1)
    xe = self.l1(xi)  # index to vector (embedding)
    # encode: feed the embedded tokens one by one; h holds the final encoder output
    for x in xe:
        h = self.encode(x.reshape(1, -1))
    # decode with beam search
    cnt = 1
    limit = 100
    L = 0
    H = {}
    H['z'] = h
    H['state'] = self.l2._state
    word = '<bos>'
    sentence = [word]
    t = (L, sentence, H)
    Q = [t]  # the beam: (log-probability, sentence, decoder state) tuples
    is_all_eos = False
    while not is_all_eos and cnt <= limit + 1:  # limit + 1 leaves room for <eos>
        cand = list()
        is_all_eos = True
        for L, sentence, H in Q:
            # restore the decoder state of this hypothesis
            self.l4._z = H['z']
            self.l4._state = H['state']
            word = sentence[-1]
            if word == '<eos>':
                # finished hypothesis: keep it as-is
                t = (L, sentence, H)
                cand.append(t)
            else:
                is_all_eos = False
                yi = [self.tar_w2i[word]]
                yi = np.array(yi).reshape(len(yi), -1)
                ye = self.l3(yi)
                y = ye.reshape(1, -1)
                yy = self.decode(y)
                p = rm.softmax(yy)
                p = rm.log(p).as_ndarray()
                p = p[0]
                z = {}
                z['z'] = self.l4._z
                z['state'] = self.l4._state
                # expand the hypothesis with every target-vocabulary word
                for i in range(self.tar_vocab_size):
                    w = self.tar_i2w[i]
                    s = sentence + [w]
                    l = L + p[i]
                    t = (l, s, z)
                    cand.append(t)
        # keep the beam_width best candidates by summed log-probability
        cand = sorted(cand, key=lambda tup: tup[0], reverse=True)
        Q = cand[:beam_width]
        cnt += 1
    self.truncate()
    _, sentence, _ = Q[0]
    return sentence
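# Hypothetical usage of the beam-search decoder above; `model`, the example tokens and the
# beam width are illustrative assumptions.
src = ['i', 'have', 'a', 'pen']            # already-tokenized source sentence
hyp = model.predict(src, beam_width=5)     # highest-scoring hypothesis
print(' '.join(hyp[1:-1]))                 # drop the leading <bos> and the trailing <eos>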
def forward(self, x, eps=1e-3):
    nb = len(x)
    size = (nb, self.latent_dim)
    # sample latent vectors from a standard-normal prior and generate fakes
    zp = np.random.randn(np.array(size).prod()).reshape(size).astype('float32')
    self.xp = self.gen(zp)
    # discriminator outputs for generated and real samples
    self.Dis_xp = self.dis(self.xp)
    self.Dis_xp_is = self.dis.raw_output
    self.Dis_x = self.dis(x)
    # discriminator loss (binary cross-entropy on real/fake)
    self.real_cost = - rm.sum(rm.log(self.Dis_x + eps))/nb
    self.fake_cost = - rm.sum(rm.log(1 - self.Dis_xp + eps))/nb
    self.GAN_loss = self.real_cost + self.fake_cost
    # generator loss, depending on the chosen objective
    gan_mode = 'non-saturating'
    if gan_mode == 'minmax':
        # minmax: the generator simply maximizes the discriminator loss
        self.gen_loss = - self.GAN_loss
    elif gan_mode == 'non-saturating':
        # non-saturating: -log D(G(z)), gives stronger gradients early in training
        self.gen_loss = - rm.sum(rm.log(self.Dis_xp + eps))/nb
    elif gan_mode == 'max-likelihood':
        # max-likelihood: uses the discriminator's raw output (Dis_xp_is)
        self.gen_loss = - rm.sum(rm.exp(self.Dis_xp_is))/nb
    return self.GAN_loss
def func(node):
    # sums the element-wise log of `node`
    # (note: the built-in sum reduces over the first axis)
    return sum(rm.log(node))
def forward(self, x, y=None, eps=1e-3):
    # x : input data
    # y : one-hot label data for the categorical dist. or the supporting dis.
    #     (an all-zero row means the sample has no label assigned)
    # self.qzx : style z
    # self.rep : input data for decoding
    nb = len(x)
    # --- encoding phase ---
    if 0:
        # optional input noise (disabled)
        noise = np.random.randn(x.size).reshape(nb, x.shape[1]) * 0.03
        _x = x + noise
    else:
        _x = x
    if self.mode == 'clustering' or self.mode == 'reduction':
        self.qzx, self.qyx = self.enc(_x)
    else:
        self.qzx = self.enc(_x)
    # --- decoding/reconstruction phase ---
    if self.mode == 'clustering' or self.mode == 'reduction':
        self.recon = self.dec(rm.concat(self.qzx, self.qyx))
    else:
        self.recon = self.dec(self.qzx)
    # --- regularization phase ---
    if self.mode == 'incorp_label':
        self._set_incorpdist(x)
    else:
        self._set_distribution(x)
        if self.mode == 'clustering':
            pass  # categorical dist.
        elif self.mode == 'supervised':
            pass
        elif self.mode == 'dim_reduction':
            pass
    if self.mode == 'incorp_label':
        self._incorp_label(x, y, eps=eps)
    else:
        # adversarial terms: prior samples are "real", encoded samples are "fake"
        self.Dpz = self.dis(self.pz)
        self.Dqzx = self.dis(self.qzx)
        self.real = -rm.sum(rm.log(self.Dpz + eps))/nb
        self.fake = -rm.sum(rm.log(1 - self.Dqzx + eps))/nb
        self.fake2pos = -rm.sum(rm.log(self.Dqzx + eps))/nb
    if self.mode == 'clustering' or self.mode == 'reduction':
        _idx = np.where(y.sum(1) == 1)[0]   # labeled samples
        idx_ = np.where(y.sum(1) == 0)[0]   # unlabeled samples
        if len(_idx) > 0:
            # supporting discriminator on the categorical code
            self.Cy = self.cds(y)
            self.Cqyx = self.cds(self.qyx)
            self.Creal = -rm.sum(rm.log(self.Cy[_idx] + eps))/len(_idx)
            if 0:
                self.Cfake = -rm.sum(rm.log(1 - self.Cqyx[_idx] + eps))/len(_idx)
            else:
                self.Cfake = -rm.sum(rm.log(1 - self.Cqyx + eps))/nb
            self.Cfake2 = -rm.sum(rm.log(self.Cqyx[_idx] + eps))/len(_idx)
        else:
            self.Cfake = rm.Variable(0)
            self.Creal = rm.Variable(0)
            self.Cfake2 = rm.Variable(0)
    # --- summarizing losses ---
    self.gan_loss = self.real + self.fake
    if self.mode == 'clustering':
        if len(_idx) > 0:
            self.reconE = rm.mean_squared_error(self.recon[idx_], x[idx_])
        else:
            self.reconE = rm.mean_squared_error(self.recon, x)
    else:
        self.reconE = rm.mean_squared_error(self.recon, x)
    self.real_count = (self.Dpz >= 0.5).sum()/nb
    self.fake_count = (self.Dqzx < 0.5).sum()/nb
    self.enc_loss = self.fake2pos
    if self.mode == 'clustering' or self.mode == 'reduction':
        if len(_idx) > 0:
            self.Creal_count = (self.Cy[_idx] >= 0.5).sum()/len(_idx)
            self.Cfake_count = (self.Cqyx[_idx] < 0.5).sum()/len(_idx)
        else:
            self.Creal_count = 0
            self.Cfake_count = 0
        self.CganE = self.Creal + self.Cfake
        self.CgenE = self.Cfake2
    return self.recon
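# The AAE forward pass above exposes three losses that are usually minimized by different
# sub-networks: reconE (encoder + decoder), gan_loss (discriminator) and enc_loss (encoder
# acting as the "generator"). A minimal per-batch sketch, assuming an instance `aae` of the
# model above and caller-supplied ReNom optimizers; all names here are illustrative.
def aae_train_step(aae, x_batch, y_batch, opt_ae, opt_dis, opt_enc):
    # 1) reconstruction phase: encoder + decoder
    with aae.enc.train(), aae.dec.train():
        aae.forward(x_batch, y_batch)
        aae.reconE.grad().update(opt_ae)
    # 2) regularization phase, discriminator side
    with aae.dis.train():
        aae.forward(x_batch, y_batch)
        aae.gan_loss.grad().update(opt_dis)
    # 3) regularization phase, encoder ("generator") side
    with aae.enc.train():
        aae.forward(x_batch, y_batch)
        aae.enc_loss.grad().update(opt_enc)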
def func(_dir, x):
    # negative log-likelihood of x being "positive" ('pos') or "negative" (otherwise);
    # `eps` comes from the enclosing scope and keeps log() away from zero
    return -rm.sum(rm.log(x + eps if _dir == 'pos' else 1 - x + eps))
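# For reference, with the discriminator outputs of the forward pass above (Dpz, Dqzx and nb
# are assumed from that context), this helper reproduces the per-batch terms:
#   real term    : func('pos', self.Dpz)  / nb  == -rm.sum(rm.log(self.Dpz + eps)) / nb
#   fake term    : func('neg', self.Dqzx) / nb  == -rm.sum(rm.log(1 - self.Dqzx + eps)) / nb
#   encoder term : func('pos', self.Dqzx) / nb  == -rm.sum(rm.log(self.Dqzx + eps)) / nb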
def fit(self, epoch=1, epoch_step=250000, test_step=None):
    """
    This method executes training of the actor critic.
    A test run is performed after each epoch.

    Args:
        epoch (int): Number of epochs for training.
        epoch_step (int): Number of steps in one epoch.
        test_step (int): Number of steps during the test run.
    """
    # check
    assert isinstance(self.logger, Logger), "logger must be Logger class"
    self.logger._key_check(log_key=_a2c_keys, log_key_epoch=_a2c_keys_epoch)

    # create local variables
    envs = self.envs
    test_env = self.test_env
    advantage = self._advantage
    threads = self._num_worker
    gamma = self.gamma
    gradient_clipping = self.gradient_clipping
    value_coef = self.value_coef
    entropy_coef = self.entropy_coef

    # start environments (after reset)
    [self.envs[_t].start() for _t in range(threads)]

    # logging
    step_counts_log = np.zeros((advantage, threads, ))
    step_count = 0
    episode_counts_log = np.zeros((advantage, threads, ))
    episode_counts = np.zeros((threads, ))

    # epoch loop
    for e in range(1, epoch + 1):
        # buffers for s, a, r, terminal, s+1
        states = np.zeros((advantage, threads, *test_env.state_shape))
        actions = np.zeros((advantage, threads, 1))
        rewards = np.zeros((advantage, threads, 1))
        dones = np.zeros((advantage, threads, 1))
        states_next = np.zeros((advantage, threads, *test_env.state_shape))
        # value and target value function
        values = np.zeros((advantage, threads, 1))
        target_rewards = np.zeros((advantage + 1, threads, 1))
        # logging
        sum_rewards_log = np.zeros((advantage, threads, ))
        sum_rewards = np.zeros((threads, ))
        continuous_steps_log = np.zeros((advantage, threads, ))
        continuous_steps = np.zeros((threads, ))
        epoch_steps_log = np.zeros((advantage, threads, ))
        epoch_steps_j = 0
        nth_episode_counts_log = np.zeros((advantage, threads, ))
        nth_episode_counts = np.zeros((threads, ))

        # notify environments of a new epoch
        _ = [self.envs[_t].epoch() for _t in range(threads)]

        # initialize
        states[0] = np.array([envs[i].reset() for i in range(threads)
                              ]).reshape(-1, *test_env.state_shape)

        # action size
        a_, _ = self._network(states[0])
        a_len = len(a_[0].as_ndarray())

        loss = 0
        max_step = epoch_step / advantage
        self.logger._rollout = True
        self.logger.start(epoch_step)

        for j in range(int(max_step)):
            # roll out `advantage` steps
            for step in range(advantage):
                # choose actions for all workers
                actions[step] = self._action(states[step])
                # for each worker thread
                for thr in range(threads):
                    # next state, reward, done
                    states_n, rewards_n, dones_n = envs[thr].step(
                        int(actions[step][thr]))
                    states_next[step][thr] = np.copy(states_n)
                    rewards[step][thr] = np.copy(rewards_n)
                    dones[step][thr] = np.copy(dones_n)
                    # accumulate rewards / step counters
                    sum_rewards[thr] += rewards[step][thr]
                    sum_rewards_log[step][thr] = sum_rewards[thr].copy()
                    continuous_steps[thr] += 1
                    continuous_steps_log[step][thr] = continuous_steps[thr].copy()
                    episode_counts_log[step][thr] = episode_counts[thr].copy()
                    nth_episode_counts_log[step][thr] = nth_episode_counts[thr].copy()
                    # if done, reset the env and use the initial state as the next state
                    if dones[step][thr]:
                        states_next[step][thr] = envs[thr].reset()
                        sum_rewards[thr] = 0
                        continuous_steps[thr] = 0
                        episode_counts[thr] += 1
                        nth_episode_counts[thr] += 1
                epoch_steps_log[step] = epoch_steps_j
                step_counts_log[step] = step_count
                # advance one step
                epoch_steps_j += 1
                step_count += 1
                # carry the next states over to the next advantage step
                if step + 1 < advantage:
                    states[step + 1] = states_next[step].copy()
                # value estimates for the visited states
                values[step] = self._value(states[step])
                # env epoch step
                _ = [self.envs[_t].epoch_step() for _t in range(threads)]

            # bootstrap the return from the value of the last next-state
            target_rewards[-1] = self._value(states_next[-1])
            # n-step discounted returns, computed backwards and masked at episode ends
            for i in reversed(range(advantage)):
                mask = np.where(dones[i], 0.0, 1.0)
                target_rewards[i] = rewards[i] + target_rewards[i + 1] * gamma * mask

            # ------- calculating gradients -------
            # reshape states and targets
            reshaped_state = states.reshape(-1, *test_env.state_shape)
            reshaped_target_rewards = target_rewards[:-1].reshape(-1, 1)
            advantage_reward = reshaped_target_rewards - self._value(reshaped_state)
            total_n = advantage * threads
            # reshape index variables for the actions
            action_index = actions.reshape(-1, )

            # forward pass with a computational graph
            self._network.set_models(inference=False)
            with self._network.train():
                act, val, entropy = self._calc_forward(reshaped_state)
                act_log = rm.log(act + 1e-5)
                # one-hot mask selecting the log-probability of the chosen action
                action_coefs = np.zeros_like(act.as_ndarray())
                action_coefs[range(action_index.shape[0]),
                             action_index.astype("int")] = 1
                # actor loss (policy gradient with entropy bonus) and critic loss
                act_loss = - rm.sum(advantage_reward * action_coefs * act_log) / total_n \
                           - rm.sum(entropy) * entropy_coef / total_n
                val_loss = self.loss_func(val, reshaped_target_rewards) * value_coef * 2
                # total loss
                total_loss = val_loss + act_loss

            grad = total_loss.grad()
            if gradient_clipping is not None:
                gradient_clipping(grad)
            grad.update(self._optimizer)

            val_loss_nd = float(val_loss.as_ndarray())
            total_loss_nd = float(total_loss.as_ndarray())
            entropy_np = float(entropy.as_ndarray().mean())

            singular_list = [
                epoch_step, e, epoch, val_loss_nd, entropy_np, total_loss_nd,
                advantage, threads
            ]
            log1_key = [
                "max_step", "epoch", "max_epoch", "loss", "entropy",
                "total_loss", "advantage", "num_worker"
            ]
            log1_value = [[data] * advantage for data in singular_list]
            thread_step_reverse_list = [
                states, actions, rewards, dones, states_next, step_counts_log,
                epoch_steps_log, episode_counts_log, nth_episode_counts_log,
                continuous_steps_log, sum_rewards_log, values
            ]
            log2_key = [
                "state", "action", "reward", "terminal", "next_state",
                "total_step", "epoch_step", "total_episode", "epoch_episode",
                "steps_per_episode", "sum_reward", "values"
            ]
            log2_value = [
                data.swapaxes(1, 0)[0] for data in thread_step_reverse_list
            ]
            log_dic = {
                **dict(zip(log1_key, log1_value)),
                **dict(zip(log2_key, log2_value))
            }
            self.logger.logger(**log_dic)
            self.logger.update(advantage)

            # the last next-states seed the next rollout
            states[0] = states_next[-1].copy()

            if any([self.envs[_t].terminate() for _t in range(threads)]):
                print("terminated")
                break
        else:
            summed_test_reward = self.test(test_step)
            self.logger.logger_epoch(
                total_episode=episode_counts_log[-1],
                epoch_episode=nth_episode_counts_log[-1],
                epoch=e,
                max_epoch=epoch,
                test_reward=summed_test_reward,
                entropy=entropy_np,
                total_loss=total_loss_nd,
                advantage=advantage,
                num_worker=threads)
            self.logger.close()
            continue
        break
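# A small standalone illustration (plain NumPy, made-up numbers) of the backward recursion
# used above to build target_rewards: bootstrap from the value of the last next-state, then
# discount backwards, masking transitions that ended an episode.
import numpy as np

gamma = 0.99
rewards = np.array([[1.0], [0.0], [1.0]])    # (advantage, 1) for a single worker
dones = np.array([[0.0], [1.0], [0.0]])      # the second transition terminated an episode
bootstrap_value = 0.5                        # critic's estimate of the final next-state

targets = np.zeros((len(rewards) + 1, 1))
targets[-1] = bootstrap_value
for i in reversed(range(len(rewards))):
    mask = np.where(dones[i], 0.0, 1.0)      # stop the return at episode boundaries
    targets[i] = rewards[i] + gamma * targets[i + 1] * mask
print(targets[:-1])                          # regression targets for the critic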
def _calc_forward(self, x):
    act, val = self._network(x)
    # policy entropy per sample: -sum_a pi(a|s) * log(pi(a|s))
    e = -rm.sum(act * rm.log(act + 1e-5), axis=1)
    entropy = e.reshape(-1, 1)
    return act, val, entropy