def __call__(self, x_data, y_data):
    """Encode ``x_data``, sample a latent code and decode it.

    Args:
        x_data: Array fed to the recognition network. Its first two
            dimensions are used to scale the KL term.
        y_data: Array the sigmoid decoder output is compared against.

    Returns:
        tuple: ``(loss, kld, output)`` -- the mean squared error between
        ``output`` and ``y_data``, the scaled KL term, and the decoder
        output itself.
    """
    inputs = chainer.Variable(x_data)
    targets = chainer.Variable(y_data)

    # Recognition network.
    enc_hidden = F.relu(self.recog1(inputs))
    enc_hidden = F.relu(self.recog2(enc_hidden))
    recog_mean = self.recog_mean(enc_hidden)
    # Despite its name this head is consumed as a log-variance: it is halved
    # before exponentiation when sampling and used unscaled in the KL term.
    recog_log_sigma = self.recog_log_sigma(enc_hidden)

    # Reparameterisation: z = mu + sigma * epsilon with epsilon ~ N(0, I).
    # ``nz`` (the latent size) is read from the enclosing scope.
    noise = np.random.normal(0, 1, (inputs.data.shape[0], nz))
    noise = chainer.Variable(noise.astype(np.float32))
    z = recog_mean + F.exp(0.5 * recog_log_sigma) * noise

    # Generator network.
    dec_hidden = F.relu(self.gen1(z))
    dec_hidden = F.relu(self.gen2(dec_hidden))
    output = F.sigmoid(self.gen_mean(dec_hidden))

    loss = F.mean_squared_error(output, targets)
    kl_sum = F.sum(
        1 + recog_log_sigma - recog_mean**2 - F.exp(recog_log_sigma))
    kld = -0.5 * kl_sum / (x_data.shape[0] * x_data.shape[1])
    return loss, kld, output
def __call__(self, x, y):
    """Evaluate the Gaussian mixture density predicted from ``x`` at ``y``.

    The hidden layer produces softmax mixing coefficients, component means
    (reshaped to ``(-1, NUM_MIXTURE, OUT_DIM)``) and log-variances. The
    returned variable is the per-sample sum over components of the weighted
    isotropic Gaussian densities.
    """
    hidden = F.sigmoid(self.l1_(x))
    mix_weights = F.softmax(self.coef_(hidden))
    centers = F.reshape(
        self.mean_(hidden), (-1, self.NUM_MIXTURE, self.OUT_DIM))
    logvar = self.logvar_(hidden)

    # Give the target a singleton component axis so that it can be
    # broadcast against every mixture component.
    centers, targets = F.broadcast(
        centers, F.reshape(y, (-1, 1, self.OUT_DIM)))

    sq_dist = F.sum((targets - centers) ** 2, axis=2)
    kernel = F.exp(-0.5 * sq_dist * F.exp(-logvar))
    normaliser = (2 * np.pi * F.exp(logvar)) ** (0.5 * self.OUT_DIM)
    return F.sum(mix_weights * kernel / normaliser, axis=1)
def permutation_probability_loss(x, t, length):
    """Return the batch-averaged divergence between two masked softmaxes.

    ``x`` and ``t`` are turned into log-probabilities along axis 1 by the
    module's ``logsoftmax`` helper, restricted to the first ``length[i]``
    positions of row ``i``. The result is
    ``sum(p_t * log p_t - p_t * log p_x) / x.shape[0]``.
    """
    n_rows, n_cols = x.shape[0], x.shape[1]
    lengths = length.reshape(-1, 1)

    # mask[i, j] is True while column j lies inside row i's valid length.
    positions = np.arange(n_cols).reshape(1, -1)
    mask = chainer.Variable(np.tile(positions, (n_rows, 1)) < lengths)
    padding = chainer.Variable(np.zeros(x.shape, dtype=x.dtype))

    log_p_x = logsoftmax(x, mask, padding, axis=1)
    log_p_t = logsoftmax(t, mask, padding, axis=1)

    entropy_term = F.exp(log_p_t) * log_p_t
    cross_term = F.exp(log_p_t) * log_p_x
    return F.sum(entropy_term - cross_term) / float(n_rows)
def forward(self, x_data, state):
    """Encode a whole sequence, sample a latent code and decode it again.

    :param x_data: input sequence; it is iterated along axis 0 and every
        step is reshaped to ``(1, x_data.shape[1])``
    :param state: dict holding the recurrent state under the keys
        ``c_rec``/``h_rec`` (encoder) and ``c_gen``/``h_gen`` (decoder);
        it is updated in place
    :return: output, reconstruction loss, KL divergence, state
    """
    frame_shape = (1, x_data.shape[1])

    # =====[ Step 1: Compute q(z|x) - encoding step, get z ]=====
    for frame in x_data:
        x = Variable(frame.reshape(frame_shape))
        h_in = self.recog_x_h(x) + self.recog_h_h(state["h_rec"])
        c_t, h_t = F.lstm(state["c_rec"], h_in)
        state.update({"c_rec": c_t, "h_rec": h_t})

    q_mean = self.recog_mean(state["h_rec"])
    # The head is read as a log-variance; half of it is log(sigma).
    q_ln_var = self.recog_log_sigma(state["h_rec"])
    q_log_sigma = 0.5 * q_ln_var

    # KL(q(z|x) || N(0, I)) has to be written in terms of the variance of
    # the distribution that is actually sampled below (std = exp(log sigma)),
    # hence the log-variance and not log(sigma) appears here.
    # NOTE(review): the 0.0005 factor is kept as found; presumably a
    # deliberate down-weighting of the KL term -- confirm.
    KLD = -0.0005 * F.sum(1 + q_ln_var - q_mean ** 2 - F.exp(q_ln_var))

    # Reparameterisation: z = mean + sigma * noise.
    eps = Variable(
        np.random.normal(0, 1, q_log_sigma.data.shape).astype(np.float32))
    z = q_mean + F.exp(q_log_sigma) * eps

    # =====[ Step 2: Compute p(x|z) - decoding step ]=====
    output = []
    h_in = self.gen_z_h(z)
    c_t, h_t = F.lstm(state["c_gen"], h_in)
    state.update({"c_gen": c_t, "h_gen": h_t})

    rec_loss = Variable(np.zeros((), dtype=np.float32))
    for frame in x_data:
        # Emit one step and score it against the matching input step.
        x_t = self.output(h_t)
        output.append(x_t.data)
        rec_loss += self.loss_func(x_t, Variable(frame.reshape(frame_shape)))

        # Feed the emitted step back in to get the next hidden state.
        h_in = self.gen_x_h(x_t) + self.gen_h_h(state["h_gen"])
        c_t, h_t = F.lstm(state["c_gen"], h_in)
        state.update({"c_gen": c_t, "h_gen": h_t})

    return np.array(output), rec_loss, KLD, state
def __call__(self, fs, bs, h):
    '''
    Compute the attention.
    :param fs: list holding the intermediate vectors of the forward Encoder
    :param bs: list holding the intermediate vectors of the backward Encoder
    :param h: intermediate vector output by the Decoder
    :return: weighted average of the forward Encoder vectors and weighted
             average of the backward Encoder vectors
    '''
    batch_size = h.data.shape[0]  # remember the mini-batch size
    # Running total of the unnormalised weights.
    sum_w = Variable(xp.zeros((batch_size, 1), dtype='float32'))

    # One unnormalised weight per encoder position, computed from the forward
    # state, the backward state and the decoder state.
    ws = [
        F.exp(self.hw(F.tanh(self.fh(f) + self.bh(b) + self.hh(h))))
        for f, b in zip(fs, bs)
    ]
    for raw_weight in ws:
        sum_w += raw_weight

    # Accumulators for the weighted averages that are returned.
    out_shape = (batch_size, self.hidden_size)
    att_f = Variable(xp.zeros(out_shape, dtype='float32'))
    att_b = Variable(xp.zeros(out_shape, dtype='float32'))
    for f, b, raw_weight in zip(fs, bs, ws):
        # Normalise so that the weights sum to one (softmax).
        weight = raw_weight / sum_w
        # Add weight * encoder vector to the running averages.
        att_f += F.reshape(F.batch_matmul(f, weight), out_shape)
        att_b += F.reshape(F.batch_matmul(b, weight), out_shape)
    return att_f, att_b
def likelihood_ratio(self, other, a):
    """Return ``p_self(a) / p_other(a)``.

    The ratio is formed in log space: both log-likelihoods are subtracted
    first and the difference is exponentiated once.
    """
    log_ratio = self.logli(a) - other.logli(a)
    return F.exp(log_ratio)
def transform(self, images, normalized=False):
    '''Transform image data to latent space.

    Parameters
    ----------
    images : array-like shape (n_images, image_width, image_height,
                               n_colors)
        Input numpy array of images.
    normalized [optional] : bool
        Normalization flag that specifies whether pixel data is normalized
        to a [0,1] scale. When false the data is divided by 255 first.

    Returns
    -------
    latent_vec : array-like shape (n_images, latent_dim)
        One latent sample per image (the sampling is random).
    '''
    n_samp = images.shape[0]
    flat_images = images.flatten().reshape((n_samp, -1))
    x_encoding = chainer.Variable(flat_images)
    if not normalized:
        x_encoding /= 255.

    # The encoder output is split in two halves along axis 1; the second half
    # is used as a log-variance below.
    mean, ln_var = F.split_axis(self._encode(x_encoding), 2, 1)

    # Draw N(0,1) noise with the same shape as the mean.
    noise = np.random.standard_normal(mean.data.shape).astype('float32')
    if self.flag_gpu:
        noise = cuda.to_gpu(noise)
    noise = chainer.Variable(noise)

    # Scale and shift the noise with the encoder's parameters.
    latent = noise * F.exp(0.5*ln_var) + mean
    return latent.data
def __call__(self, h, dist):
    """Filter atom features with weights generated from atom distances.

    Args:
        h (numpy.ndarray): axis 0 represents minibatch index,
            axis 1 represents atom_index and axis2 represents
            feature dimension.
        dist (numpy.ndarray): axis 0 represents minibatch index,
            axis 1 and 2 represent distance between atoms.

    Returns:
        The element-wise product of the broadcast features and the
        distance-derived weights, summed over axis 1.

    Raises:
        ValueError: if ``h.shape[2]`` differs from ``self.hidden_dim``.
    """
    n_batch, n_atoms, n_feat = h.shape
    if n_feat != self.hidden_dim:
        raise ValueError('h.shape[2] {} and hidden_dim {} must be same!'
                         .format(n_feat, self.hidden_dim))

    # Centres of the radial basis functions.
    rbf_centers = self.xp.arange(
        self.num_rbf).astype('f') * self.radius_resolution

    # Expand every pairwise distance on the radial basis.
    pair_shape = (n_batch, n_atoms, n_atoms)
    rbf = functions.reshape(dist, pair_shape + (1,))
    rbf = functions.broadcast_to(rbf, pair_shape + (self.num_rbf,))
    rbf = functions.exp(- self.gamma * (rbf - rbf_centers) ** 2)

    # Two dense layers with softplus turn the expansion into filter weights.
    weights = functions.reshape(rbf, (-1, self.num_rbf))
    weights = functions.softplus(self.dense1(weights))
    weights = functions.softplus(self.dense2(weights))
    weights = functions.reshape(weights, pair_shape + (self.hidden_dim,))

    # Broadcast the atom features over the partner axis and aggregate.
    feats = functions.reshape(h, (n_batch, n_atoms, 1, self.hidden_dim))
    feats = functions.broadcast_to(feats, pair_shape + (self.hidden_dim,))
    return functions.sum(feats * weights, axis=1)
def __call__(self, annotion_list, back_word_list, p):
    """
    Calculate the attention-weighted annotion and back word values.
    :param annotion_list: per-position annotion vectors
    :param back_word_list: per-position back word vectors
    :param p: hidden value
    :return: weighted sum of the annotions and weighted sum of the back words
    """
    batch_size = p.data.shape[0]
    sum_exponential = XP.fzeros((batch_size, 1))

    # Unnormalised attention score of every position.
    exponential_list = [
        functions.exp(self.weight_exponential(functions.tanh(
            self.annotion_weight(annotion)
            + self.back_weight(back_word)
            + self.pw(p))))
        for annotion, back_word in zip(annotion_list, back_word_list)
    ]
    for exponential in exponential_list:
        sum_exponential += exponential

    out_shape = (batch_size, self.hidden_size)
    # Both accumulators start from the same zero value.
    annotion_value = back_word_value = XP.fzeros(out_shape)

    # Normalise each score and accumulate the weighted vectors.
    for annotion, back_word, exponential in zip(
            annotion_list, back_word_list, exponential_list):
        attention = exponential / sum_exponential
        annotion_value += functions.reshape(
            functions.batch_matmul(annotion, attention), out_shape)
        back_word_value += functions.reshape(
            functions.batch_matmul(back_word, attention), out_shape)
    return annotion_value, back_word_value
def __call__(self, x, y):
    """Return the Gaussian kernel matrix between the rows of ``x`` and ``y``.

    Both inputs are scaled element-wise by ``self.gamma`` and entry
    ``(i, j)`` of the result is ``exp(-||g * x_i - g * y_j||^2)``.

    Parameters
    -----------------
    x: Variable
        Sample features, one sample per row.
    y: Variable
        Sample features, one sample per row. Must broadcast against ``x``
        and ``self.gamma``.
    """
    g, x, y = F.broadcast(self.gamma, x, y)
    x_g = x * g
    y_g = y * g

    x_g_norm = F.sum(x_g**2, axis=1)
    y_g_norm = F.sum(y_g**2, axis=1)
    x_g_y_g = F.linear(x_g, y_g)  # entry (i, j) is <x_i, y_j>

    # ||x_i||^2 has to vary along the rows and ||y_j||^2 along the columns so
    # that all three terms index the same (i, j) pair; broadcasting the raw
    # 1-d norm would place it on the column axis instead.
    x_g_norm, x_g_y_g, y_g_norm = F.broadcast(
        F.expand_dims(x_g_norm, 1), x_g_y_g, F.expand_dims(y_g_norm, 0))

    return F.exp(- x_g_norm + 2 * x_g_y_g - y_g_norm)
def __call__(self, x, y):
    """Return the Gaussian kernel matrix between the rows of ``x`` and ``y``.

    Both inputs are scaled element-wise by ``self.gamma`` and entry
    ``(i, j)`` of the result is ``exp(-||g * x_i - g * y_j||^2)``.

    Parameters
    -----------------
    x: Variable
        Sample features, one sample per row.
    y: Variable
        Sample features, one sample per row. Must broadcast against ``x``
        and ``self.gamma``.
    """
    g, x, y = F.broadcast(self.gamma, x, y)
    x_g = x * g
    y_g = y * g

    x_g_norm = F.sum(x_g**2, axis=1)
    y_g_norm = F.sum(y_g**2, axis=1)
    x_g_y_g = F.linear(x_g, y_g)  # entry (i, j) is <x_i, y_j>

    # ||x_i||^2 has to vary along the rows and ||y_j||^2 along the columns so
    # that all three terms index the same (i, j) pair; broadcasting the raw
    # 1-d norm would place it on the column axis instead.
    x_g_norm, x_g_y_g, y_g_norm = F.broadcast(
        F.expand_dims(x_g_norm, 1), x_g_y_g, F.expand_dims(y_g_norm, 0))

    return F.exp(- x_g_norm + 2 * x_g_y_g - y_g_norm)
def forward_one_step(self, x_data, c_data, y_data, state, train=True):
    """Advance the three stacked LSTM layers and the soft window by one step.

    Args:
        x_data: input array for this step.
        c_data: array passed to ``self.forward_window`` together with the
            window parameters.
        y_data: target array for this step.
        state: dict with the keys ``c1``/``h1``, ``c2``/``h2``, ``c3``/``h3``,
            ``w`` and ``w_means`` from the previous step. It is not mutated;
            a new dict is returned.
        train: when false the input variables are created volatile.

    Returns:
        tuple: ``(state, loss)`` -- the new state dict and the value of
        ``loss_func(self.noutput_gauss, y, t)``.
    """
    x = chainer.Variable(x_data, volatile=not train)
    t = chainer.Variable(y_data, volatile=not train)
    c = chainer.Variable(c_data, volatile=not train)

    h1_in = (self.l1_first(x) + self.l1_recur(state['h1'])
             + self.l1_w(state['w']))
    c1, h1 = F.lstm(state['c1'], h1_in)

    # Soft window: positive window parameters from the first layer; the
    # means are accumulated across steps.
    ws = F.exp(self.lw(h1))
    w_mixws, w_gains, w_means = split_axis_by_widths(ws, 3)
    w_means += state['w_means']
    w = self.forward_window(w_mixws, w_gains, w_means, c)

    # NOTE(review): layers 2 and 3 project the window with ``l1_w`` as well;
    # confirm this sharing is intended and not a typo for per-layer links.
    h2_in = (self.l2_first(x) + self.l2_recur(state['h2'])
             + self.l1_w(w) + self.l2_input(h1))
    c2, h2 = F.lstm(state['c2'], h2_in)

    h3_in = (self.l3_first(x) + self.l3_recur(state['h3'])
             + self.l1_w(w) + self.l3_input(h2))
    c3, h3 = F.lstm(state['c3'], h3_in)

    # ``F.concat`` takes the variables as ONE sequence argument; its second
    # positional parameter is the axis.
    y = self.l4(F.concat((h1, h2, h3)))

    state = {'c1': c1, 'h1': h1,
             'c2': c2, 'h2': h2,
             'c3': c3, 'h3': h3,
             'w': w, 'w_means': w_means}
    return state, loss_func(self.noutput_gauss, y, t)
def __call__(self, x, y):
    """Return a Gaussian kernel matrix with a randomly sampled bandwidth.

    A single scale ``g`` is drawn with ``F.gaussian`` (mean 0, log-variance
    ``e``) on every call and broadcast to ``x.shape``. Entry ``(i, j)`` of
    the result is ``exp(-||g * x_i - g * y_j||^2)``.

    Parameters
    -----------------
    x: Variable
        Sample features, one sample per row.
    y: Variable
        Sample features, one sample per row; multiplied element-wise with
        the scale broadcast to ``x.shape``.
    """
    g = F.broadcast_to(
        F.gaussian(
            np.array([0], dtype=np.float32),
            np.array([np.exp(1)], dtype=np.float32)),
        x.shape)
    x_g = x * g
    y_g = y * g

    x_g_norm = F.sum(x_g**2, axis=1)
    y_g_norm = F.sum(y_g**2, axis=1)
    x_g_y_g = F.linear(x_g, y_g)  # entry (i, j) is <x_i, y_j>

    # ||x_i||^2 has to vary along the rows and ||y_j||^2 along the columns so
    # that all three terms index the same (i, j) pair; broadcasting the raw
    # 1-d norm would place it on the column axis instead.
    x_g_norm, x_g_y_g, y_g_norm = F.broadcast(
        F.expand_dims(x_g_norm, 1), x_g_y_g, F.expand_dims(y_g_norm, 0))

    return F.exp(- x_g_norm + 2 * x_g_y_g - y_g_norm)
def gaussian_kl_divergence(mean, ln_var):
    r"""Compute the KL-divergence of a diagonal Gaussian from N(0, I).

    ``mean`` represents :math:`\mu` and ``ln_var`` represents
    :math:`\log(\sigma^2)`. The returned variable is

    .. math::

        D_{\mathbf{KL}}(N(\mu, S) \| N(0, I)),

    where :math:`S` is diagonal with :math:`S_{ii} = \sigma_i^2` and
    :math:`I` is the identity matrix. The value is summed over every element
    of the inputs.

    Args:
        mean (~chainer.Variable): Mean of the given Gaussian,
            :math:`\mu`.
        ln_var (~chainer.Variable): Logarithm of the variance of the given
            Gaussian, :math:`\log(\sigma^2)`.

    Returns:
        ~chainer.Variable: The KL-divergence between the given Gaussian and
        the standard Gaussian.

    """
    assert isinstance(mean, variable.Variable)
    assert isinstance(ln_var, variable.Variable)

    n_elements = mean.data.size
    mean_sq_sum = F.sum(mean * mean)
    var_sum = F.sum(F.exp(ln_var))
    ln_var_sum = F.sum(ln_var)
    return (mean_sq_sum + var_sum - ln_var_sum - n_elements) * 0.5
def _infer_z(mu, ln_var):
    """Sample ``z`` from N(mu, exp(ln_var)) and return it with the KL term.

    The KL divergence from the standard normal is summed over all elements
    and divided by the batch size (``mu.data.shape[0]``).
    """
    n_batch = mu.data.shape[0]
    variance = F.exp(ln_var)
    z = F.gaussian(mu, ln_var)
    kl = -F.sum(1 + ln_var - mu ** 2 - variance) / 2 / n_batch
    return z, kl
def __call__(self, a_list, b_list, p, sentence_length, window_size):
    """Return attention-weighted sums of ``a_list`` and ``b_list``.

    The weights are a softmax over positions of a score computed from each
    ``a``/``b`` pair and ``p``.

    NOTE(review): the windowing logic looks unfinished -- ``window_size`` is
    never read and the predicted position is computed but not used. The
    original's planning notes: either build a per-position Gaussian weight
    that is zero outside the window, or a 0/1 mask over sentence positions,
    ideally without iterating over the batch.
    """
    batch_size = p.data.shape[0]
    sentence_lengths = XP.fnonzeros((batch_size, 1), sentence_length)
    score_total = XP.fzeros((batch_size, 1))

    # Predicted window centre; currently unused (see the note above).
    squashed = functions.tanh(self.ts(p))
    position = sentence_lengths * functions.sigmoid(self.sp(squashed))

    # Unnormalised score of every position.
    scores = [
        functions.exp(self.we(
            functions.tanh(self.aw(a) + self.bw(b) + self.pw(p))))
        for a, b in zip(a_list, b_list)
    ]
    for score in scores:
        score_total += score

    # Both accumulators start from the same zero value.
    aa = bb = XP.fzeros((batch_size, self.hidden_size))
    for a, b, score in zip(a_list, b_list, scores):
        weight = score / score_total
        aa += a * weight
        bb += b * weight
    return aa, bb
def log_pz(self, z, mean, ln_var, test=False):
    """Return the log prior term summed over axis 1.

    Args:
        z: latent sample; only read when ``self.type_pz == "gaussian"``.
        mean: mean of q(z); only read for ``"gaussianmarg"``.
        ln_var: log-variance of q(z); only read for ``"gaussianmarg"``.
        test: accepted for interface compatibility; not read here.

    Raises:
        ValueError: if ``self.type_pz`` is neither ``"gaussianmarg"`` nor
            ``"gaussian"``.
    """
    if self.type_pz == "gaussianmarg":
        # \int q(z)logp(z)dz = -(J/2)*log2pi - (1/2)*sum_{j=1}^{J} (mu^2 + var)
        # See Appendix B [Auto-Encoding Variational Bayes](http://arxiv.org/abs/1312.6114)
        log_pz = -0.5 * (math.log(2.0 * math.pi) + mean * mean + F.exp(ln_var))
    elif self.type_pz == "gaussian":
        log_pz = -0.5 * math.log(2.0 * math.pi) - 0.5 * z ** 2
    else:
        # Fail with a clear message rather than an unbound local further down.
        raise ValueError("unknown type_pz: {!r}".format(self.type_pz))
    return F.sum(log_pz, axis=1)
def gauss_bernoulli_params(m, y):
    """Split a raw output ``y`` into mixture parameters and one extra column.

    ``y`` is cut along its feature axis into blocks of width
    ``m, 2m, 2m, m, 1``: mixture weights (softmax), means, standard
    deviations (exponentiated), correlations (tanh) and a final column that
    is returned untouched. Means and standard deviations are each halved
    into their two coordinates.

    Returns:
        tuple: ``((mixws, means0, means1, stdds0, stdds1, corrs), e)``.
    """
    block_widths = [m, 2 * m, 2 * m, m, 1]
    raw_mixws, raw_means, raw_stdds, raw_corrs, y_e = \
        split_axis_by_widths(y, block_widths)

    mix_weights = F.softmax(raw_mixws)
    means0, means1 = split_axis_by_widths(raw_means, 2)
    stdds0, stdds1 = split_axis_by_widths(F.exp(raw_stdds), 2)
    corrs = F.tanh(raw_corrs)

    gauss_params = (mix_weights, means0, means1, stdds0, stdds1, corrs)
    return gauss_params, y_e
def cosine_similarity(x, y, eps=1e-6):
    """Return batched pairwise cosine similarities between ``x`` and ``y``.

    Both inputs must be rank 3. The batched product ``x @ y^T`` is divided by
    ``sqrt(|x|^2 * |y|^2 + eps)``, where the square root is evaluated as
    ``exp(log(.) / 2)``.
    """
    n_batch, n_x, _ = x.data.shape
    _, n_y, _ = y.data.shape
    pair_shape = (n_batch, n_x, n_y)

    dots = F.batch_matmul(x, y, transb=True)

    # Squared norms, broadcast so that entry (b, i, j) pairs x[b, i], y[b, j].
    x_sq = F.reshape(F.sum(x * x, axis=2), (n_batch, n_x, 1))
    x_sq = F.broadcast_to(x_sq, pair_shape)
    y_sq = F.reshape(F.sum(y * y, axis=2), (n_batch, 1, n_y))
    y_sq = F.broadcast_to(y_sq, pair_shape)

    norm_product = F.exp(F.log(x_sq * y_sq + eps) / 2)
    return dots / norm_product
def log_pz(self, z, mean, ln_var):
    """Return the log prior term summed over axis 1.

    Args:
        z: latent sample; only read when ``self.type_pz == "gaussian"``.
        mean: mean of q(z); only read for ``"gaussianmarg"``.
        ln_var: log-variance of q(z); only read for ``"gaussianmarg"``.

    Raises:
        ValueError: if ``self.type_pz`` is neither ``"gaussianmarg"`` nor
            ``"gaussian"``.
    """
    if self.type_pz == "gaussianmarg":
        # \int q(z)logp(z)dz = -(J/2)*log2pi - (1/2)*sum_{j=1}^{J} (mu^2 + var)
        # See Appendix B [Auto-Encoding Variational Bayes](http://arxiv.org/abs/1312.6114)
        # See https://github.com/dpkingma/nips14-ssl/blob/master/anglepy/models/VAE_YZ_X.py line 106
        log_pz = -0.5 * (math.log(2.0 * math.pi) + mean * mean + F.exp(ln_var))
    elif self.type_pz == "gaussian":
        log_pz = -0.5 * math.log(2.0 * math.pi) - 0.5 * z ** 2
    else:
        # Fail with a clear message rather than an unbound local further down.
        raise ValueError("unknown type_pz: {!r}".format(self.type_pz))
    return F.sum(log_pz, axis=1)
def gaussian_nll_keepbatch(self, x, mean, ln_var, clip=True):
    """Return the Gaussian negative log-likelihood of ``x``, per sample.

    The result is summed over axis 1 only, so the batch axis is kept. With
    ``clip`` enabled the log-variance is first clipped to
    ``[log(0.001), log(10)]``.
    """
    if clip:
        ln_var = F.clip(ln_var, math.log(0.001), math.log(10))

    precision = F.exp(-ln_var)
    residual = x - mean
    quadratic = (residual * residual) * precision * 0.5
    log_norm = (math.log(2.0 * math.pi) + ln_var) * 0.5
    return F.sum(log_norm + quadratic, axis=1)
def gaussian_mixture_2d_ref(*inputs):
    """Reference implementation of a weighted 2-d Gaussian mixture term.

    ``inputs`` must hold exactly eight values, in order: weights ``w``, means
    ``m1``/``m2``, standard deviations ``s1``/``s2``, correlation ``c`` and
    the two coordinates ``x1``/``x2``.

    NOTE(review): the exponent uses ``(z1 - c * z2)^2`` only; the textbook
    bivariate normal has ``z1^2 - 2*c*z1*z2 + z2^2``, which differs by
    ``(1 - c^2) * z2^2``. Confirm against the implementation this reference
    is compared with before changing either side.
    """
    w, m1, m2, s1, s2, c, x1, x2 = inputs

    # Standardised offsets of the point from every component mean.
    norm_dx = vec_sub_mat(x1, m1, lhs_bwd=False) / s1
    norm_dy = vec_sub_mat(x2, m2, lhs_bwd=False) / s2

    residual_sq = (norm_dx - c * norm_dy)**2.0
    one_minus_c_sq = 1.0 - c**2.0
    normaliser = 2.0 * numpy.pi * s1 * s2 * one_minus_c_sq ** 0.5
    return w * functions.exp(- residual_sq / (2.0 * one_minus_c_sq)) / normaliser
def _forward(self, img_batch):
    """Run the autoencoder on one image batch.

    The batch is divided by 255, encoded, sampled in latent space and decoded.

    Returns:
        tuple: ``(reconstruction_loss, kl_div, output)`` -- the mean squared
        error against the scaled batch, the KL term divided by
        ``img_batch.shape[0] * img_batch.shape[1]``, and the decoder output.
    """
    batch = chainer.Variable(img_batch / 255.)

    # The encoder output is split in two halves along axis 1: the mean and a
    # value that is used as a log-variance below.
    mean, ln_var = F.split_axis(self._encode(batch), 2, 1)

    # Draw N(0,1) noise with the same shape as the mean.
    noise = np.random.standard_normal(mean.data.shape).astype('float32')
    if self.flag_gpu:
        noise = cuda.to_gpu(noise)
    noise = chainer.Variable(noise)

    # Scale and shift the noise with the encoder's parameters, then decode.
    latent = noise * F.exp(0.5*ln_var) + mean
    output = self._decode(latent)
    reconstruction_loss = F.mean_squared_error(output, batch)

    # KL divergence from the standard normal, scaled by the batch extent.
    kl_div = -0.5 * F.sum(1 + ln_var - mean ** 2 - F.exp(ln_var))
    kl_div /= (img_batch.shape[1] * img_batch.shape[0])
    return reconstruction_loss, kl_div, output
def __call__(self, x):
    """Apply seven leaky-ReLU layers, then return ``exp(l9(h) - 13.0)``.

    The final exponential makes the output strictly positive.
    NOTE(review): the chain goes from ``l7`` straight to ``l9``; confirm
    that no ``l8`` layer is expected in between.
    """
    hidden_layers = (self.l1, self.l2, self.l3, self.l4,
                     self.l5, self.l6, self.l7)
    h = x
    for layer in hidden_layers:
        h = F.leaky_relu(layer(h))
    return F.exp(self.l9(h) - 13.0)
def vae_forward(self, noisy_h_data, h_data, n_layers_recog, n_layers_gen,
                nonlinear_q='softplus', nonlinear_p='softplus', gpu=-1,
                train=True):
    """Run the denoising VAE: encode the noisy input, reconstruct the clean one.

    Args:
        noisy_h_data: array fed to the recognition network.
        h_data: clean array the reconstruction is compared against.
        n_layers_recog: number of hidden recognition layers; the links are
            looked up as ``vae_recog_<i>``.
        n_layers_gen: number of hidden generator layers; the links are
            looked up as ``vae_gen_<i>``.
        nonlinear_q: name of the recognition non-linearity.
        nonlinear_p: name of the generator non-linearity.
        gpu: when ``>= 0`` the sampled noise is moved to the GPU.
        train: forwarded to ``F.dropout``.

    Returns:
        tuple: ``(rec_loss, KLD, output)``.

    Raises:
        KeyError: if a non-linearity name is not one of the supported keys.
    """
    noisy_inputs = Variable(noisy_h_data)
    inputs = Variable(h_data)

    # Resolve the non-linearities by name.
    nonlinear_dict = {'sigmoid': F.sigmoid, 'tanh': F.tanh,
                      'softplus': F.softplus, 'relu': F.relu,
                      'clipped_relu': F.clipped_relu,
                      'leaky_relu': F.leaky_relu}
    nonlinear_f_q = nonlinear_dict[nonlinear_q]
    nonlinear_f_p = nonlinear_dict[nonlinear_p]

    chain = [noisy_inputs]

    # Recognition network.
    for i in range(n_layers_recog):
        layer = getattr(self, 'vae_recog_%i' % i)
        chain.append(F.dropout(nonlinear_f_q(layer(chain[-1])), train=train))

    recog_out = getattr(self, 'vae_recog_%i' % n_layers_recog)(chain[-1])
    # The head is read as a log-variance; half of it is log(sigma).
    ln_var_out = getattr(self, 'log_sigma')(chain[-1])
    log_sigma_out = 0.5 * ln_var_out

    eps = np.random.normal(
        0, 1,
        (inputs.data.shape[0], log_sigma_out.data.shape[1])).astype('float32')
    if gpu >= 0:
        eps = cuda.to_gpu(eps)
    eps = Variable(eps)
    # Reparameterisation: z = mean + sigma * noise.
    z = recog_out + F.exp(log_sigma_out) * eps

    chain += [recog_out, z]

    # Generator network; the last layer is linear (no output activation).
    for i in range(n_layers_gen):
        layer = getattr(self, 'vae_gen_%i' % i)
        chain.append(F.dropout(nonlinear_f_p(layer(chain[-1])), train=train))
    chain.append(getattr(self, 'vae_gen_%i' % (n_layers_gen))(chain[-1]))
    output = chain[-1]

    rec_loss = F.mean_squared_error(output, inputs)
    # KL(q(z|x) || N(0, I)) in terms of the variance of the distribution that
    # was sampled above, i.e. with the log-variance rather than log(sigma).
    KLD = -0.5 * F.sum(
        1 + ln_var_out - recog_out**2 - F.exp(ln_var_out)
    ) / (inputs.data.shape[0]*inputs.data.shape[1])

    return rec_loss, KLD, output
def kl_div(self, other):
    """Return KL(self || other) for two categorical distributions.

    Both distributions are given by their ``logits``; the divergence is
    summed over the last axis (the layout is assumed to be N * A).
    """
    log_p = F.log_softmax(self.logits)
    log_q = F.log_softmax(other.logits)
    return F.sum(F.exp(log_p) * (log_p - log_q), axis=-1)
def encode(self, data, test=False):
    """Encode ``data`` and draw one latent sample.

    The encoder output is split in two halves along axis 1, the mean and the
    log-variance, and the sample is ``noise * exp(0.5 * ln_var) + mean``.

    Returns:
        tuple: ``(z, mean, ln_var)``.
    """
    encoded = self.enc(data, test=test)
    mean, ln_var = F.split_axis(encoded, 2, 1)

    noise = Variable(
        np.random.standard_normal(mean.data.shape).astype('float32'))
    if self.flag_gpu:
        noise.to_gpu()

    z = noise * F.exp(0.5*ln_var) + mean
    return z, mean, ln_var
def logli(self, a):
    """Return the log-density of ``a`` under this diagonal Gaussian.

    With ``z = (a - means) / stds`` the value is
    ``-sum(log_stds) - 0.5 * sum(z^2) - 0.5 * n * log(2 * pi)``, where the
    sums run over the last axis and ``n = means.shape[-1]``.
    """
    a = F.cast(a, np.float32)
    # Map the sample back to the standard normal.
    standardized = (a - self.means) * F.exp(-self.log_stds)

    log_det_term = F.sum(self.log_stds, axis=-1)
    quadratic_term = 0.5 * F.sum(F.square(standardized), axis=-1)
    constant_term = 0.5 * self.means.shape[-1] * np.log(2 * np.pi)
    return - log_det_term - quadratic_term - constant_term
def forward_one_step(self, x_data, y_data, n_layers_recog, n_layers_gen,
                     nonlinear_q='softplus', nonlinear_p='softplus',
                     output_f='sigmoid', type_qx='gaussian',
                     type_px='gaussian', gpu=-1):
    """Run the conditional VAE once: encode ``(x, y)``, sample z, decode.

    Args:
        x_data: array to reconstruct.
        y_data: conditioning array, fed to both networks.
        n_layers_recog: number of recognition layers; layers after the first
            are looked up as ``recog_<i>``.
        n_layers_gen: number of generator layers; layers after the first are
            looked up as ``gen_<i>``.
        nonlinear_q: name of the recognition non-linearity.
        nonlinear_p: name of the generator non-linearity.
        output_f: name of the output activation.
        type_qx: accepted for interface compatibility; not read here.
        type_px: accepted for interface compatibility; not read here.
        gpu: when ``>= 0`` the sampled noise is moved to the GPU.

    Returns:
        tuple: ``(rec_loss, KLD, output)``.

    Raises:
        KeyError: if a non-linearity or activation name is not supported.
    """
    x = Variable(x_data)
    y = Variable(y_data)

    # Resolve the non-linearities by name.
    nonlinear = {'sigmoid': F.sigmoid, 'tanh': F.tanh,
                 'softplus': self.softplus, 'relu': F.relu}
    nonlinear_f_q = nonlinear[nonlinear_q]
    nonlinear_f_p = nonlinear[nonlinear_p]

    output_activation = {'sigmoid': F.sigmoid, 'identity': self.identity,
                         'tanh': F.tanh}
    output_a_f = output_activation[output_f]

    # compute q(z|x, y)
    hidden_q = [nonlinear_f_q(self.recog_x(x) + self.recog_y(y))]
    for i in range(n_layers_recog-1):
        layer = getattr(self, 'recog_%i' % i)
        hidden_q.append(nonlinear_f_q(layer(hidden_q[-1])))

    q_mean = getattr(self, 'recog_mean')(hidden_q[-1])
    # The head is read as a log-variance; half of it is log(sigma).
    q_ln_var = getattr(self, 'recog_log')(hidden_q[-1])
    q_log_sigma = 0.5 * q_ln_var

    eps = np.random.normal(
        0, 1, (x.data.shape[0], q_log_sigma.data.shape[1])).astype('float32')
    if gpu >= 0:
        eps = cuda.to_gpu(eps)
    eps = Variable(eps)
    # Reparameterisation: z = mean + sigma * noise.
    z = q_mean + F.exp(q_log_sigma) * eps

    # compute p(x|y, z)
    hidden_p = [nonlinear_f_p(self.gen_y(y) + self.gen_z(z))]
    for i in range(n_layers_gen-1):
        layer = getattr(self, 'gen_%i' % i)
        hidden_p.append(nonlinear_f_p(layer(hidden_p[-1])))
    hidden_p.append(output_a_f(getattr(self, 'gen_out')(hidden_p[-1])))
    output = hidden_p[-1]

    rec_loss = F.mean_squared_error(output, x)
    # KL(q(z|x,y) || N(0, I)) in terms of the variance of the distribution
    # that was sampled above, i.e. with the log-variance, not log(sigma).
    KLD = -0.5 * F.sum(
        1 + q_ln_var - q_mean**2 - F.exp(q_ln_var)
    ) / (x_data.shape[0]*x_data.shape[1])

    return rec_loss, KLD, output
def kl_div(self, other):
    """Return KL(self || other) for two diagonal multivariate Gaussians.

    Reference:
    https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence#Kullback.E2.80.93Leibler_divergence_for_multivariate_normal_distributions

    For two n-dimensional normal distributions,

        D_KL(N1||N2) = 1/2 ( tr(Σ_2^{-1}Σ_1)
                             + (μ_2 - μ_1)^T Σ_2^{-1} (μ_2 - μ_1)
                             - n + ln(det(Σ_2) / det(Σ_1)) )

    With diagonal covariances, vars_k = exp(2 * log_stds_k), this becomes

        sum( ((means_1 - means_2)^2 + vars_1 - vars_2) / (2 * vars_2)
             + (log_stds_2 - log_stds_1), axis=-1 )

    Index 1 refers to ``self`` and index 2 to ``other``; both are read
    through their ``means`` and ``log_stds`` attributes. A small constant is
    added to the denominator to keep it away from zero.

    :param other: the second distribution
    :return: the KL divergences, summed over the last axis.
    """
    self_vars = F.exp(2 * self.log_stds)
    other_vars = F.exp(2 * other.log_stds)

    numerator = F.square(self.means - other.means) + self_vars - other_vars
    denominator = 2 * other_vars + 1e-8
    per_dim = numerator / denominator + other.log_stds - self.log_stds
    return F.sum(per_dim, axis=-1)
def main():
    """Train or evaluate a PPO agent on a MuJoCo Gym environment.

    Parses the command line, builds the vectorised environments, a Gaussian
    policy and a value function, then either runs evaluation only
    (``--demo``) or training with periodic evaluation.
    """
    import logging

    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=0,
                        help='GPU to use, set to -1 if no GPU.')
    parser.add_argument('--env', type=str, default='Hopper-v2',
                        help='OpenAI Gym MuJoCo env to perform algorithm on.')
    parser.add_argument('--num-envs', type=int, default=1,
                        help='Number of envs run in parallel.')
    parser.add_argument('--seed', type=int, default=0,
                        help='Random seed [0, 2 ** 32)')
    parser.add_argument('--outdir', type=str, default='results',
                        help='Directory path to save output files.'
                             ' If it does not exist, it will be created.')
    parser.add_argument('--steps', type=int, default=2 * 10**6,
                        help='Total number of timesteps to train the agent.')
    parser.add_argument('--eval-interval', type=int, default=100000,
                        help='Interval in timesteps between evaluations.')
    parser.add_argument('--eval-n-runs', type=int, default=100,
                        help='Number of episodes run for each evaluation.')
    parser.add_argument('--render', action='store_true',
                        help='Render env states in a GUI window.')
    parser.add_argument('--demo', action='store_true',
                        help='Just run evaluation, not training.')
    parser.add_argument('--load', type=str, default='',
                        help='Directory to load agent from.')
    parser.add_argument('--logger-level', type=int, default=logging.INFO,
                        help='Level of the root logger.')
    parser.add_argument('--monitor', action='store_true',
                        help='Wrap env with gym.wrappers.Monitor.')
    parser.add_argument('--log-interval', type=int, default=1000,
                        help='Interval in timesteps between outputting log'
                             ' messages during training')
    parser.add_argument('--update-interval', type=int, default=2048,
                        help='Interval in timesteps between model updates.')
    parser.add_argument('--epochs', type=int, default=10,
                        help='Number of epochs to update model for per PPO'
                             ' iteration.')
    parser.add_argument('--batch-size', type=int, default=64,
                        help='Minibatch size')
    args = parser.parse_args()

    logging.basicConfig(level=args.logger_level)

    # Set a random seed used in ChainerRL
    misc.set_random_seed(args.seed, gpus=(args.gpu, ))

    # Set different random seeds for different subprocesses.
    # If seed=0 and processes=4, subprocess seeds are [0, 1, 2, 3].
    # If seed=1 and processes=4, subprocess seeds are [4, 5, 6, 7].
    process_seeds = np.arange(args.num_envs) + args.seed * args.num_envs
    assert process_seeds.max() < 2**32

    args.outdir = experiments.prepare_output_dir(args, args.outdir)

    def make_env(process_idx, test):
        env = gym.make(args.env)
        # Use different random seeds for train and test envs
        process_seed = int(process_seeds[process_idx])
        env_seed = 2**32 - 1 - process_seed if test else process_seed
        env.seed(env_seed)
        # Cast observations to float32 because our model uses float32
        env = chainerrl.wrappers.CastObservationToFloat32(env)
        if args.monitor:
            env = chainerrl.wrappers.Monitor(env, args.outdir)
        if args.render:
            env = chainerrl.wrappers.Render(env)
        return env

    def make_batch_env(test):
        # One env factory per subprocess, each bound to its process index.
        return chainerrl.envs.MultiprocessVectorEnv([
            functools.partial(make_env, idx, test)
            for idx in range(args.num_envs)])

    # Only for getting timesteps, and obs-action spaces
    sample_env = gym.make(args.env)
    timestep_limit = sample_env.spec.tags.get(
        'wrapper_config.TimeLimit.max_episode_steps')
    obs_space = sample_env.observation_space
    action_space = sample_env.action_space
    print('Observation space:', obs_space)
    print('Action space:', action_space)

    assert isinstance(action_space, gym.spaces.Box)

    # Normalize observations based on their empirical mean and variance
    obs_normalizer = chainerrl.links.EmpiricalNormalization(
        obs_space.low.size, clip_threshold=5)

    # While the original paper initialized weights by normal distribution,
    # we use orthogonal initialization as the latest openai/baselines does.
    winit = chainerrl.initializers.Orthogonal(1.)
    winit_last = chainerrl.initializers.Orthogonal(1e-2)

    action_size = action_space.low.size
    policy = chainer.Sequential(
        L.Linear(None, 64, initialW=winit),
        F.tanh,
        L.Linear(None, 64, initialW=winit),
        F.tanh,
        L.Linear(None, action_size, initialW=winit_last),
        chainerrl.policies.GaussianHeadWithStateIndependentCovariance(
            action_size=action_size,
            var_type='diagonal',
            var_func=lambda x: F.exp(2 * x),  # Parameterize log std
            var_param_init=0,  # log std = 0 => std = 1
        ),
    )

    vf = chainer.Sequential(
        L.Linear(None, 64, initialW=winit),
        F.tanh,
        L.Linear(None, 64, initialW=winit),
        F.tanh,
        L.Linear(None, 1, initialW=winit),
    )

    # Combine a policy and a value function into a single model
    model = chainerrl.links.Branched(policy, vf)

    opt = chainer.optimizers.Adam(3e-4, eps=1e-5)
    opt.setup(model)

    agent = PPO(
        model,
        opt,
        obs_normalizer=obs_normalizer,
        gpu=args.gpu,
        update_interval=args.update_interval,
        minibatch_size=args.batch_size,
        epochs=args.epochs,
        clip_eps_vf=None,
        entropy_coef=0,
        standardize_advantages=True,
        gamma=0.995,
        lambd=0.97,
    )

    if args.load:
        agent.load(args.load)

    if args.demo:
        env = make_batch_env(True)
        eval_stats = experiments.eval_performance(
            env=env,
            agent=agent,
            n_steps=None,
            n_episodes=args.eval_n_runs,
            max_episode_len=timestep_limit)
        print('n_runs: {} mean: {} median: {} stdev {}'.format(
            args.eval_n_runs, eval_stats['mean'], eval_stats['median'],
            eval_stats['stdev']))
    else:
        experiments.train_agent_batch_with_evaluation(
            agent=agent,
            env=make_batch_env(False),
            eval_env=make_batch_env(True),
            outdir=args.outdir,
            steps=args.steps,
            eval_n_steps=None,
            eval_n_episodes=args.eval_n_runs,
            eval_interval=args.eval_interval,
            log_interval=args.log_interval,
            max_episode_len=timestep_limit,
            save_best_so_far_agent=False,
        )
def forward_one_step(self, x_data, state, continuous=True, nonlinear_q='tanh',
                     nonlinear_p='tanh', output_f='sigmoid', gpu=-1):
    """Encode a sequence with an RNN, sample z and decode it step by step.

    Args:
        x_data: input sequence; it is indexed along axis 0 and every step is
            reshaped to ``(1, x_data.shape[1])``.
        state: dict holding ``recog_h``; ``recog_h`` and ``gen_h`` are
            written in place.
        continuous: when true the reconstruction loss is the mean squared
            error on the activated output, otherwise the sigmoid cross
            entropy on the raw output against int32 targets.
        nonlinear_q: name of the recognition non-linearity.
        nonlinear_p: name of the generator non-linearity.
        output_f: name of the output activation.
        gpu: when ``>= 0`` noise is moved to the GPU and outputs are copied
            back to the CPU.

    Returns:
        tuple: ``(output, rec_loss, KLD, state)`` where ``output`` is a
        float32 numpy array shaped like ``x_data``.

    Raises:
        KeyError: if a non-linearity name is not supported.
    """
    output = np.zeros(x_data.shape).astype(np.float32)
    frame_shape = (1, x_data.shape[1])

    nonlinear = {'sigmoid': F.sigmoid, 'tanh': F.tanh,
                 'softplus': self.softplus, 'relu': F.relu}
    nonlinear_f_q = nonlinear[nonlinear_q]
    nonlinear_f_p = nonlinear[nonlinear_p]
    output_a_f = nonlinear[output_f]

    def step_loss(raw, activated, index):
        # Reconstruction loss of one decoded step against input step `index`.
        target = x_data[index].reshape(frame_shape)
        if continuous:
            return F.mean_squared_error(activated, Variable(target))
        return F.sigmoid_cross_entropy(
            raw, Variable(target.astype(np.int32)))

    def store(index, activated):
        # Keep a CPU copy of the decoded step in the output array.
        data = activated.data
        output[index] = cuda.to_cpu(data) if gpu >= 0 else data

    # compute q(z|x)
    for i in range(x_data.shape[0]):
        x_in_t = Variable(x_data[i].reshape(frame_shape))
        state['recog_h'] = nonlinear_f_q(
            self.recog_in_h(x_in_t) + self.recog_h_h(state['recog_h']))

    q_mean = self.recog_mean(state['recog_h'])
    # The head is read as a log-variance; half of it is log(sigma).
    q_ln_var = self.recog_log_sigma(state['recog_h'])
    q_log_sigma = 0.5 * q_ln_var

    eps = np.random.normal(0, 1, q_log_sigma.data.shape).astype(np.float32)
    if gpu >= 0:
        eps = cuda.to_gpu(eps)
    eps = Variable(eps)
    # Reparameterisation: z = mean + sigma * noise.
    z = q_mean + F.exp(q_log_sigma) * eps

    # compute p(x|z): the first step is generated from z alone.
    h0 = nonlinear_f_p(self.z(z))
    out = self.output(h0)
    x_t = output_a_f(out)
    state['gen_h'] = h0
    store(0, x_t)
    rec_loss = step_loss(out, x_t, 0)

    # Remaining steps: feed the previous activated output back in.
    for i in range(1, x_data.shape[0]):
        h_t = nonlinear_f_p(self.gen_in_h(x_t) + self.gen_h_h(state['gen_h']))
        out = self.output(h_t)
        state['gen_h'] = h_t
        x_t = output_a_f(out)
        rec_loss += step_loss(out, x_t, i)
        store(i, x_t)

    # KL(q(z|x) || N(0, I)) in terms of the variance of the distribution that
    # was sampled above, i.e. with the log-variance rather than log(sigma).
    # NOTE(review): the 0.0005 factor is kept as found; presumably a
    # deliberate down-weighting of the KL term -- confirm.
    KLD = -0.0005 * F.sum(1 + q_ln_var - q_mean**2 - F.exp(q_ln_var))

    return output, rec_loss, KLD, state
def softplus(self, x):
    """Return ``log(1 + exp(x))`` element-wise.

    Delegates to ``F.softplus`` because the literal formula overflows:
    ``exp(x)`` becomes ``inf`` for large ``x``, which turns the value into
    ``inf`` and the gradient into ``nan``.
    """
    return F.softplus(x)
def calculate_logistic_loss(self, y, t):
    """Return the negative log-likelihood of ``t`` under a logistic mixture.

    Axis 1 of ``y`` is cut into three equal parts: mixture logits, means and
    log-scales (floored at ``self.log_scale_min``). The probability of a
    target is the mass of the logistic CDF over a bin of half-width
    ``1 / (self.quantize - 1)`` around it, with the two edge bins
    (``t < -0.999`` and ``t > 0.999``) extended to infinity.
    """
    xp = chainer.cuda.get_array_module(t)
    if xp is not numpy:
        xp.cuda.Device(t.device).use()

    n_mix = y.shape[1] // 3
    logit_probs = y[:, :n_mix]
    means = y[:, n_mix:2 * n_mix]
    log_scales = y[:, 2 * n_mix:3 * n_mix]
    log_scales = F.maximum(
        log_scales, self.scalar_to_tensor(log_scales, self.log_scale_min))

    # Compare the target against every mixture component.
    t = F.broadcast_to(t, means.shape)
    centered = t - means
    inv_scale = F.exp(-log_scales)
    half_bin = 1 / (self.quantize - 1)

    upper_in = inv_scale * (centered + half_bin)
    upper_cdf = F.sigmoid(upper_in)
    lower_in = inv_scale * (centered - half_bin)
    lower_cdf = F.sigmoid(lower_in)

    # log sigmoid(u) = u - softplus(u); log(1 - sigmoid(u)) = -softplus(u).
    log_upper_cdf = upper_in - F.softplus(upper_in)
    log_one_minus_lower_cdf = -F.softplus(lower_in)
    bin_mass = upper_cdf - lower_cdf

    at_lower_edge = t.array < self.scalar_to_tensor(t, -0.999)
    at_upper_edge = t.array > self.scalar_to_tensor(t, 0.999)
    # Floor the bin mass so the logarithm stays finite.
    log_bin_mass = F.log(
        F.maximum(bin_mass, self.scalar_to_tensor(bin_mass, 1e-12)))

    log_probs = F.where(
        at_lower_edge,
        log_upper_cdf,
        F.where(at_upper_edge, log_one_minus_lower_cdf, log_bin_mass))

    # Weight by the mixture probabilities and marginalise over components.
    log_probs = log_probs + F.log_softmax(logit_probs)
    return -F.mean(F.logsumexp(log_probs, axis=1))
def var_func(x):
    """Map a log standard deviation ``x`` to the variance ``exp(x) ** 2``."""
    std = F.exp(x)
    return std ** 2
def main():
    """Train a TRPO agent on a Gym environment, or run demo episodes.

    Command-line flags select the device, environment, seed, output
    directory, training/evaluation schedule and optional model loading.
    """
    arg_parser = argparse.ArgumentParser()
    add_arg = arg_parser.add_argument
    add_arg('--gpu', type=int, default=0,
            help='GPU device ID. Set to -1 to use CPUs only.')
    add_arg('--env', type=str, default='Hopper-v2',
            help='Gym Env ID')
    add_arg('--seed', type=int, default=0,
            help='Random seed [0, 2 ** 32)')
    add_arg('--outdir', type=str, default='results',
            help='Directory path to save output files.'
                 ' If it does not exist, it will be created.')
    add_arg('--steps', type=int, default=2 * 10 ** 6,
            help='Total time steps for training.')
    add_arg('--eval-interval', type=int, default=100000,
            help='Interval between evaluation phases in steps.')
    add_arg('--eval-n-runs', type=int, default=100,
            help='Number of episodes ran in an evaluation phase')
    add_arg('--render', action='store_true', default=False,
            help='Render the env')
    add_arg('--demo', action='store_true', default=False,
            help='Run demo episodes, not training')
    add_arg('--load-pretrained', action='store_true', default=False)
    add_arg('--pretrained-type', type=str, default="best",
            choices=['best', 'final'])
    add_arg('--load', type=str, default='',
            help='Directory path to load a saved agent data from'
                 ' if it is a non-empty string.')
    add_arg('--trpo-update-interval', type=int, default=5000,
            help='Interval steps of TRPO iterations.')
    add_arg('--logger-level', type=int, default=logging.INFO,
            help='Level of the root logger.')
    add_arg('--monitor', action='store_true',
            help='Monitor the env by gym.wrappers.Monitor.'
                 ' Videos and additional log will be saved.')
    args = arg_parser.parse_args()

    logging.basicConfig(level=args.logger_level)

    # Seed the global RNGs, then resolve the output directory.
    chainerrl.misc.set_random_seed(args.seed, gpus=(args.gpu,))
    args.outdir = chainerrl.experiments.prepare_output_dir(args, args.outdir)

    def make_env(test):
        # Train and test environments get different seeds.
        if test:
            env_seed = 2 ** 32 - 1 - args.seed
        else:
            env_seed = args.seed
        env = gym.make(args.env)
        env.seed(env_seed)
        # The model works in float32, so cast the observations.
        env = chainerrl.wrappers.CastObservationToFloat32(env)
        if args.monitor:
            env = gym.wrappers.Monitor(env, args.outdir)
        if args.render:
            env = chainerrl.wrappers.Render(env)
        return env

    env = make_env(test=False)
    timestep_limit = env.spec.max_episode_steps
    obs_space = env.observation_space
    action_space = env.action_space
    print('Observation space:', obs_space)
    print('Action space:', action_space)

    assert isinstance(obs_space, gym.spaces.Box)

    # Observations are normalized by their running mean and variance.
    obs_normalizer = chainerrl.links.EmpiricalNormalization(
        obs_space.low.size, clip_threshold=5)

    # Orthogonal initialization, as OpenAI Baselines does.
    winit = chainerrl.initializers.Orthogonal(1.)
    winit_last = chainerrl.initializers.Orthogonal(1e-2)

    def hidden_stack():
        # Two tanh hidden layers of 64 units shared by both networks' layout.
        return [
            L.Linear(None, 64, initialW=winit),
            F.tanh,
            L.Linear(None, 64, initialW=winit),
            F.tanh,
        ]

    action_size = action_space.low.size
    policy_layers = hidden_stack()
    policy_layers.append(L.Linear(None, action_size, initialW=winit_last))
    policy_layers.append(
        chainerrl.policies.GaussianHeadWithStateIndependentCovariance(
            action_size=action_size,
            var_type='diagonal',
            var_func=lambda x: F.exp(2 * x),  # Parameterize log std
            var_param_init=0,  # log std = 0 => std = 1
        ))
    policy = chainer.Sequential(*policy_layers)

    vf_layers = hidden_stack()
    vf_layers.append(L.Linear(None, 1, initialW=winit))
    vf = chainer.Sequential(*vf_layers)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        for link in (policy, vf, obs_normalizer):
            link.to_gpu(args.gpu)

    # The policy is optimized by CG and line search inside TRPO, so only the
    # value function needs a chainer.Optimizer.
    vf_opt = chainer.optimizers.Adam()
    vf_opt.setup(vf)

    # Save drawings of both computational graphs in the output directory.
    fake_obs = chainer.Variable(
        policy.xp.zeros_like(obs_space.low, dtype=np.float32)[None],
        name='observation')
    policy_graph_path = os.path.join(args.outdir, 'policy')
    chainerrl.misc.draw_computational_graph(
        [policy(fake_obs)], policy_graph_path)
    vf_graph_path = os.path.join(args.outdir, 'vf')
    chainerrl.misc.draw_computational_graph([vf(fake_obs)], vf_graph_path)

    # Hyperparameters in http://arxiv.org/abs/1709.06560
    agent = chainerrl.agents.TRPO(
        policy=policy,
        vf=vf,
        vf_optimizer=vf_opt,
        obs_normalizer=obs_normalizer,
        update_interval=args.trpo_update_interval,
        max_kl=0.01,
        conjugate_gradient_max_iter=20,
        conjugate_gradient_damping=1e-1,
        gamma=0.995,
        lambd=0.97,
        vf_epochs=5,
        entropy_coef=0,
    )

    # --load and --load-pretrained are mutually exclusive.
    assert not (args.load and args.load_pretrained)
    if args.load:
        agent.load(args.load)
    elif args.load_pretrained:
        pretrained_dir = chainerrl.misc.download_model(
            "TRPO", args.env, model_type=args.pretrained_type)[0]
        agent.load(pretrained_dir)

    if not args.demo:
        chainerrl.experiments.train_agent_with_evaluation(
            agent=agent,
            env=env,
            eval_env=make_env(test=True),
            outdir=args.outdir,
            steps=args.steps,
            eval_n_steps=None,
            eval_n_episodes=args.eval_n_runs,
            eval_interval=args.eval_interval,
            train_max_episode_len=timestep_limit,
        )
        return

    env = make_env(test=True)
    eval_stats = chainerrl.experiments.eval_performance(
        env=env,
        agent=agent,
        n_steps=None,
        n_episodes=args.eval_n_runs,
        max_episode_len=timestep_limit)
    print('n_runs: {} mean: {} median: {} stdev {}'.format(
        args.eval_n_runs, eval_stats['mean'],
        eval_stats['median'], eval_stats['stdev']))
def gaussian_kl_divergence_keepbatch(self, mean, ln_var):
    """Return ``0.5 * sum(mean^2 + exp(ln_var) - ln_var - 1)`` over axis 1.

    Unlike a fully reduced KL term, the result keeps axis 0, giving one
    value per row of the inputs.

    Args:
        mean: mean parameters.
        ln_var: log-variance parameters, same shape as ``mean``.

    Returns:
        The per-row sum described above.
    """
    per_element = mean * mean + F.exp(ln_var) - ln_var - 1
    return F.sum(per_element, axis=1) * 0.5
def main():
    """Train a TRPO agent on a Gym environment, or run demo episodes.

    Supports ``gym.spaces.Box`` observations and either ``Box`` (Gaussian
    policy) or ``Discrete`` (softmax policy) action spaces; for anything else
    a message is printed and the function returns without training.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=0,
                        help='GPU device ID. Set to -1 to use CPUs only.')
    parser.add_argument('--env', type=str, default='Hopper-v1',
                        help='Gym Env ID')
    parser.add_argument('--seed', type=int, default=0,
                        help='Random seed [0, 2 ** 32)')
    parser.add_argument('--outdir', type=str, default='results',
                        help='Directory path to save output files.'
                             ' If it does not exist, it will be created.')
    parser.add_argument('--steps', type=int, default=10 ** 6,
                        help='Total time steps for training.')
    parser.add_argument('--eval-interval', type=int, default=10000,
                        help='Interval between evaluation phases in steps.')
    parser.add_argument('--eval-n-runs', type=int, default=10,
                        help='Number of episodes ran in an evaluation phase')
    parser.add_argument('--render', action='store_true', default=False,
                        help='Render the env')
    parser.add_argument('--demo', action='store_true', default=False,
                        help='Run demo episodes, not training')
    parser.add_argument('--load', type=str, default='',
                        help='Directory path to load a saved agent data from'
                             ' if it is a non-empty string.')
    parser.add_argument('--trpo-update-interval', type=int, default=5000,
                        help='Interval steps of TRPO iterations.')
    parser.add_argument('--logger-level', type=int, default=logging.INFO,
                        help='Level of the root logger.')
    parser.add_argument('--monitor', action='store_true',
                        help='Monitor the env by gym.wrappers.Monitor.'
                             ' Videos and additional log will be saved.')
    args = parser.parse_args()

    logging.basicConfig(level=args.logger_level)

    # Set random seed
    chainerrl.misc.set_random_seed(args.seed, gpus=(args.gpu,))

    args.outdir = chainerrl.experiments.prepare_output_dir(args, args.outdir)

    def make_env(test):
        env = gym.make(args.env)
        # Use different random seeds for train and test envs. The "- 1"
        # keeps the mirrored seed inside [0, 2 ** 32): without it the
        # default seed 0 would map to 2 ** 32, which is out of range.
        env_seed = 2 ** 32 - 1 - args.seed if test else args.seed
        env.seed(env_seed)
        # Cast observations to float32 because our model uses float32
        env = chainerrl.wrappers.CastObservationToFloat32(env)
        if args.monitor:
            env = gym.wrappers.Monitor(env, args.outdir)
        if args.render:
            chainerrl.misc.env_modifiers.make_rendered(env)
        return env

    env = make_env(test=False)
    timestep_limit = env.spec.tags.get(
        'wrapper_config.TimeLimit.max_episode_steps')
    obs_space = env.observation_space
    action_space = env.action_space
    print('Observation space:', obs_space)
    print('Action space:', action_space)

    if not isinstance(obs_space, gym.spaces.Box):
        print("""\
This example only supports gym.spaces.Box observation spaces. To apply it to other observation spaces, use a custom phi function that convert an observation to numpy.ndarray of numpy.float32.""")  # NOQA
        return

    # Normalize observations based on their empirical mean and variance
    obs_normalizer = chainerrl.links.EmpiricalNormalization(
        obs_space.low.size)

    if isinstance(action_space, gym.spaces.Box):
        # Use a Gaussian policy for continuous action spaces
        policy = \
            chainerrl.policies.FCGaussianPolicyWithStateIndependentCovariance(
                obs_space.low.size,
                action_space.low.size,
                n_hidden_channels=64,
                n_hidden_layers=2,
                mean_wscale=0.01,
                nonlinearity=F.tanh,
                var_type='diagonal',
                var_func=lambda x: F.exp(2 * x),  # Parameterize log std
                var_param_init=0,  # log std = 0 => std = 1
            )
    elif isinstance(action_space, gym.spaces.Discrete):
        # Use a Softmax policy for discrete action spaces
        policy = chainerrl.policies.FCSoftmaxPolicy(
            obs_space.low.size,
            action_space.n,
            n_hidden_channels=64,
            n_hidden_layers=2,
            last_wscale=0.01,
            nonlinearity=F.tanh,
        )
    else:
        print("""\
TRPO only supports gym.spaces.Box or gym.spaces.Discrete action spaces.""")  # NOQA
        return

    # Use a value function to reduce variance
    vf = chainerrl.v_functions.FCVFunction(
        obs_space.low.size,
        n_hidden_channels=64,
        n_hidden_layers=2,
        last_wscale=0.01,
        nonlinearity=F.tanh,
    )

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        policy.to_gpu(args.gpu)
        vf.to_gpu(args.gpu)
        obs_normalizer.to_gpu(args.gpu)

    # TRPO's policy is optimized via CG and line search, so it doesn't require
    # a chainer.Optimizer. Only the value function needs it.
    vf_opt = chainer.optimizers.Adam()
    vf_opt.setup(vf)

    # Draw the computational graph and save it in the output directory.
    fake_obs = chainer.Variable(
        policy.xp.zeros_like(obs_space.low, dtype=np.float32)[None],
        name='observation')
    chainerrl.misc.draw_computational_graph(
        [policy(fake_obs)], os.path.join(args.outdir, 'policy'))
    chainerrl.misc.draw_computational_graph(
        [vf(fake_obs)], os.path.join(args.outdir, 'vf'))

    # Hyperparameters in http://arxiv.org/abs/1709.06560
    agent = chainerrl.agents.TRPO(
        policy=policy,
        vf=vf,
        vf_optimizer=vf_opt,
        obs_normalizer=obs_normalizer,
        update_interval=args.trpo_update_interval,
        conjugate_gradient_max_iter=20,
        conjugate_gradient_damping=1e-1,
        gamma=0.995,
        lambd=0.97,
        vf_epochs=5,
        entropy_coef=0,
    )

    if args.load:
        agent.load(args.load)

    if args.demo:
        env = make_env(test=True)
        eval_stats = chainerrl.experiments.eval_performance(
            env=env,
            agent=agent,
            n_runs=args.eval_n_runs,
            max_episode_len=timestep_limit)
        print('n_runs: {} mean: {} median: {} stdev {}'.format(
            args.eval_n_runs, eval_stats['mean'],
            eval_stats['median'], eval_stats['stdev']))
    else:
        chainerrl.experiments.train_agent_with_evaluation(
            agent=agent,
            env=env,
            eval_env=make_env(test=True),
            outdir=args.outdir,
            steps=args.steps,
            eval_n_runs=args.eval_n_runs,
            eval_interval=args.eval_interval,
            max_episode_len=timestep_limit,
        )
def __call__(self, image, generate=False):
    """Play ``self.n_turn`` speaker/listener turns and build the loss.

    Every turn the speaker produces a word sequence from the target image and
    the current canvas, the listener paints onto the canvas from that
    sequence, and the squared error between canvas and image is recorded.
    The returned loss is the last turn's error plus a set of optional
    auxiliary terms selected by flags on ``self``. Many intermediate values
    are published through ``report``.

    Args:
        image: target batch; ``image.data`` is read for shape and size
            (presumably a chainer.Variable -- TODO confirm).
        generate: when true, return the raw histories instead of the loss.

    Returns:
        If ``generate`` is true, a tuple of (per-turn word data, per-turn
        log-probability data, per-turn canvases clipped to [0, 1]);
        otherwise the accumulated loss.
    """
    n_turn, n_word = self.n_turn, self.n_word
    train = self.train
    # train = True  # (translated) behaves oddly depending on the value of train...

    accum_loss = 0.
    sub_accum_loss = 0.
    batchsize = image.data.shape[0]

    sentence_history = []
    log_prob_history = []
    canvas_history = []
    p_dists_history = []

    # Initialize canvas of Listener (all zeros, same shape as the image)
    canvas = chainer.Variable(self.xp.zeros(image.data.shape, np.float32), volatile='auto')

    loss_list = []
    raw_loss_list = []

    # [Speaker]
    # Perceive
    hidden_image = self.speaker.perceive(image, n_turn, train=train)
    # bn_list[n_turn] is used for real image

    for turn in range(n_turn):
        # [Speaker]
        # Express the image x compared to canvas
        # Perceive
        hidden_canvas = self.speaker.perceive(canvas, turn, train=train)

        # Express
        thought = self.speaker.think(hidden_image, hidden_canvas, turn, train=train)

        sampled_word_idx_seq, log_probability, p_dists = self.speaker.speak(
            thought, n_word=n_word, train=train)

        # [Listener]
        # Interpret the expression & Paint it into canvas
        # Perceive (only canvas)
        hidden_canvas = self.listener.perceive(canvas, turn, train=train)

        # Interpret the expression with current situation (canvas)
        message_meaning = self.listener.listen(sampled_word_idx_seq, turn, train=train)

        concept = self.listener.think(hidden_canvas, message_meaning, turn, train=train)

        # ZURU ("cheat"): bypass the message and hand the speaker's thought
        # to the painter directly.
        if self.zuru:
            # concept = F.dropout(thought, ratio=0.5, train=train)
            concept = thought

        # Paint
        # canvas = self.listener.painter(
        #     canvas, concept, turn, train=train)
        canvas += self.listener.painter(concept, turn, train=train)

        # Physical limitations of canvas (leaky to make gradient active):
        # 90% of the clipped value plus 10% of the unclipped one.
        canvas = F.clip(canvas, 0., 1.) * 0.9 + canvas * 0.1

        # Save
        canvas_history.append(canvas)
        sentence_history.append(sampled_word_idx_seq)
        log_prob_history.append(log_probability)
        p_dists_history.append(p_dists)

        # Calculate communication loss: squared error summed per sample ...
        raw_loss = (canvas - image)**2
        second = reduce(lambda a, b: a * b, raw_loss.shape[1:])
        raw_loss = F.reshape(raw_loss, (raw_loss.shape[0], second))
        raw_loss = F.sum(raw_loss, axis=1)
        raw_loss_list.append(raw_loss)

        # ... and averaged over every element of the image batch.
        loss = F.sum(raw_loss) / image.data.size
        loss_list.append(loss)
        report({'l{}'.format(turn): loss}, self)
        report(
            {'p{}'.format(turn): self.xp.exp(log_probability.data.mean())},
            self)

    # Add the last loss
    accum_loss += loss_list[-1]
    report({'loss': accum_loss}, self)

    # Add (minus) reinforce: the reward is the negated per-sample error of
    # the final turn, detached from the graph via ``.data``.
    #reward = (1. - raw_loss_list[-1]).data
    reward = (-raw_loss_list[-1]).data
    # On the first call there is no running baseline yet; use the batch mean.
    baseline = self.baseline if not self.baseline is None \
        else self.xp.mean(reward)

    reinforce = F.sum(
        sum(log_prob_history) / n_turn * (reward - baseline)) / reward.size
    # Exponential moving average of the mean reward (0.99 / 0.01).
    self.baseline = self.baseline * 0.99 + self.xp.mean(reward) * 0.01 \
        if not self.baseline is None \
        else self.xp.mean(reward)

    accum_reinforce = reinforce
    report({'reward': accum_reinforce}, self)
    #sub_accum_loss -= accum_reinforce * 0.00001
    #sub_accum_loss -= accum_reinforce * 100.
    #sub_accum_loss -= accum_reinforce * 0.1
    sub_accum_loss -= accum_reinforce * 1.

    # Add loss at full turn: earlier turns' losses, decayed by 0.5 per turn
    # of distance from the end (the last turn itself is excluded here).
    if self.calc_full_turn:
        decay = 0.5
        accum_loss_full_turn = sum(loss_list[j] * decay**(n_turn - j - 1)
                                   for j in range(n_turn - 1))
        sub_accum_loss += accum_loss_full_turn
        report({'full': accum_loss_full_turn}, self)

    # Add loss of modification: hinge penalty when a turn's loss is not at
    # least ``margin`` lower than the previous turn's (detached) loss.
    if self.calc_modification:
        margin = 0.1
        accum_loss_modification = sum(
            F.relu(margin + loss_list[i] - loss_list[i - 1].data)
            for i in range(1, n_turn))
        sub_accum_loss += accum_loss_modification
        report({'mod': accum_loss_modification}, self)

    # Add loss to orthogonal matrix; ``self.calc_orthogonal_loss`` doubles as
    # the on/off flag and the weight of the term.
    if self.calc_orthogonal_loss:
        def orthogonal_regularizer(M):
            # Squared off-diagonal entries of the Gram matrix of the
            # normalized rows of M.
            nM = F.normalize(M)
            MM = F.matmul(nM, F.transpose(nM))
            iden = self.xp.identity(MM.shape[0])
            return F.sum((MM - MM * iden)**2)

        orthogonal_loss = orthogonal_regularizer(
            self.speaker.language.definition.W) + \
            orthogonal_regularizer(
                self.listener.language.definition.W)
        sub_accum_loss += orthogonal_loss * self.calc_orthogonal_loss
        report({'ortho': orthogonal_loss}, self)

    # Add balancing vocabulary: flatten the per-turn lists of distributions
    # into one list and stack them along axis 0.
    concat_p = F.concat(sum(p_dists_history, []), axis=0)
    p_mean = F.sum(concat_p, axis=0) / concat_p.shape[0]
    report({'p_mean': p_mean}, self)

    # is this meaningless?
    perplexity = F.exp(
        F.sum(sum(log_prob_history)) / len(log_prob_history) / batchsize).data
    report({'perplexity': perplexity}, self)

    # ``self.calc_importance_loss`` doubles as flag and weight, as above.
    if self.calc_importance_loss:
        def importance_regularizer(p):
            # Squared coefficient of variation of the column sums of p.
            importance = F.sum(p, axis=0)
            mean_i = F.sum(importance) / importance.size
            mean_i_bc = F.broadcast_to(mean_i[None, ], importance.shape)
            std_i = (F.sum(
                (importance - mean_i_bc)**2) / importance.size)**0.5
            cv = std_i / mean_i
            return cv**2

        importance_loss = importance_regularizer(concat_p)
        sub_accum_loss += importance_loss * self.calc_importance_loss
        report({'importance': importance_loss}, self)

    # NOTE(review): 'total' is reported before the sub loss is merged in, so
    # at this point it equals the value reported as 'loss' -- confirm intent.
    report({'total': accum_loss}, self)

    # Merge main and sub loss
    accum_loss += sub_accum_loss
    self.sub_accum_loss = sub_accum_loss.data

    if generate:
        return [[i.data for i in s] for s in sentence_history], \
            [lp.data for lp in log_prob_history], \
            [F.clip(cv, 0., 1.).data for cv in canvas_history]
    else:
        return accum_loss
def prob(self, x):
    """Return the probability of ``x`` as ``exp(self.log_prob(x))``."""
    log_p = self.log_prob(x)
    return F.exp(log_p)
def rl_agent(self, env):
    """Build a PPO agent for ``env`` and return it.

    Creates a Gaussian policy and a value function (both batch-normalized
    MLPs with 256 and 128 sigmoid hidden units and a sigmoid on the final
    linear output), joins them with ``chainerrl.links.Branched`` and wraps
    them in a CPU ``PPO`` agent optimized by Adam. The policy, value
    function, model, optimizer and agent are all stored on ``self``.

    Args:
        env: environment whose ``action_space.low.size`` gives the number of
            action dimensions.

    Returns:
        The constructed ``PPO`` agent (also available as ``self.agent``).
    """
    n_actions = env.action_space.low.size

    policy_layers = [
        L.BatchNormalization(axis=0),
        L.Linear(None, 256),
        F.sigmoid,
        L.Linear(None, 128),
        F.sigmoid,
        L.Linear(None, n_actions),
        F.sigmoid,
        chainerrl.policies.GaussianHeadWithStateIndependentCovariance(
            action_size=n_actions,
            var_type='diagonal',
            var_func=lambda x: F.exp(2 * x),  # Parameterize log std
        ),
    ]
    self.policy = chainer.Sequential(*policy_layers)

    value_layers = [
        L.BatchNormalization(axis=0),
        L.Linear(None, 256),
        F.sigmoid,
        L.Linear(None, 128),
        F.sigmoid,
        L.Linear(None, 1),
        F.sigmoid,
    ]
    self.vf = chainer.Sequential(*value_layers)

    # Combine a policy and a value function into a single model
    self.model = chainerrl.links.Branched(self.policy, self.vf)

    self.opt = chainer.optimizers.Adam(alpha=3e-3, eps=1e-5)
    self.opt.setup(self.model)

    ppo_options = dict(
        gpu=-1,
        update_interval=64,
        minibatch_size=32,
        clip_eps_vf=None,
        entropy_coef=0.001,
    )
    self.agent = PPO(self.model, self.opt, **ppo_options)
    return self.agent
def __call__(self):
    """Compute the current temperature.

    Returns:
        chainer.Variable: ``exp(self.log_temperature)``.
    """
    temperature = F.exp(self.log_temperature)
    return temperature
def exp(self):
    """Return ``exp`` of the stored ``self._lagrange_multiplier``."""
    multiplier = self._lagrange_multiplier
    return F.exp(multiplier)
def sigmoid_cross_entropy(x, z):
    """Summed sigmoid cross entropy between logits ``x`` and targets ``z``.

    Evaluates ``relu(x) - x * z + log(1 + exp(-|x|))`` element-wise and sums
    over all elements.

    Args:
        x: logits.
        z: targets with the same shape as ``x``.

    Returns:
        Scalar sum of the element-wise losses.
    """
    linear_part = F.relu(x) - x * z
    log_part = F.log(1 + F.exp(-abs(x)))
    return F.sum(linear_part + log_part)
def __call__(self, input_x, t):
    """Compute the box/confidence regression loss for one batch.

    The predictor output is reshaped to
    ``(batch, n_boxes, 5, grid_h, grid_w)`` and split into x, y, w, h and
    confidence. Default targets and per-element learning scales are built on
    the CPU, overwritten for the anchor matched to the first truth box of
    each sample, and a weighted squared-error loss is returned. Progress and
    debug information is printed along the way.

    Args:
        input_x: input batch passed to ``self.predictor``.
        t: per-sample sequence of truth boxes; each box is indexed with the
            keys ``"x"``, ``"y"``, ``"w"``, ``"h"`` and ``"label"``.

    Returns:
        Scalar loss variable.

    NOTE(review): the ``.to_gpu()`` / ``.data.get()`` calls suggest this only
    runs with the model on a GPU -- confirm.
    """
    output = self.predictor(input_x)
    batch_size, _, grid_h, grid_w = output.shape
    self.seen += batch_size
    x, y, w, h, conf = F.split_axis(F.reshape(
        output, (batch_size, self.predictor.n_boxes, 5, grid_h, grid_w)),
        (1, 2, 3, 4), axis=2)
    x = F.sigmoid(x)  # activation for x
    y = F.sigmoid(y)  # activation for y
    conf = F.sigmoid(conf)  # activation for conf

    # Train w and h toward 0 (e^w and e^h approach 1, i.e. a scale factor of
    # 1 for the responsible bbox).
    tw = np.zeros(w.shape, dtype=np.float32)
    th = np.zeros(h.shape, dtype=np.float32)

    # Train the activated x and y toward 0.5.
    tx = np.tile(0.5, x.shape).astype(np.float32)
    ty = np.tile(0.5, y.shape).astype(np.float32)

    # Learning scale of the box error for bboxes without an object center:
    # 0.1 while fewer than ``unstable_seen`` samples were seen, 0 afterwards.
    if self.seen < self.unstable_seen:
        box_learning_scale = np.tile(0.1, x.shape).astype(np.float32)
    else:
        box_learning_scale = np.tile(0, x.shape).astype(np.float32)

    # Confidence truth is 0 by default; entries whose IOU exceeds thresh are
    # not trained; only the best box of the grid cell containing an object is
    # pulled toward the true IOU.
    tconf = np.zeros(conf.shape, dtype=np.float32)
    conf_learning_scale = np.tile(0.1, conf.shape).astype(np.float32)

    # Compute the IOU between every bbox and the truth boxes (done per batch
    # element). The shifts/anchors below are broadcast to the per-sample
    # shape of the predictions.
    x_shift = Variable(
        np.broadcast_to(np.arange(grid_w, dtype=np.float32), x.shape[1:]))
    y_shift = Variable(
        np.broadcast_to(
            np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1),
            y.shape[1:]))
    w_anchor = Variable(
        np.broadcast_to(
            np.reshape(
                np.array(self.anchors, dtype=np.float32)[:, 0],
                (self.predictor.n_boxes, 1, 1, 1)), w.shape[1:]))
    h_anchor = Variable(
        np.broadcast_to(
            np.reshape(
                np.array(self.anchors, dtype=np.float32)[:, 1],
                (self.predictor.n_boxes, 1, 1, 1)), h.shape[1:]))
    x_shift.to_gpu()
    y_shift.to_gpu()
    w_anchor.to_gpu()
    h_anchor.to_gpu()
    best_ious = []
    for batch in range(batch_size):
        n_truth_boxes = len(t[batch])
        # Predicted boxes in coordinates normalized by the grid size.
        box_x = (x[batch] + x_shift) / grid_w
        box_y = (y[batch] + y_shift) / grid_h
        box_w = F.exp(w[batch]) * w_anchor / grid_w
        box_h = F.exp(h[batch]) * h_anchor / grid_h

        ious = []
        for truth_index in range(n_truth_boxes):
            truth_box_x = Variable(
                np.broadcast_to(
                    np.array(t[batch][truth_index]["x"], dtype=np.float32),
                    box_x.shape))
            truth_box_y = Variable(
                np.broadcast_to(
                    np.array(t[batch][truth_index]["y"], dtype=np.float32),
                    box_y.shape))
            truth_box_w = Variable(
                np.broadcast_to(
                    np.array(t[batch][truth_index]["w"], dtype=np.float32),
                    box_w.shape))
            truth_box_h = Variable(
                np.broadcast_to(
                    np.array(t[batch][truth_index]["h"], dtype=np.float32),
                    box_h.shape))
            truth_box_x.to_gpu()
            truth_box_y.to_gpu()
            truth_box_w.to_gpu()
            truth_box_h.to_gpu()
            ious.append(
                multi_box_iou(
                    Box(box_x, box_y, box_w, box_h),
                    Box(truth_box_x, truth_box_y, truth_box_w,
                        truth_box_h)).data.get())
        # Best IOU over all truth boxes for every predicted box.
        ious = np.array(ious)
        best_ious.append(np.max(ious, axis=0))
    best_ious = np.array(best_ious)

    # For anchors whose IOU exceeds the threshold, do not push conf toward 0
    # (grid cells around a truth box keep their conf as it is): the target is
    # set to the current prediction and the learning scale to 0.
    tconf[best_ious > self.thresh] = conf.data.get()[
        best_ious > self.thresh]
    conf_learning_scale[best_ious > self.thresh] = 0

    # Individually adjust only the anchor box that contains the object.
    abs_anchors = self.anchors / np.array([grid_w, grid_h])
    for batch in range(batch_size):
        # Only the first truth box of each sample is used from here on.
        truth_box = t[batch][0]
        # Grid cell (column, row) containing the truth box center.
        truth_w = int(float(truth_box["x"]) * grid_w)
        truth_h = int(float(truth_box["y"]) * grid_h)
        truth_n = 0
        best_iou = 0.0
        # Pick the anchor whose shape overlaps the truth box shape the most.
        for anchor_index, abs_anchor in enumerate(abs_anchors):
            iou = box_iou(
                Box(0, 0, float(truth_box["w"]), float(truth_box["h"])),
                Box(0, 0, abs_anchor[0], abs_anchor[1]))
            if best_iou < iou:
                best_iou = iou
                truth_n = anchor_index

        # For the anchor containing the object: pull the center toward the
        # true coordinates instead of 0.5, pull the anchor scale toward the
        # true scale instead of 1, and set the learning scale to 1.
        box_learning_scale[batch, truth_n, :, truth_h, truth_w] = 1.0
        tx[batch, truth_n, :, truth_h,
           truth_w] = float(truth_box["x"]) * grid_w - truth_w
        ty[batch, truth_n, :, truth_h,
           truth_w] = float(truth_box["y"]) * grid_h - truth_h
        tw[batch, truth_n, :, truth_h, truth_w] = np.log(
            float(truth_box["w"]) / abs_anchors[truth_n][0])
        th[batch, truth_n, :, truth_h, truth_w] = np.log(
            float(truth_box["h"]) / abs_anchors[truth_n][1])

        # Observe the IOU between the truth box and the current prediction
        # of the matched anchor; it becomes the confidence target.
        full_truth_box = Box(float(truth_box["x"]), float(truth_box["y"]),
                             float(truth_box["w"]), float(truth_box["h"]))
        predicted_box = Box(
            (x[batch][truth_n][0][truth_h][truth_w].data.get() + truth_w) /
            grid_w,
            (y[batch][truth_n][0][truth_h][truth_w].data.get() + truth_h) /
            grid_h,
            np.exp(w[batch][truth_n][0][truth_h][truth_w].data.get()) *
            abs_anchors[truth_n][0],
            np.exp(h[batch][truth_n][0][truth_h][truth_w].data.get()) *
            abs_anchors[truth_n][1])
        predicted_iou = box_iou(full_truth_box, predicted_box)
        tconf[batch, truth_n, :, truth_h, truth_w] = predicted_iou
        conf_learning_scale[batch, truth_n, :, truth_h, truth_w] = 10.0

        # debug prints
        print("best confidences of each grid:")
        for i in range(grid_h):
            for j in range(grid_w):
                print("%2d" % (int(conf[batch, :, :, i, j].data.max() * 100)),
                      end=" ")
            print()
        print(x[batch, truth_n, :, truth_h, truth_w].data,
              y[batch, truth_n, :, truth_h, truth_w].data,
              w[batch, truth_n, :, truth_h, truth_w].data,
              h[batch, truth_n, :, truth_h, truth_w].data)
        print(tx[batch, truth_n, :, truth_h, truth_w],
              ty[batch, truth_n, :, truth_h, truth_w],
              tw[batch, truth_n, :, truth_h, truth_w],
              th[batch, truth_n, :, truth_h, truth_w])
        print(
            "best default iou: %.2f predicted iou: %.2f confidence: %.2f class: %s"
            % (best_iou, predicted_iou,
               conf[batch][truth_n][0][truth_h][truth_w].data,
               t[batch][0]["label"]))
    print("seen = %d" % self.seen)

    # Wrap the targets and scales as Variables and move them to the GPU.
    tx, ty, tw, th, tconf = Variable(tx), Variable(ty), Variable(
        tw), Variable(th), Variable(tconf)
    box_learning_scale, conf_learning_scale = Variable(
        box_learning_scale), Variable(conf_learning_scale)
    tx.to_gpu()
    ty.to_gpu()
    tw.to_gpu()
    th.to_gpu()
    tconf.to_gpu()
    box_learning_scale.to_gpu()
    conf_learning_scale.to_gpu()

    x_loss = (tx - x)**2
    y_loss = (ty - y)**2
    w_loss = (tw - w)**2
    h_loss = (th - h)**2
    c_loss = (tconf - conf)**2
    # Weighted sum of squared errors, halved.
    loss = F.sum((x_loss + y_loss + w_loss + h_loss) * box_learning_scale +
                 c_loss * conf_learning_scale) / 2
    print("x_loss: %f y_loss: %f w_loss: %f h_loss: %f c_loss: %f" %
          (F.sum(x_loss).data, F.sum(y_loss).data, F.sum(w_loss).data,
           F.sum(h_loss).data, F.sum(c_loss).data))
    return loss
def log_prob_from_logit(x):
    """Turn logits into log-probabilities along axis 1.

    Subtracts the log-sum-exp over axis 1, computed with the per-row maximum
    removed first so the exponentials stay bounded.

    Args:
        x: logits with the classes on axis 1.

    Returns:
        Log-probabilities with the same shape as ``x``.
    """
    row_max = F.max(x, 1, keepdims=True)
    shifted = x - F.broadcast_to(row_max, x.shape)
    log_norm = row_max + F.log(F.sum(F.exp(shifted), 1, keepdims=True))
    return x - F.broadcast_to(log_norm, x.shape)
def update(self, done):
    """Accumulate return, policy-gradient and entropy terms, then train.

    Builds discounted returns and advantages from the stored rewards and
    value predictions, adds the return-prediction loss to ``self.loss``,
    subtracts the policy-gradient and entropy terms, and runs one optimizer
    step.

    Args:
        done: whether the episode terminated. If true the tail value is 0;
            otherwise the last value prediction is used to bootstrap and the
            trailing entries of ``self.R_preds`` / ``self.rewards`` are
            dropped.
    """
    if done:  # no bootstrap
        R_rev = [0]
        A_rev = [0]
        self.V_preds.append(0)
    else:  # with bootstrap
        R_rev = [self.V_preds[-1]]
        A_rev = [0]
        self.R_preds = self.R_preds[:-1]  # drop the bootstrap entry
        self.rewards = self.rewards[:-1]

    # Compute cumulative (discounted) rewards, walking backwards in time.
    r_rev = self.rewards[::-1]
    #print(r_rev)
    for r in r_rev:
        R_rev.append(r + GAMMA * R_rev[-1])
    # Drop the seed value and restore chronological order.
    R = np.array(R_rev[1:][::-1], dtype=Variable)

    # Compute advantages (discounted sum of TD errors, factor GAMMA * LAMBDA).
    N = len(r_rev)
    assert len(self.V_preds) == N + 1
    for i in range(N):
        delta = r_rev[i] + GAMMA * self.V_preds[N - i] - self.V_preds[N - i - 1]
        A_rev.append(delta + GAMMA * LAMBDA * A_rev[-1])
    A = np.array(A_rev[1:][::-1])
    #print(R)
    #print(A)
    #exit()

    # MBP loss: squared error of both V_preds and R_preds against R.
    V_preds = np.array(self.V_preds[:-1])
    R_preds = np.array(self.R_preds)
    #print(V_preds.shape)
    #print(R_preds.shape)
    #print(R.shape)
    #print(A.shape)
    #print(V_preds)
    #print(R_preds)
    #print(R)
    assert len(R) == len(R_preds) == len(V_preds) == len(A)
    R_loss = (np.sum((V_preds - R)**2) + np.sum((R_preds - R)**2)) / 2
    self.loss += ALPHA_RETURN * R_loss

    # Policy gradient
    A_ = 0
    H = 0
    self.actions = self.actions[1:]  # drop the initial-value entry
    for i in range(N):
        log_pi = self.log_pies[i]
        #print(log_pi)
        #print(self.actions[i])
        #print(A[i])
        # Log-probability of the taken action (selected where the stored
        # action entry equals 1), weighted by the advantage.
        A_ += A[i][0] * log_pi[self.actions[i] == 1][0]
        #print(F.exp(log_pi))
        #print(log_pi)
        #print(np.dot(F.exp(log_pi), log_pi))
        H += -ALPHA_ENTROPY * np.dot(F.exp(log_pi), log_pi)
    self.loss -= A_ + H  # gradient ascent

    # update
    print(" loss: {}".format(self.loss))
    self.cleargrads()
    self.loss.backward()
    self.optimizer.update()
    self.loss.unchain_backward()  # probably unnecessary since the loss is rebuilt anew?
def mkFilter(self, mean_x, mean_y, ln_var, ln_stride, ln_gamma):
    """Make the attention filters for one minibatch.

    One set of filters is built per sample (B sets for a minibatch of B
    samples) and then replicated ``self.C`` times so it is shared between
    the colour maps (C is 1 for mono, 3 for colour).

    Args:
        mean_x: (B, 1) chainer.Variable, horizontal center; mapped from
            [-1, 1] to pixel coordinates.
        mean_y: (B, 1) chainer.Variable, vertical center; mapped likewise.
        ln_var: (B, 1) chainer.Variable, log of the Gaussian width.
        ln_stride: (B, 1) chainer.Variable, log of the relative stride.
        ln_gamma: (B, 1) chainer.Variable, log of the intensity.

    Returns:
        None. The results are stored on the instance:
        ``self.Fx`` with shape (C*B, P, W), ``self.Fy`` with shape
        (C*B, P, H) and ``self.Gamma`` with shape (C*B, P, P), where P is
        ``self.patchsize``.
    """
    eps = 1e-8
    P = self.patchsize
    B = mean_x.data.shape[0]
    H = self.height
    W = self.width

    # Map the centers from [-1, 1] to pixel coordinates.
    mean_x = 0.5 * (W + 1.0) * (mean_x + 1.0)  # (B,1)
    mean_y = 0.5 * (H + 1.0) * (mean_y + 1.0)  # (B,1)
    var = F.exp(ln_var)
    stride = (self.L_edge - 1.0) / (P - 1.0) * F.exp(ln_stride)
    gamma = F.exp(ln_gamma)

    mu_x = F.broadcast_to(mean_x, (P, B, 1))  # (B,1) -> (P,B,1)
    mu_x = F.transpose(mu_x, (1, 0, 2))  # -> (B,P,1)
    mu_y = F.broadcast_to(mean_y, (P, B, 1))  # (B,1) -> (P,B,1)
    mu_y = F.transpose(mu_y, (1, 0, 2))  # -> (B,P,1)

    stride = F.broadcast_to(stride, (P, B, 1))  # (B,1) -> (P,B,1)
    stride = F.transpose(stride, (1, 0, 2))  # -> (B,P,1)

    var_x = F.broadcast_to(var, (P, W, B, 1))  # (B,1) -> (P,W,B,1)
    var_x = F.transpose(var_x, (2, 0, 1, 3))  # -> (B,P,W,1)
    var_y = F.broadcast_to(var, (P, H, B, 1))  # (B,1) -> (P,H,B,1)
    var_y = F.transpose(var_y, (2, 0, 1, 3))  # -> (B,P,H,1)

    # Center of every filter row: patch center plus its offset times stride.
    mu_x = mu_x + F.broadcast_to(self.Parray, (B, P, 1)) * stride  # (B,P,1)
    mu_y = mu_y + F.broadcast_to(self.Parray, (B, P, 1)) * stride  # (B,P,1)

    # Distance of every pixel coordinate from every filter center.
    mu_x = F.transpose(F.broadcast_to(mu_x, (W, B, P, 1)), (1, 2, 0, 3))
    mu_x = F.broadcast_to(self.Warray, (B, P, W)) - F.reshape(mu_x, (B, P, W))
    mu_y = F.transpose(F.broadcast_to(mu_y, (H, B, P, 1)), (1, 2, 0, 3))
    mu_y = F.broadcast_to(self.Harray, (B, P, H)) - F.reshape(mu_y, (B, P, H))

    var_x = F.reshape(var_x, (B, P, W))  # (B,P,W,1) -> (B,P,W)
    var_y = F.reshape(var_y, (B, P, H))  # (B,P,H,1) -> (B,P,H)

    # NOTE(review): the distance is divided by ``var`` itself before
    # squaring, i.e. exp(ln_var) acts as the Gaussian width -- confirm that
    # ln_var is meant as a log standard deviation.
    x_square = -0.5 * (mu_x / var_x)**2  # (B,P,W)
    y_square = -0.5 * (mu_y / var_y)**2  # (B,P,H)
    x_gauss = F.exp(x_square)
    y_gauss = F.exp(y_square)

    xsum = F.sum(x_gauss, 2)  # (B,P)
    ysum = F.sum(y_gauss, 2)  # (B,P)

    # Normalizers, replaced by eps wherever the row sum is not above eps so
    # the division below never sees a (near-)zero denominator.
    Zx_prev = F.transpose(F.broadcast_to(xsum, (W, B, P)), (1, 2, 0))
    enable = Variable(Zx_prev.data > eps)
    Zx = F.where(enable, Zx_prev,
                 XP.fnonzeros(Zx_prev.data.shape, val=1.0) * eps)
    Zy_prev = F.transpose(F.broadcast_to(ysum, (H, B, P)), (1, 2, 0))
    enable = Variable(Zy_prev.data > eps)
    Zy = F.where(enable, Zy_prev,
                 XP.fnonzeros(Zy_prev.data.shape, val=1.0) * eps)

    Fx = x_gauss / Zx
    Fy = y_gauss / Zy

    # Replicate the per-sample results once per colour map.
    gamma_ = F.broadcast_to(gamma, (P, P, self.C, B, 1))  # (B,1) -> (P,P,C,B,1)
    Gamma = F.reshape(F.transpose(gamma_, (4, 3, 2, 0, 1)),
                      (self.C * B, P, P))  # -> (C*B,P,P)

    Fx_ = F.broadcast_to(Fx, (self.C, B, P, W))
    Fy_ = F.broadcast_to(Fy, (self.C, B, P, H))

    Fx = F.reshape(F.transpose(Fx_, (1, 0, 2, 3)), (self.C * B, P, W))
    Fy = F.reshape(F.transpose(Fy_, (1, 0, 2, 3)), (self.C * B, P, H))

    self.Fx = Fx
    self.Fy = Fy
    self.Gamma = Gamma
def normalize_2d(x):
    """Normalize ``exp(x[0])`` so it sums to 1 over its last two axes.

    The normalizer is broadcast to the shape of the input map, so any
    spatial size is supported (the previous version only worked for
    160 x 210 maps).

    Args:
        x: indexable whose first element ``x[0]`` is the map to normalize;
            it must have at least two axes.

    Returns:
        ``exp(x[0])`` divided by its sum over the last two axes; same shape
        as ``x[0]``.
    """
    exp = F.exp(x[0])
    sums = F.sum(F.sum(exp, axis=-1), axis=-1)
    expanded = F.expand_dims(F.expand_dims(sums, axis=-1), axis=-1)
    denominator = F.broadcast_to(expanded, exp.shape)
    return exp / denominator
def multibox_focal_loss(mb_locs, mb_confs, gt_mb_locs, gt_mb_labels, k):
    r"""Computes the multibox localization loss and a focal confidence loss.

    The localization part follows the SSD multibox loss [#]_: a Huber loss
    summed over the four box coordinates, kept for positive boxes only and
    divided by the number of positives. The classification part replaces
    SSD's hard-negative-mined softmax loss by a sigmoid focal loss with
    ``alpha = 0.75`` and ``gamma = 2`` over all boxes and classes, also
    divided by the number of positives.

    .. [#] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy,
       Scott Reed, Cheng-Yang Fu, Alexander C. Berg.
       SSD: Single Shot MultiBox Detector. ECCV 2016.

    Args:
        mb_locs (chainer.Variable or array): The offsets and scales
            for predicted bounding boxes.
            Its shape is :math:`(B, K, 4)`,
            where :math:`B` is the number of samples in the batch and
            :math:`K` is the number of default bounding boxes.
        mb_confs (chainer.Variable or array): The classes of predicted
            bounding boxes.
            Its shape is :math:`(B, K, n\_class)`.
            This function assumes the first class is background (negative).
        gt_mb_locs (chainer.Variable or array): The offsets and scales
            for ground truth bounding boxes.
            Its shape is :math:`(B, K, 4)`.
        gt_mb_labels (array): The classes of ground truth bounding boxes.
            Its shape is :math:`(B, K)`. It is used as a raw array
            (compared, reshaped and used as an index), so it must not be a
            :obj:`chainer.Variable`.
        k (float): Unused. Kept for signature compatibility with the
            hard-negative-mining variant of this loss.

    Returns:
        tuple of chainer.Variable:
        This function returns two :obj:`chainer.Variable`: :obj:`loc_loss`
        and :obj:`focal_loss`. Both are zero when there is no positive box.
    """
    mb_locs = chainer.as_variable(mb_locs)
    mb_confs = chainer.as_variable(mb_confs)
    gt_mb_locs = chainer.as_variable(gt_mb_locs)

    xp = chainer.cuda.get_array_module(gt_mb_locs.array)

    positive = gt_mb_labels > 0
    n_positive = positive.sum()
    if n_positive == 0:
        z = chainer.Variable(xp.zeros((), dtype=np.float32))
        return z, z

    loc_loss = F.huber_loss(mb_locs, gt_mb_locs, 1, reduce='no')
    loc_loss = F.sum(loc_loss, axis=-1)
    loc_loss *= positive.astype(loc_loss.dtype)
    loc_loss = F.sum(loc_loss) / n_positive

    alpha = 0.75
    gamma = 2

    # One-hot encode the labels; class_num includes the background class.
    t = gt_mb_labels.reshape(gt_mb_labels.shape[0] * gt_mb_labels.shape[1], )
    class_num = mb_confs.shape[2]
    t = F.cast(chainer.as_variable(xp.eye(class_num)[t]), loc_loss.dtype)
    t = t.reshape(gt_mb_labels.shape[0], gt_mb_labels.shape[1], class_num)

    # Focal weight: pt = p if t > 0 else 1 - p, then
    # w = (alpha if t > 0 else 1 - alpha) * (1 - pt) ** gamma.
    p = F.sigmoid(mb_confs)
    pt = F.where(t.array > 0, p, 1 - p)
    w = (1 - pt)**gamma
    w = F.where(t.array > 0, alpha * w, (1 - alpha) * w)

    # Binary cross entropy on logits in the overflow-safe form used by
    # PyTorch's binary_cross_entropy_with_logits:
    # https://pytorch.org/docs/master/_modules/torch/nn/functional.html#binary_cross_entropy_with_logits
    max_val = F.clip(-mb_confs, x_min=0.0, x_max=10.0e+12)
    focal_loss = mb_confs - mb_confs * t + max_val + F.log(
        F.exp(-max_val) + F.exp(-mb_confs - max_val))
    focal_loss = F.sum(focal_loss * w) / n_positive

    return loc_loss, focal_loss
def __call__(self, input_x, t, train=True):
    """Run the predictor and return the FCN output/loss or the YOLO loss.

    When ``self.FCN`` is set only the FCN head is used: in training mode
    the softmax cross entropy against ``t`` is reported and returned,
    otherwise the softmax of the FCN output is returned.

    Otherwise the YOLO head output is split, per anchor box, into
    x, y, w, h, confidence and class scores; target arrays are built from
    the truth boxes in ``t`` and the sum of the squared-error terms is
    reported and returned.

    Args:
        input_x: Input batch forwarded to ``self.predictor``.
        t: FCN labels, or (YOLO) per-image truth boxes indexed as
            ``[label, x, y, w, h]``. Rows whose label equals ``10.0`` are
            ignored (presumably padding -- confirm with the data loader).
            The IoU pass reads the first ``n`` rows of each image, so
            ignored rows are assumed to come last.
        train: Forwarded to the predictor; also selects the FCN return
            value.

    Returns:
        The FCN loss / softmax output, or the summed YOLO loss.

    Note:
        The YOLO path calls ``to_gpu()`` and ``.data.get()``
        unconditionally, so it requires GPU arrays.
    """
    output_fcn, output_yolo = self.predictor(input_x, train=train)
    if self.FCN:
        if train:
            loss_fcn = F.softmax_cross_entropy(output_fcn, t)
            reporter.report({'loss': loss_fcn}, self)
            return loss_fcn
        return F.softmax(output_fcn)

    batch_size, _, grid_h, grid_w = output_yolo.shape
    self.seen += batch_size
    x, y, w, h, conf, prob = F.split_axis(
        F.reshape(output_yolo,
                  (batch_size, self.predictor.n_boxes,
                   self.predictor.n_classes_yolo + 5, grid_h, grid_w)),
        (1, 2, 3, 4, 5), axis=2)
    x = F.sigmoid(x)
    y = F.sigmoid(y)
    conf = F.sigmoid(conf)
    prob = F.transpose(prob, (0, 2, 1, 3, 4))
    prob = F.softmax(prob)

    # Default targets: w and h are trained toward 0 (so e^w and e^h approach
    # 1, i.e. anchor scale factor 1) and the activated x and y toward 0.5.
    tw = np.zeros(w.shape, dtype=np.float32)
    th = np.zeros(h.shape, dtype=np.float32)
    tx = np.tile(0.5, x.shape).astype(np.float32)
    ty = np.tile(0.5, y.shape).astype(np.float32)

    if self.seen < self.unstable_seen:
        box_learning_scale = np.tile(0.1, x.shape).astype(np.float32)
    else:
        box_learning_scale = np.tile(0, x.shape).astype(np.float32)

    # Confidence target is 0 by default; boxes whose IoU exceeds the
    # threshold are not trained, and only the best box of a cell that holds
    # an object is pulled toward the true IoU.
    tconf = np.zeros(conf.shape, dtype=np.float32)
    conf_learning_scale = np.tile(0.1, conf.shape).astype(np.float32)

    # Everything except the best anchor is left untrained (squared error
    # against itself is 0).
    tprob = prob.data.copy()

    # Compute the IoU of every predicted box with every truth box, one
    # batch element at a time.
    x_shift = Variable(
        np.broadcast_to(np.arange(grid_w, dtype=np.float32), x.shape[1:]))
    y_shift = Variable(
        np.broadcast_to(
            np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1),
            y.shape[1:]))
    w_anchor = Variable(
        np.broadcast_to(
            np.reshape(
                np.array(self.anchors, dtype=np.float32)[:, 0],
                (self.predictor.n_boxes, 1, 1, 1)), w.shape[1:]))
    h_anchor = Variable(
        np.broadcast_to(
            np.reshape(
                np.array(self.anchors, dtype=np.float32)[:, 1],
                (self.predictor.n_boxes, 1, 1, 1)), h.shape[1:]))
    for shifted in (x_shift, y_shift, w_anchor, h_anchor):
        shifted.to_gpu()

    # Bring the truth boxes to the host once; previously this conversion was
    # repeated inside the innermost loop.
    t = chainer.cuda.to_cpu(t)

    best_ious = []
    for batch in range(batch_size):
        n_truth_boxes = int(sum(box[0] != 10.0 for box in t[batch]))
        box_x = (x[batch] + x_shift) / grid_w
        box_y = (y[batch] + y_shift) / grid_h
        box_w = F.exp(w[batch]) * w_anchor / grid_w
        box_h = F.exp(h[batch]) * h_anchor / grid_h

        ious = []
        for truth_index in range(n_truth_boxes):
            truth = t[batch][truth_index]
            truth_box_x = Variable(
                np.broadcast_to(
                    np.array(truth[1], dtype=np.float32), box_x.shape))
            truth_box_y = Variable(
                np.broadcast_to(
                    np.array(truth[2], dtype=np.float32), box_y.shape))
            truth_box_w = Variable(
                np.broadcast_to(
                    np.array(truth[3], dtype=np.float32), box_w.shape))
            truth_box_h = Variable(
                np.broadcast_to(
                    np.array(truth[4], dtype=np.float32), box_h.shape))
            for truth_var in (truth_box_x, truth_box_y,
                              truth_box_w, truth_box_h):
                truth_var.to_gpu()
            ious.append(
                multi_box_iou(
                    Box(box_x, box_y, box_w, box_h),
                    Box(truth_box_x, truth_box_y, truth_box_w,
                        truth_box_h)).data.get())

        if ious:
            best_ious.append(np.max(np.array(ious), axis=0))
        else:
            # No truth box for this image: use an all-zero IoU map with the
            # same shape as the other entries. A bare scalar here made the
            # stacked array ragged whenever the batch mixed images with and
            # without boxes, which broke the boolean masks below.
            best_ious.append(np.zeros(box_x.shape, dtype=np.float32))
    best_ious = np.array(best_ious)

    # For anchors whose IoU is above the threshold, do not push the
    # confidence down to 0 (cells around a truth box keep their confidence).
    tconf[best_ious > self.thresh] = conf.data.get()[
        best_ious > self.thresh]
    conf_learning_scale[best_ious > self.thresh] = 0

    # Only for the anchor box that holds an object, adjust x, y, w, h, conf
    # and prob individually.
    abs_anchors = self.anchors / np.array([grid_w, grid_h])
    for batch in range(batch_size):
        for truth_box in t[batch]:
            if truth_box[0] == 10.0:
                continue
            truth_w = int(float(truth_box[1]) * grid_w)
            truth_h = int(float(truth_box[2]) * grid_h)

            # Pick the anchor whose shape best matches the truth box.
            truth_n = 0
            best_iou = 0.0
            for anchor_index, abs_anchor in enumerate(abs_anchors):
                iou = box_iou(
                    Box(0, 0, float(truth_box[3]), float(truth_box[4])),
                    Box(0, 0, abs_anchor[0], abs_anchor[1]))
                if best_iou < iou:
                    best_iou = iou
                    truth_n = anchor_index

            # For the responsible anchor, pull the centre toward the true
            # coordinates instead of 0.5 and the scale toward the true
            # scale instead of 1, with a learning scale of 1.
            box_learning_scale[batch, truth_n, :, truth_h, truth_w] = 1.0
            tx[batch, truth_n, :, truth_h,
               truth_w] = float(truth_box[1]) * grid_w - truth_w
            ty[batch, truth_n, :, truth_h,
               truth_w] = float(truth_box[2]) * grid_h - truth_h
            tw[batch, truth_n, :, truth_h, truth_w] = np.log(
                float(truth_box[3]) / abs_anchors[truth_n][0])
            th[batch, truth_n, :, truth_h, truth_w] = np.log(
                float(truth_box[4]) / abs_anchors[truth_n][1])
            tprob[batch, :, truth_n, truth_h, truth_w] = 0
            tprob[batch, int(truth_box[0]), truth_n, truth_h, truth_w] = 1

            # Measure the IoU between the truth box and the current
            # prediction; it becomes the confidence target.
            full_truth_box = Box(
                float(truth_box[1]), float(truth_box[2]),
                float(truth_box[3]), float(truth_box[4]))
            predicted_box = Box(
                (x[batch][truth_n][0][truth_h][truth_w].data.get() +
                 truth_w) / grid_w,
                (y[batch][truth_n][0][truth_h][truth_w].data.get() +
                 truth_h) / grid_h,
                np.exp(w[batch][truth_n][0][truth_h][truth_w].data.get()) *
                abs_anchors[truth_n][0],
                np.exp(h[batch][truth_n][0][truth_h][truth_w].data.get()) *
                abs_anchors[truth_n][1])
            predicted_iou = box_iou(full_truth_box, predicted_box)
            tconf[batch, truth_n, :, truth_h, truth_w] = predicted_iou
            conf_learning_scale[batch, truth_n, :, truth_h, truth_w] = 10.0

    # Loss computation.
    tx, ty, tw, th, tconf, tprob = [
        Variable(target) for target in (tx, ty, tw, th, tconf, tprob)]
    box_learning_scale = Variable(box_learning_scale)
    conf_learning_scale = Variable(conf_learning_scale)
    for target in (tx, ty, tw, th, tconf, tprob,
                   box_learning_scale, conf_learning_scale):
        target.to_gpu()

    x_loss = F.sum((tx - x)**2 * box_learning_scale) / 2
    y_loss = F.sum((ty - y)**2 * box_learning_scale) / 2
    w_loss = F.sum((tw - w)**2 * box_learning_scale) / 2
    h_loss = F.sum((th - h)**2 * box_learning_scale) / 2
    c_loss = F.sum((tconf - conf)**2 * conf_learning_scale) / 2
    p_loss = F.sum((tprob - prob)**2) / 2

    # Each term is already a scalar, so its data is reported directly.
    reporter.report({
        'x_loss': x_loss.data,
        'y_loss': y_loss.data,
        'w_loss': w_loss.data,
        'h_loss': h_loss.data,
        'c_loss': c_loss.data,
        'p_loss': p_loss.data,
    }, self)

    loss_yolo = x_loss + y_loss + w_loss + h_loss + c_loss + p_loss
    reporter.report({'loss': loss_yolo}, self)
    return loss_yolo
def forward_one_step(self, x_data, y_data, n_layers_recog, n_layers_gen,
                     nonlinear_q='softplus', nonlinear_p='softplus',
                     output_f='sigmoid', type_qx='gaussian',
                     type_px='gaussian', gpu=-1):
    """Run one encode/sample/decode pass of the conditional VAE.

    Args:
        x_data: Input array; wrapped in a ``Variable`` and used as the
            reconstruction target.
        y_data: Conditioning array fed to both recognition and generator.
        n_layers_recog: Number of recognition layers; links
            ``recog_0 .. recog_{n-2}`` are applied after the input layer.
        n_layers_gen: Number of generator layers; links
            ``gen_0 .. gen_{n-2}`` are applied before ``gen_out``.
        nonlinear_q: Key of the recognition non-linearity
            (``'sigmoid'``, ``'tanh'``, ``'softplus'`` or ``'relu'``).
        nonlinear_p: Key of the generator non-linearity (same choices).
        output_f: Key of the output activation
            (``'sigmoid'``, ``'identity'`` or ``'tanh'``).
        type_qx: Unused; kept for interface compatibility.
        type_px: Unused; kept for interface compatibility.
        gpu: When ``>= 0`` the sampled noise is moved to the GPU.

    Returns:
        Tuple ``(rec_loss, KLD, output)``: mean squared reconstruction
        error, KL divergence divided by ``x_data.shape[0] *
        x_data.shape[1]``, and the generator output.

    Raises:
        KeyError: If a non-linearity or output-activation key is unknown.
    """
    x = Variable(x_data)
    y = Variable(y_data)

    # Select the non-linearities by name.
    nonlinear = {
        'sigmoid': F.sigmoid,
        'tanh': F.tanh,
        'softplus': self.softplus,
        'relu': F.relu
    }
    nonlinear_f_q = nonlinear[nonlinear_q]
    nonlinear_f_p = nonlinear[nonlinear_p]

    output_activation = {
        'sigmoid': F.sigmoid,
        'identity': self.identity,
        'tanh': F.tanh
    }
    output_a_f = output_activation[output_f]

    # Compute q(z|x, y).
    hidden_q = [nonlinear_f_q(self.recog_x(x) + self.recog_y(y))]
    for i in range(n_layers_recog - 1):
        hidden_q.append(
            nonlinear_f_q(getattr(self, 'recog_%i' % i)(hidden_q[-1])))

    q_mean = getattr(self, 'recog_mean')(hidden_q[-1])
    # The sampling step below uses exp(0.5 * recog_log) as the standard
    # deviation, so the raw link output plays the role of log(sigma^2).
    q_log_var = getattr(self, 'recog_log')(hidden_q[-1])
    q_log_sigma = 0.5 * q_log_var

    eps = np.random.normal(
        0, 1,
        (x.data.shape[0], q_log_sigma.data.shape[1])).astype('float32')
    if gpu >= 0:
        eps = cuda.to_gpu(eps)
    eps = Variable(eps)
    z = q_mean + F.exp(q_log_sigma) * eps

    # Compute p(x|y, z).
    hidden_p = [nonlinear_f_p(self.gen_y(y) + self.gen_z(z))]
    for i in range(n_layers_gen - 1):
        hidden_p.append(
            nonlinear_f_p(getattr(self, 'gen_%i' % i)(hidden_p[-1])))
    hidden_p.append(output_a_f(getattr(self, 'gen_out')(hidden_p[-1])))
    output = hidden_p[-1]

    rec_loss = F.mean_squared_error(output, x)
    # KL(N(mu, sigma^2) || N(0, 1)) = -0.5 * sum(1 + log sigma^2 - mu^2
    # - sigma^2). The previous code plugged log(sigma) into this formula,
    # which did not match the sigma used for sampling z.
    KLD = -0.5 * F.sum(1 + q_log_var - q_mean**2 -
                       F.exp(q_log_var)) / (x_data.shape[0] *
                                            x_data.shape[1])
    return rec_loss, KLD, output
def binary_cross_entropy_with_logits(x, t):
    """Return the summed binary cross entropy of logits ``x`` against ``t``.

    The loss is evaluated as ``x - x * t + m + log(exp(-m) + exp(-x - m))``
    with ``m = max(-x, 0)``, so neither exponential receives a positive
    argument.
    """
    # m = max(-x, 0), obtained by clipping -x from below at zero.
    neg_part = F.clip(-x, x_min=0., x_max=np.inf)
    log_term = F.log(F.exp(-neg_part) + F.exp(-x - neg_part))
    per_element = x - x * t + neg_part + log_term
    return F.sum(per_element)
def generate(self, sample_x, sample_y, n_layers_recog, n_layers_gen,
             nonlinear_q='relu', nonlinear_p='relu', output_f='sigmoid',
             gpu=-1):
    """Encode one sample and decode it once for every possible label.

    The latent code ``z`` is sampled from the recognition model given
    ``sample_x`` and ``sample_y``; the generator is then run with that same
    ``z`` and a one-hot label for each of the ``sample_y.shape[1]`` classes.

    Args:
        sample_x: Input array. Each decoded output is stored as one row of
            the result, so this presumably holds a single sample
            (batch size 1) -- confirm with the caller.
        sample_y: Label array for the recognition model; its second
            dimension gives the number of labels.
        n_layers_recog: Number of recognition layers.
        n_layers_gen: Number of generator layers.
        nonlinear_q: Key of the recognition non-linearity.
        nonlinear_p: Key of the generator non-linearity.
        output_f: Key of the output activation.
        gpu: When ``>= 0`` the noise and the one-hot labels are moved to
            the GPU and the outputs are copied back to the host.

    Returns:
        ``numpy.ndarray`` of shape
        ``(sample_y.shape[1], sample_x.shape[1])`` and dtype float32, one
        row per label.

    Raises:
        KeyError: If a non-linearity or output-activation key is unknown.
    """
    x = Variable(sample_x)
    y = Variable(sample_y)

    # Select the non-linearities by name.
    nonlinear = {
        'sigmoid': F.sigmoid,
        'tanh': F.tanh,
        'softplus': self.softplus,
        'relu': F.relu
    }
    nonlinear_f_q = nonlinear[nonlinear_q]
    nonlinear_f_p = nonlinear[nonlinear_p]

    output_activation = {
        'sigmoid': F.sigmoid,
        'identity': self.identity,
        'tanh': F.tanh
    }
    output_a_f = output_activation[output_f]

    # Compute q(z|x, y).
    hidden_q = [nonlinear_f_q(self.recog_x(x) + self.recog_y(y))]
    for i in range(n_layers_recog - 1):
        hidden_q.append(
            nonlinear_f_q(getattr(self, 'recog_%i' % i)(hidden_q[-1])))

    q_mean = getattr(self, 'recog_mean')(hidden_q[-1])
    q_log_sigma = 0.5 * getattr(self, 'recog_log')(hidden_q[-1])
    eps = np.random.normal(
        0, 1,
        (x.data.shape[0], q_log_sigma.data.shape[1])).astype('float32')
    if gpu >= 0:
        eps = cuda.to_gpu(eps)
    eps = Variable(eps)
    z = q_mean + F.exp(q_log_sigma) * eps

    n_labels = sample_y.shape[1]
    outputs = np.zeros((n_labels, sample_x.shape[1]), dtype=np.float32)
    for label in range(n_labels):
        # A separate name is used so the ``sample_y`` argument is not
        # overwritten inside the loop.
        one_hot = np.zeros((1, n_labels), dtype=np.float32)
        one_hot[0][label] = 1.
        if gpu >= 0:
            # The generator runs on the same device as z, so the label must
            # be moved there as well.
            one_hot = cuda.to_gpu(one_hot)

        # Compute p(x|y, z).
        hidden_p = [
            nonlinear_f_p(self.gen_y(Variable(one_hot)) + self.gen_z(z))
        ]
        for i in range(n_layers_gen - 1):
            hidden_p.append(
                nonlinear_f_p(getattr(self, 'gen_%i' % i)(hidden_p[-1])))
        hidden_p.append(output_a_f(getattr(self, 'gen_out')(hidden_p[-1])))
        output = hidden_p[-1]

        # to_cpu passes host arrays through and copies device arrays back,
        # so the NumPy result buffer can be filled in both modes.
        outputs[label] = cuda.to_cpu(output.data)
    return outputs
# Benchmark fragment: time ten forward/backward passes of the layer written
# as a composition of chainer functions. W, b, x, log_sigma2, loga_threshold,
# xp and vs1 are defined outside this fragment.
times = []
for i in range(10):
    # Reset the gradients left over from the previous pass.
    W.cleargrad()
    b.cleargrad()
    x.cleargrad()
    log_sigma2.cleargrad()
    # Reseed before every pass so the sampled noise is the same each time.
    xp.random.seed(777)
    start = time.time()
    # log_alpha = log_sigma2 - log(W^2), clipped to [-8, 8].
    log_alpha = F.clip(log_sigma2 - F.log(W * W + 1e-8), -8., 8.)
    # Zero out the weights whose log_alpha exceeds the threshold.
    clip_mask = (log_alpha.data > loga_threshold)
    _W = (1. - clip_mask) * W
    # Output mean and standard deviation; the noise is added to the output.
    mu = F.linear(x, _W)
    si = F.sqrt(F.linear(x * x, F.exp(log_alpha) * _W * _W) + 1e-8)
    normal_noise = xp.random.standard_normal(mu.shape).astype('f')
    y = mu + si * normal_noise
    # NOTE(review): b.cleargrad() above and b.grad below are used
    # unconditionally, so b is presumably never None here -- confirm.
    if b is not None:
        y = F.bias(y, b)
    F.sum(y).backward()
    # Output and gradients of this pass, paired with vs1 below.
    vs2 = [y.data, W.grad, b.grad, x.grad, log_sigma2.grad, ]
    times.append(time.time() - start)
# Only the last five timings enter the mean (the first five are presumably
# treated as warm-up).
print('composition', numpy.mean(times[5:]))
# The body of this loop lies outside the visible fragment.
for v1, v2 in zip(vs1, vs2):
def __call__(self, input_x, t):
    """Compute the YOLO training loss for one mini-batch.

    The predictor output is split, per anchor box, into x, y, w, h,
    confidence and class scores. Target arrays are built from the truth
    boxes in ``t`` and the sum of the squared-error terms is returned. The
    individual terms are printed on every call.

    Args:
        input_x: Input ``Variable``; its ``volatile`` flag is propagated to
            every ``Variable`` created here.
        t: Per-image sequences of truth boxes, each a mapping with the keys
            ``"x"``, ``"y"``, ``"w"``, ``"h"`` and ``"label"``. An image may
            have no truth boxes.

    Returns:
        The summed loss ``x + y + w + h + conf + prob``.

    Note:
        ``to_gpu()`` and ``.data.get()`` are called unconditionally, so
        GPU arrays are required.
    """
    isVola = input_x.volatile
    output = self.predictor(input_x)
    batch_size, _, grid_h, grid_w = output.shape
    if self.predictor.train == True:
        self.seen += batch_size
    x, y, w, h, conf, prob = F.split_axis(
        F.reshape(output,
                  (batch_size, self.predictor.n_boxes,
                   self.predictor.n_classes+5, grid_h, grid_w)),
        (1, 2, 3, 4, 5), axis=2)
    x = F.sigmoid(x)  # activation of x
    y = F.sigmoid(y)  # activation of y
    conf = F.sigmoid(conf)  # activation of conf
    prob = F.transpose(prob, (0, 2, 1, 3, 4))
    prob = F.softmax(prob)  # activation of the class probabilities

    # Prepare the targets. w and h are trained toward 0 (so e^w and e^h
    # approach 1, i.e. anchor scale factor 1) and the activated x and y
    # toward 0.5.
    tw = np.zeros(w.shape, dtype=np.float32)
    th = np.zeros(h.shape, dtype=np.float32)
    tx = np.tile(0.5, x.shape).astype(np.float32)
    ty = np.tile(0.5, y.shape).astype(np.float32)

    # Learning scale of the box error for boxes without an object centre
    # is 0.1 while still in the unstable phase, 0 afterwards.
    if self.seen < self.unstable_seen:
        box_learning_scale = np.tile(0.1, x.shape).astype(np.float32)
    else:
        box_learning_scale = np.tile(0, x.shape).astype(np.float32)

    # Confidence target is 0 by default; boxes whose IoU exceeds the
    # threshold are not trained, and only the best box of a cell that holds
    # an object is pulled toward the true IoU.
    tconf = np.zeros(conf.shape, dtype=np.float32)
    conf_learning_scale = np.tile(0.1, conf.shape).astype(np.float32)

    # Everything except the best anchor is left untrained (squared error
    # against itself is 0).
    tprob = prob.data.copy()

    # Compute the IoU of every predicted box with every truth box, one
    # batch element at a time.
    x_shift = Variable(
        np.broadcast_to(np.arange(grid_w, dtype=np.float32), x.shape[1:]),
        volatile=isVola)
    y_shift = Variable(
        np.broadcast_to(
            np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1),
            y.shape[1:]),
        volatile=isVola)
    w_anchor = Variable(
        np.broadcast_to(
            np.reshape(np.array(self.anchors, dtype=np.float32)[:, 0],
                       (self.predictor.n_boxes, 1, 1, 1)),
            w.shape[1:]),
        volatile=isVola)
    h_anchor = Variable(
        np.broadcast_to(
            np.reshape(np.array(self.anchors, dtype=np.float32)[:, 1],
                       (self.predictor.n_boxes, 1, 1, 1)),
            h.shape[1:]),
        volatile=isVola)
    for shifted in (x_shift, y_shift, w_anchor, h_anchor):
        shifted.to_gpu()

    best_ious = []
    for batch in range(batch_size):
        n_truth_boxes = len(t[batch])
        box_x = (x[batch] + x_shift) * 1.0 / grid_w
        box_y = (y[batch] + y_shift) * 1.0 / grid_h
        box_w = F.exp(w[batch]) * w_anchor * 1.0 / grid_w
        box_h = F.exp(h[batch]) * h_anchor * 1.0 / grid_h

        ious = []
        for truth_index in range(n_truth_boxes):
            truth = t[batch][truth_index]
            truth_box_x = Variable(
                np.broadcast_to(np.array(truth["x"], dtype=np.float32),
                                box_x.shape),
                volatile=isVola)
            truth_box_y = Variable(
                np.broadcast_to(np.array(truth["y"], dtype=np.float32),
                                box_y.shape),
                volatile=isVola)
            truth_box_w = Variable(
                np.broadcast_to(np.array(truth["w"], dtype=np.float32),
                                box_w.shape),
                volatile=isVola)
            truth_box_h = Variable(
                np.broadcast_to(np.array(truth["h"], dtype=np.float32),
                                box_h.shape),
                volatile=isVola)
            for truth_var in (truth_box_x, truth_box_y,
                              truth_box_w, truth_box_h):
                truth_var.to_gpu()
            ious.append(
                multi_box_iou(
                    Box(box_x, box_y, box_w, box_h),
                    Box(truth_box_x, truth_box_y, truth_box_w,
                        truth_box_h)).data.get())

        if ious:
            best_ious.append(np.max(np.array(ious), axis=0))
        else:
            # An image without truth boxes used to crash here because
            # np.max cannot reduce an empty array. Use an all-zero IoU map
            # of the usual shape so nothing in that image passes the
            # threshold.
            best_ious.append(np.zeros(box_x.shape, dtype=np.float32))
    best_ious = np.array(best_ious)

    # For anchors whose IoU is above the threshold, do not push the
    # confidence down to 0 (cells around a truth box keep their confidence).
    tconf[best_ious > self.thresh] = conf.data.get()[best_ious > self.thresh]
    conf_learning_scale[best_ious > self.thresh] = 0

    # Only for the anchor box that holds an object, adjust x, y, w, h, conf
    # and prob individually.
    abs_anchors = self.anchors / np.array([grid_w, grid_h])
    for batch in range(batch_size):
        for truth_box in t[batch]:
            truth_w = int(float(truth_box["x"]) * grid_w)
            truth_h = int(float(truth_box["y"]) * grid_h)

            # Pick the anchor whose shape best matches the truth box.
            truth_n = 0
            best_iou = 0.0
            for anchor_index, abs_anchor in enumerate(abs_anchors):
                iou = box_iou(
                    Box(0, 0, float(truth_box["w"]), float(truth_box["h"])),
                    Box(0, 0, abs_anchor[0], abs_anchor[1]))
                if best_iou < iou:
                    best_iou = iou
                    truth_n = anchor_index

            # For the responsible anchor, pull the centre toward the true
            # coordinates instead of 0.5 and the scale toward the true
            # scale instead of 1, with a learning scale of 1.
            box_learning_scale[batch, truth_n, :, truth_h, truth_w] = 1.0
            tx[batch, truth_n, :, truth_h, truth_w] = (
                float(truth_box["x"]) * grid_w - truth_w)
            ty[batch, truth_n, :, truth_h, truth_w] = (
                float(truth_box["y"]) * grid_h - truth_h)
            tw[batch, truth_n, :, truth_h, truth_w] = np.log(
                float(truth_box["w"]) * 1.0 / abs_anchors[truth_n][0])
            th[batch, truth_n, :, truth_h, truth_w] = np.log(
                float(truth_box["h"]) * 1.0 / abs_anchors[truth_n][1])
            tprob[batch, :, truth_n, truth_h, truth_w] = 0
            tprob[batch, int(truth_box["label"]), truth_n,
                  truth_h, truth_w] = 1

            # Measure the IoU between the truth box and the current
            # prediction; it becomes the confidence target.
            full_truth_box = Box(
                float(truth_box["x"]), float(truth_box["y"]),
                float(truth_box["w"]), float(truth_box["h"]))
            predicted_box = Box(
                (x[batch][truth_n][0][truth_h][truth_w].data.get() +
                 truth_w) * 1.0 / grid_w,
                (y[batch][truth_n][0][truth_h][truth_w].data.get() +
                 truth_h) * 1.0 / grid_h,
                np.exp(w[batch][truth_n][0][truth_h][truth_w].data.get()) *
                abs_anchors[truth_n][0],
                np.exp(h[batch][truth_n][0][truth_h][truth_w].data.get()) *
                abs_anchors[truth_n][1]
            )
            predicted_iou = box_iou(full_truth_box, predicted_box)
            tconf[batch, truth_n, :, truth_h, truth_w] = predicted_iou
            conf_learning_scale[batch, truth_n, :, truth_h, truth_w] = 10.0

    # Loss computation.
    tx, ty, tw, th, tconf, tprob = [
        Variable(target, volatile=isVola)
        for target in (tx, ty, tw, th, tconf, tprob)]
    box_learning_scale = Variable(box_learning_scale, volatile=isVola)
    conf_learning_scale = Variable(conf_learning_scale, volatile=isVola)
    for target in (tx, ty, tw, th, tconf, tprob,
                   box_learning_scale, conf_learning_scale):
        target.to_gpu()

    x_loss = F.sum((tx - x) ** 2 * box_learning_scale) / 2.0
    y_loss = F.sum((ty - y) ** 2 * box_learning_scale) / 2.0
    w_loss = F.sum((tw - w) ** 2 * box_learning_scale) / 2.0
    h_loss = F.sum((th - h) ** 2 * box_learning_scale) / 2.0
    c_loss = F.sum((tconf - conf) ** 2 * conf_learning_scale) / 2.0
    p_loss = F.sum((tprob - prob) ** 2) / 2.0
    # Each term is already a scalar, so its data is printed directly.
    print("x_loss: %f y_loss: %f w_loss: %f h_loss: %f c_loss: %f p_loss: %f" %
          (x_loss.data, y_loss.data, w_loss.data,
           h_loss.data, c_loss.data, p_loss.data))

    loss = x_loss + y_loss + w_loss + h_loss + c_loss + p_loss
    return loss
def __call__(self, x, rel_y, neighbor_entities, neighbor_dict, assign,
             entities, relations, RC, EC, t, assignEtoN):
    """Aggregate neighbour features per entity with attention weights.

    When ``self.layer == 0`` the call is delegated to ``self.easy_case``.
    Otherwise, for each entity, every assigned neighbour row is concatenated
    with the relation row and passed through the ``forwardH`` link (when
    ``(entity, neighbour)`` is a key of ``relations``) or the ``forwardT``
    link (otherwise, looked up as ``(neighbour, entity)``). A score from the
    ``AttL`` link is exponentiated and used as the weight, and the result
    for the entity is ``sum(feature * weight) / sum(weight)``.

    ``neighbor_entities`` and ``assign`` are only forwarded to
    ``easy_case``; ``EC`` is not used.

    Returns:
        The per-entity results concatenated along axis 0.
    """
    if self.layer == 0:
        return self.easy_case(x, neighbor_entities, neighbor_dict, assign,
                              entities, relations)

    # Split the stacked inputs into one row per neighbour / entity /
    # relation. A single row is wrapped in a list instead of being split.
    n_neighbors = len(neighbor_dict)
    x = [x] if n_neighbors == 1 else F.split_axis(x, n_neighbors, axis=0)
    n_entities = len(entities)
    t = [t] if n_entities == 1 else F.split_axis(t, n_entities, axis=0)
    rel_y = F.split_axis(rel_y, len(RC), axis=0)

    pooled = []
    for ent_idx, entity in enumerate(entities):
        entity_row = t[ent_idx]
        head_inputs = []
        tail_inputs = []
        att_inputs = []
        directions = []

        for neighbor in assignEtoN[ent_idx]:
            neighbor_row = x[neighbor_dict[neighbor]]
            # Even code: (entity, neighbour) relation; odd code: the
            # reversed pair.
            if (entity, neighbor) in relations:
                rel_code = relations[(entity, neighbor)] * 2
            else:
                rel_code = relations[(neighbor, entity)] * 2 + 1
            rel_row = rel_y[rel_code // 2]

            # Input of the attention link: neighbour, entity, relation.
            att_in = F.concat((neighbor_row, entity_row), axis=1)
            att_in = F.concat((att_in, rel_row), axis=1)
            att_inputs.append(att_in)

            # Input of the value link: neighbour, relation.
            value_in = F.concat((neighbor_row, rel_row), axis=1)
            if rel_code % 2 == 0:
                head_inputs.append(value_in)
                directions.append(1)
            else:
                tail_inputs.append(value_in)
                directions.append(-1)

        # Run each link once on the stacked rows, then split the result
        # back into per-neighbour pieces.
        head_outputs = []
        tail_outputs = []
        if head_inputs:
            head_batch = F.concat(head_inputs, axis=0)
            head_result = getattr(self, self.forwardH[0][0])(head_batch)
            head_outputs = F.split_axis(head_result, len(head_inputs),
                                        axis=0)
        if tail_inputs:
            tail_batch = F.concat(tail_inputs, axis=0)
            tail_result = getattr(self, self.forwardT[0][0])(tail_batch)
            tail_outputs = F.split_axis(tail_result, len(tail_inputs),
                                        axis=0)
        att_batch = F.concat(att_inputs, axis=0)
        att_result = getattr(self, self.AttL[0][0])(att_batch)
        att_outputs = F.split_axis(att_result, len(att_inputs), axis=0)

        weights = []
        features = []
        head_pos = 0
        tail_pos = 0
        for pos, direction in enumerate(directions):
            # Repeat the score 200 times and reshape it to (-1, 200)
            # (presumably the feature width) before exponentiating.
            score = F.reshape(F.repeat(att_outputs[pos], 200), [-1, 200])
            weights.append(F.exp(score))
            if direction == 1:
                features.append(head_outputs[head_pos])
                head_pos += 1
            elif direction == -1:
                features.append(tail_outputs[tail_pos])
                tail_pos += 1

        weighted = [feature * weight
                    for feature, weight in zip(features, weights)]
        pooled.append(sum(weighted) / (sum(weights)))

    return F.concat(pooled, axis=0)
def __call__(self, x):
    """Map ``x`` through the network in a shifted log domain.

    The input is transformed to ``log(x) + 13.0``, passed through
    ``l1``, ``l2`` and ``l3`` with leaky-ReLU activations, and the output
    of ``l9`` is mapped back with ``exp(. - 13.0)``.
    """
    hidden = F.log(x) + 13.0
    for layer in (self.l1, self.l2, self.l3):
        hidden = F.leaky_relu(layer(hidden))
    shifted_log_out = self.l9(hidden) - 13.0
    return F.exp(shifted_log_out)
def __call__(self, x):
    """Convolve ``x`` and rescale the result by ``exp(self.scale * 3.0)``.

    The gain is broadcast to the shape of the convolution output before
    the element-wise multiplication.
    """
    conv_out = self.conv(x)
    gain = cf.exp(self.scale * 3.0)
    return conv_out * cf.broadcast_to(gain, conv_out.shape)
def decode_cupy(self, z):
    """Decode ``z`` with training mode and back-propagation disabled.

    ``z`` is transposed before being wrapped in a ``Variable`` and the
    result is transposed back, so input and output share the same
    orientation. Returns the raw array (``.data``), not a ``Variable``.
    """
    with chainer.using_config('train', False):
        with chainer.no_backprop_mode():
            latent = chainer.Variable(z.T)
            decoded = self.decode(latent)
            # Per the original note the decoder output is a log power:
            # exp(log(power)) = power.
            power = chf.exp(decoded).data.T
    return power