Example #1
0
    def __call__(self, x_data, y_data):
        """Run one VAE forward pass and return its loss terms.

        ``x_data`` is pushed through the recognition layers, a latent sample
        is drawn with the reparameterisation trick, and the generator output
        is compared against ``y_data``.

        Returns:
            tuple: ``(loss, kld, output)`` -- the mean squared error between
            the sigmoid generator output and ``y_data``, the KL term divided
            by ``x_data.shape[0] * x_data.shape[1]``, and the generator
            output itself.
        """
        inputs = chainer.Variable(x_data)
        targets = chainer.Variable(y_data)

        # Recognition network.
        enc_hidden = F.relu(self.recog2(F.relu(self.recog1(inputs))))
        mu = self.recog_mean(enc_hidden)
        # Treated as log(sigma^2): the sampling step below scales it by 0.5.
        log_var = self.recog_log_sigma(enc_hidden)

        # z = mu + sigma * epsilon, with epsilon drawn from N(0, 1).
        # ``nz`` is read from the enclosing scope.
        noise_shape = (inputs.data.shape[0], nz)
        noise = chainer.Variable(
            np.random.normal(0, 1, noise_shape).astype(np.float32))
        latent = mu + F.exp(0.5 * log_var) * noise

        # Generator network.
        dec_hidden = F.relu(self.gen2(F.relu(self.gen1(latent))))
        output = F.sigmoid(self.gen_mean(dec_hidden))

        loss = F.mean_squared_error(output, targets)
        n_elements = x_data.shape[0] * x_data.shape[1]
        kld = -0.5 * F.sum(1 + log_var - mu**2 - F.exp(log_var)) / n_elements
        return loss, kld, output
Example #2
0
File: model.py Project: emakryo/mdn
 def __call__(self, x, y):
     """Evaluate the Gaussian mixture density of ``y`` conditioned on ``x``.

     The hidden layer produces softmax mixing coefficients, component means
     reshaped to ``(-1, NUM_MIXTURE, OUT_DIM)`` and log-variances. The
     weighted component densities are summed over axis 1.
     """
     hidden = F.sigmoid(self.l1_(x))
     weights = F.softmax(self.coef_(hidden))
     centers = F.reshape(self.mean_(hidden), (-1,self.NUM_MIXTURE,self.OUT_DIM))
     log_variance = self.logvar_(hidden)
     # Give the target a singleton mixture axis so it broadcasts against
     # every component mean.
     targets = F.reshape(y, (-1,1,self.OUT_DIM))
     centers, targets = F.broadcast(centers, targets)
     sq_dist = F.sum((targets-centers)**2, axis=2)
     numerator = weights*F.exp(-0.5*sq_dist*F.exp(-log_variance))
     normaliser = (2*np.pi*F.exp(log_variance))**(0.5*self.OUT_DIM)
     return F.sum(numerator/normaliser, axis=1)
Example #3
0
def permutation_probability_loss(x, t, length):
    """Return the batch-averaged masked divergence between ``t`` and ``x``.

    Both inputs are turned into log-probabilities by the module's
    ``logsoftmax`` helper along axis 1, using a mask that is true for column
    indices below the per-row ``length``. The summed value of
    ``p_t * log p_t - p_t * log p_x`` is divided by ``x.shape[0]``.
    """
    length = length.reshape(-1, 1)
    column_index = np.arange(x.shape[1]).reshape(1, -1)
    index_grid = np.tile(column_index, (x.shape[0],  1))
    mask = chainer.Variable(index_grid < length)
    padding = chainer.Variable(np.zeros(x.shape, dtype=x.dtype))

    log_p_x = logsoftmax(x, mask, padding, axis=1)
    log_p_t = logsoftmax(t, mask, padding, axis=1)

    self_term = F.exp(log_p_t) * log_p_t
    cross_term = F.exp(log_p_t) * log_p_x
    return F.sum(self_term - cross_term) / float(x.shape[0])
Example #4
0
    def forward(self, x_data, state):
        """
        Encode the sequence x_data into a latent sample, then decode it.

        :param x_data: input sequence as a numpy.ndarray; each x_data[i] is
            reshaped to a (1, x_data.shape[1]) row and fed as one timestep
        :param state: dict of recurrent state; "c_rec", "h_rec" and "c_gen"
            are read, and "c_rec", "h_rec", "c_gen", "h_gen" are updated in
            place
        :return: outputs (numpy array of the per-step decoder outputs),
            reconstruction loss, KL term, state
        """
        row_shape = (1, x_data.shape[1])

        # =====[ Step 1: Compute q(z|x) - encoding step, get z ]=====
        # Feed the sequence through the recognition LSTM one row at a time.
        for row in x_data:
            x = Variable(row.reshape(row_shape))
            h_in = self.recog_x_h(x) + self.recog_h_h(state["h_rec"])
            c_t, h_t = F.lstm(state["c_rec"], h_in)
            state.update({"c_rec": c_t, "h_rec": h_t})

        # The final recognition state parameterises the posterior.
        q_mean = self.recog_mean(state["h_rec"])
        q_log_sigma = 0.5 * self.recog_log_sigma(state["h_rec"])
        # NOTE(review): q_log_sigma is already halved, yet it is used here
        # where the usual Gaussian KL formula takes the log-variance; the
        # -0.0005 weight also looks hand-tuned. Left numerically unchanged --
        # confirm intent before altering.
        KLD = -0.0005 * F.sum(1 + q_log_sigma - q_mean ** 2 - F.exp(q_log_sigma))
        # Sample z as q_mean + noise * exp(q_log_sigma).
        eps = Variable(np.random.normal(0, 1, q_log_sigma.data.shape).astype(np.float32))
        z = q_mean + F.exp(q_log_sigma) * eps

        # =====[ Step 2: Compute p(x|z) - decoding step ]=====
        # The first generator step is driven by z alone.
        output = []
        h_in = self.gen_z_h(z)
        c_t, h_t = F.lstm(state["c_gen"], h_in)
        state.update({"c_gen": c_t, "h_gen": h_t})
        rec_loss = Variable(np.zeros((), dtype=np.float32))
        for row in x_data:
            # Emit this step's output and accumulate its loss.
            x_t = self.output(h_t)
            output.append(x_t.data)
            rec_loss += self.loss_func(x_t, Variable(row.reshape(row_shape)))
            # Feed the emitted output back in to get the next hidden state.
            h_in = self.gen_x_h(x_t) + self.gen_h_h(state["h_gen"])
            c_t, h_t = F.lstm(state["c_gen"], h_in)
            state.update({"c_gen": c_t, "h_gen": h_t})

        return np.array(output), rec_loss, KLD, state
	def __call__(self, fs, bs, h):
		'''
		Compute attention over the encoder states.
		:param fs: list of intermediate vectors recorded by the forward encoder
		:param bs: list of intermediate vectors recorded by the backward encoder
		:param h: intermediate vector output by the decoder
		:return: weighted average of the forward encoder vectors and weighted average of the backward encoder vectors
		'''
		n_batch = h.data.shape[0]  # remember the minibatch size
		scores = []  # unnormalised weight for each encoder position
		score_total = Variable(xp.zeros((n_batch, 1), dtype='float32'))  # running sum of the weights
		# Score every encoder position against the decoder vector.
		for fwd, bwd in zip(fs, bs):
			energy = F.tanh(self.fh(fwd)+self.bh(bwd)+self.hh(h))
			score = F.exp(self.hw(energy))  # exponentiate; normalised below (softmax)
			scores.append(score)
			score_total += score
		# Accumulators for the weighted averages that are returned.
		out_shape = (n_batch, self.hidden_size)
		att_f = Variable(xp.zeros(out_shape, dtype='float32'))
		att_b = Variable(xp.zeros(out_shape, dtype='float32'))
		for fwd, bwd, score in zip(fs, bs, scores):
			weight = score / score_total  # normalise so the weights sum to 1
			# Add weight * encoder vector to the running outputs.
			att_f += F.reshape(F.batch_matmul(fwd, weight), out_shape)
			att_b += F.reshape(F.batch_matmul(bwd, weight), out_shape)
		return att_f, att_b
 def likelihood_ratio(self, other, a):
     """
     Return p_self(a) / p_other(a).

     The ratio is formed in log space: the two ``logli(a)`` values are
     subtracted and the difference is exponentiated.
     """
     log_ratio = self.logli(a) - other.logli(a)
     return F.exp(log_ratio)
Example #7
0
    def transform(self, images, normalized=False):
        '''Map a batch of images to sampled latent vectors.

        Parameters
        ----------
        images : array-like shape (n_images, image_width, image_height,
                                   n_colors)
            Input numpy array of images.
        normalized [optional] : bool
            When False, the pixel data is divided by 255 before encoding.

        Returns
        -------
        latent_vec : array-like shape (n_images, latent_dim)
            One random sample per image, drawn around the encoded mean.
        '''

        n_images = images.shape[0]
        flat = chainer.Variable(images.flatten().reshape((n_images, -1)))
        if not normalized:
            flat /= 255.
        encoded = self._encode(flat)
        # The encoder output is split in two along axis 1; the second half
        # is used as a log-variance (note the 0.5 factor below).
        mean, log_var = F.split_axis(encoded, 2, 1)
        # Standard-normal noise with the same shape as the mean.
        noise = np.random.standard_normal(mean.data.shape).astype('float32')
        if self.flag_gpu:
            noise = cuda.to_gpu(noise)
        noise = chainer.Variable(noise)
        # Scale and shift the noise by the encoded parameters.
        latent = noise * F.exp(0.5*log_var) + mean

        return latent.data
Example #8
0
    def __call__(self, h, dist):
        """Weight atom features by a learned function of pairwise distance.

        Args:
            h (numpy.ndarray): axis 0 represents minibatch index,
                axis 1 represents atom_index and axis2 represents
                feature dimension.
            dist (numpy.ndarray): axis 0 represents minibatch index,
                axis 1 and 2 represent distance between atoms.

        Raises:
            ValueError: if the feature dimension of ``h`` differs from
                ``self.hidden_dim``.

        """
        mb, atom, ch = h.shape
        if ch != self.hidden_dim:
            raise ValueError('h.shape[2] {} and hidden_dim {} must be same!'
                             .format(ch, self.hidden_dim))
        # Radial basis centres: 0, r, 2r, ... with r = radius_resolution.
        centers = self.xp.arange(
            self.num_rbf).astype('f') * self.radius_resolution

        # Expand each distance into num_rbf Gaussian responses.
        pair_dist = functions.reshape(dist, (mb, atom, atom, 1))
        pair_dist = functions.broadcast_to(
            pair_dist, (mb, atom, atom, self.num_rbf))
        rbf = functions.exp(- self.gamma * (pair_dist - centers) ** 2)

        # Two dense layers with softplus turn the responses into filters.
        filters = functions.reshape(rbf, (-1, self.num_rbf))
        filters = functions.softplus(self.dense1(filters))
        filters = functions.softplus(self.dense2(filters))
        filters = functions.reshape(
            filters, (mb, atom, atom, self.hidden_dim))

        # Broadcast the atom features over the pair axis and reduce axis 1.
        feats = functions.reshape(h, (mb, atom, 1, self.hidden_dim))
        feats = functions.broadcast_to(
            feats, (mb, atom, atom, self.hidden_dim))
        return functions.sum(feats * filters, axis=1)
 def __call__(self, annotion_list, back_word_list, p):
     """
     Compute attention-weighted sums of the annotation and back-word vectors.
     :param annotion_list: sequence of annotation vectors
     :param back_word_list: sequence of back-word vectors, paired with annotion_list
     :param p: hidden value the pairs are scored against
     :return: weighted annotation sum, weighted back-word sum
     """
     batch_size = p.data.shape[0]
     scores = []
     score_total = XP.fzeros((batch_size, 1))
     # Unnormalised exponential score for each (annotation, back word) pair.
     for annotion, back_word in zip(annotion_list, back_word_list):
         energy = functions.tanh(self.annotion_weight(annotion) + self.back_weight(back_word) + self.pw(p))
         score = functions.exp(self.weight_exponential(energy))
         scores.append(score)
         score_total += score
     out_shape = (batch_size, self.hidden_size)
     # Both accumulators start from the same zero value.
     annotion_value = back_word_value = XP.fzeros(out_shape)
     # Normalise each score and accumulate the weighted vectors.
     for annotion, back_word, score in zip(annotion_list, back_word_list, scores):
         weight = score / score_total
         annotion_value += functions.reshape(functions.batch_matmul(annotion, weight), out_shape)
         back_word_value += functions.reshape(functions.batch_matmul(back_word, weight), out_shape)
     return annotion_value, back_word_value
Example #10
0
File: models.py Project: kzky/works
    def __call__(self, x, y):
        """Return the pairwise kernel matrix exp(-||g*x_i - g*y_j||^2).

        Parameters
        -----------------
        x: Variable
            Feature of unlabeled samples.
        y: Variable
            Feature of unlabeled samples.

        Returns
        -----------------
        Variable
            Matrix whose entry (i, j) is computed from row i of ``x`` and
            row j of ``y`` after both are scaled element-wise by
            ``self.gamma``.
        """
        g, x, y = F.broadcast(*[self.gamma, x, y])
        x_g = x * g
        y_g = y * g

        x_g_norm = F.sum(x_g**2, axis=1)
        y_g_norm = F.sum(y_g**2, axis=1)
        # F.linear(a, b) computes a . b^T, i.e. all pairwise inner products.
        x_g_y_g = F.linear(x_g, y_g)

        # Align the squared norms with the inner-product matrix: row i must
        # carry ||x_i||^2 and column j must carry ||y_j||^2. Broadcasting the
        # 1-D x norm directly would spread it along the columns instead and
        # can make the "squared distance" negative.
        x_g_norm, x_g_y_g, y_g_norm = F.broadcast(
            *[F.expand_dims(x_g_norm, 1),
              x_g_y_g,
              F.expand_dims(y_g_norm, 0)])
        return F.exp(- x_g_norm + 2 * x_g_y_g - y_g_norm)
Example #11
0
File: models.py Project: kzky/works
    def __call__(self, x, y):
        """Return the pairwise kernel matrix exp(-||g*x_i - g*y_j||^2).

        Parameters
        -----------------
        x: Variable
            Feature of unlabeled samples.
        y: Variable
            Feature of unlabeled samples.

        Returns
        -----------------
        Variable
            Matrix whose entry (i, j) is computed from row i of ``x`` and
            row j of ``y`` after both are scaled element-wise by
            ``self.gamma``.
        """
        g, x, y = F.broadcast(*[self.gamma, x, y])
        x_g = x * g
        y_g = y * g

        x_g_norm = F.sum(x_g**2, axis=1)
        y_g_norm = F.sum(y_g**2, axis=1)
        # F.linear(a, b) computes a . b^T, i.e. all pairwise inner products.
        x_g_y_g = F.linear(x_g, y_g)

        # Align the squared norms with the inner-product matrix: row i must
        # carry ||x_i||^2 and column j must carry ||y_j||^2. Broadcasting the
        # 1-D x norm directly would spread it along the columns instead.
        x_g_norm, x_g_y_g, y_g_norm = F.broadcast(
            *[F.expand_dims(x_g_norm, 1),
              x_g_y_g,
              F.expand_dims(y_g_norm, 0)])
        return F.exp(- x_g_norm + 2 * x_g_y_g - y_g_norm)
    def forward_one_step(self, x_data, c_data, y_data, state, train=True):
        """Advance the three stacked LSTM layers and the soft window by one step.

        ``state`` is read under the keys 'h1', 'c1', 'h2', 'c2', 'h3', 'c3',
        'w' and 'w_means'. A new dict with the same keys is returned together
        with the value of ``loss_func`` for this step.

        When ``train`` is False the input variables are created volatile.
        """
        x = chainer.Variable(x_data, volatile=not train)
        t = chainer.Variable(y_data, volatile=not train)
        c = chainer.Variable(c_data, volatile=not train)

        # Layer 1 sees the input, its own previous output and the previous window.
        h1_in = self.l1_first(x) + self.l1_recur(state['h1']) + self.l1_w(state['w'])
        c1, h1 = F.lstm(state['c1'], h1_in)

        # soft window
        # The window parameters are exponentiated, then split into three groups.
        ws = F.exp(self.lw(h1))
        w_mixws, w_gains, w_means = split_axis_by_widths(ws, 3)
        # The means accumulate on top of the previous step's means.
        w_means += state['w_means']
        w = self.forward_window(w_mixws, w_gains, w_means, c)

        # NOTE(review): layers 2 and 3 reuse self.l1_w for the window term --
        # confirm this is intended and not a missing l2_w / l3_w.
        h2_in = self.l2_first(x) + self.l2_recur(state['h2']) + self.l1_w(w) + self.l2_input(h1)
        c2, h2 = F.lstm(state['c2'], h2_in)

        h3_in = self.l3_first(x) + self.l3_recur(state['h3']) + self.l1_w(w) + self.l3_input(h2)
        c3, h3 = F.lstm(state['c3'], h3_in)

        # NOTE(review): if F is chainer.functions, concat takes a sequence as
        # its first argument (F.concat((h1, h2, h3))) -- confirm this call.
        y = self.l4(F.concat(h1, h2, h3))

        state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2, 'c3': c3, 'h3': h3,
                 'w': w, 'w_means': w_means}
        return state, loss_func(self.noutput_gauss, y, t)
Example #13
0
File: models.py Project: kzky/works
    def __call__(self, x, y):
        """Return the pairwise kernel matrix exp(-||g*x_i - g*y_j||^2).

        The scale ``g`` is drawn afresh on every call with ``F.gaussian``
        (mean 0, second argument e) and broadcast to ``x.shape``.

        Parameters
        -----------------
        x: Variable
            Feature of unlabeled samples.
        y: Variable
            Feature of unlabeled samples.

        Returns
        -----------------
        Variable
            Matrix whose entry (i, j) is computed from row i of ``x`` and
            row j of ``y`` after both are scaled by ``g``.
        """

        g = F.broadcast_to(
            F.gaussian(
                np.array([0], dtype=np.float32),
                np.array([np.exp(1)], dtype=np.float32)), x.shape)

        x_g = x * g
        y_g = y * g

        x_g_norm = F.sum(x_g**2, axis=1)
        y_g_norm = F.sum(y_g**2, axis=1)
        # F.linear(a, b) computes a . b^T, i.e. all pairwise inner products.
        x_g_y_g = F.linear(x_g, y_g)

        # Align the squared norms with the inner-product matrix: row i must
        # carry ||x_i||^2 and column j must carry ||y_j||^2. Broadcasting the
        # 1-D x norm directly would spread it along the columns instead.
        x_g_norm, x_g_y_g, y_g_norm = F.broadcast(
            *[F.expand_dims(x_g_norm, 1),
              x_g_y_g,
              F.expand_dims(y_g_norm, 0)])
        return F.exp(- x_g_norm + 2 * x_g_y_g - y_g_norm)
Example #14
0
def gaussian_kl_divergence(mean, ln_var):
    """Compute the KL-divergence of a diagonal gaussian from the standard one.

    ``mean`` holds :math:`\\mu` and ``ln_var`` holds
    :math:`\\log(\\sigma^2)`. The returned variable represents

    .. math::

       D_{\\mathbf{KL}}(N(\\mu, S) \\| N(0, I)),

    where :math:`S` is diagonal with :math:`S_{ii} = \\sigma_i^2` and
    :math:`I` is the identity matrix. The value is summed over every
    element of the inputs.

    Args:
        mean (~chainer.Variable): Mean of the given gaussian distribution,
            :math:`\\mu`.
        ln_var (~chainer.Variable): Logarithm of the variance of the given
            gaussian distribution, :math:`\\log(\\sigma^2)`.

    Returns:
        ~chainer.Variable: KL-divergence between the given gaussian
            distribution and the standard gaussian.

    """
    for argument in (mean, ln_var):
        assert isinstance(argument, variable.Variable)

    n_elements = mean.data.size
    squared_mean_sum = F.sum(mean * mean)
    variance_sum = F.sum(F.exp(ln_var))
    log_variance_sum = F.sum(ln_var)
    return (squared_mean_sum + variance_sum
            - log_variance_sum - n_elements) * 0.5
Example #15
0
def _infer_z(mu, ln_var):
    """Sample z with ``F.gaussian(mu, ln_var)`` and return it with the KL term.

    The KL term is ``-sum(1 + ln_var - mu^2 - exp(ln_var)) / 2`` divided by
    the size of the first axis of ``mu``.
    """
    n_batch = mu.data.shape[0]
    variance = F.exp(ln_var)
    z = F.gaussian(mu, ln_var)
    summed = F.sum(1 + ln_var - mu ** 2 - variance)
    kl = -summed / 2
    kl = kl / n_batch
    return z, kl
  def __call__(self, a_list, b_list, p, sentence_length, window_size):
    """Return attention-weighted sums of ``a_list`` and ``b_list``.

    A window position is also computed from ``p`` and ``sentence_length``,
    but neither it nor ``window_size`` influences the returned values yet;
    see the design notes in the body.
    """
    batch_size = p.data.shape[0]
    SENTENCE_LENGTH = XP.fnonzeros((batch_size, 1),sentence_length)
    scores = []
    score_total = XP.fzeros((batch_size, 1))
    # Predicted position inside the sentence (computed, not yet consumed).
    s = functions.tanh(self.ts(p))
    pos =  SENTENCE_LENGTH * functions.sigmoid(self.sp(s))

    # Open design notes for restricting attention to a window:
    # - Components of a and b that fall outside the window should be zeroed,
    #   ideally without iterating over each element of the batch.
    # - Option 1: build a global alignment matrix of (batch x) hidden size x
    #   sentence length, plus a (batch x) sentence length matrix holding the
    #   gaussian weight, with zeros where the position is outside the window.
    # - Option 2: build a (batch x) sentence length matrix holding 1 for each
    #   position inside the window.
    # - The weights for a and b may need to be separate, because forward
    #   differs from backward.

    for a, b in zip(a_list, b_list):
      energy = functions.tanh(self.aw(a) + self.bw(b) + self.pw(p))
      score = functions.exp(self.we(energy))
      scores.append(score)
      score_total += score
    # Both accumulators start from the same zero value.
    aa = bb = XP.fzeros((batch_size, self.hidden_size))
    for a, b, score in zip(a_list, b_list, scores):
      weight = score / score_total
      aa += a * weight
      bb += b * weight
    return aa, bb
Example #17
0
	def log_pz(self, z, mean, ln_var, test=False):
		"""Return log p(z) under the standard normal prior, summed over axis 1.

		With ``type_pz == "gaussianmarg"`` the expectation under q(z) is
		computed analytically from ``mean`` and ``ln_var``; with
		``type_pz == "gaussian"`` the density is evaluated at the sample ``z``.
		``test`` is accepted but not used.

		Raises:
			ValueError: if ``self.type_pz`` is neither of the two supported values.
		"""
		if self.type_pz == "gaussianmarg":
			# \int q(z)logp(z)dz = -(J/2)*log2pi - (1/2)*sum_{j=1}^{J} (mu^2 + var)
			# See Appendix B [Auto-Encoding Variational Bayes](http://arxiv.org/abs/1312.6114)
			log_pz = -0.5 * (math.log(2.0 * math.pi) + mean * mean + F.exp(ln_var))
		elif self.type_pz == "gaussian":
			log_pz = -0.5 * math.log(2.0 * math.pi) - 0.5 * z ** 2
		else:
			# Fail with a clear message instead of an unbound-local error below.
			raise ValueError("unknown type_pz: {!r}".format(self.type_pz))
		return F.sum(log_pz, axis=1)
def gauss_bernoulli_params(m, y):
    """Split the raw output ``y`` into mixture parameters and a final column.

    ``y`` is cut into pieces of widths ``m, 2m, 2m, m, 1``. The first piece
    goes through softmax, the third through exp, the fourth through tanh; the
    second and third are each split in two again.

    Returns:
        tuple: ``((mixws, means0, means1, stdds0, stdds1, corrs), y_e)``
        where ``y_e`` is the width-1 piece, returned untransformed.
    """
    widths = [m, 2 * m, 2 * m, m, 1]
    raw_mixws, raw_means, raw_stdds, raw_corrs, y_e = split_axis_by_widths(y, widths)
    mixws = F.softmax(raw_mixws)
    means0, means1 = split_axis_by_widths(raw_means, 2)
    stdds0, stdds1 = split_axis_by_widths(F.exp(raw_stdds), 2)
    corrs = F.tanh(raw_corrs)
    gaussian_params = (mixws, means0, means1, stdds0, stdds1, corrs)
    return gaussian_params, y_e
def cosine_similarity(x, y, eps=1e-6):
    """Return batched pairwise cosine similarities between rows of x and y.

    ``x`` and ``y`` are 3-D variables. For each batch item the inner product
    of every row of ``x`` with every row of ``y`` is divided by
    ``sqrt(|x_i|^2 * |y_j|^2 + eps)``; the square root is evaluated as
    ``exp(log(.) / 2)``.
    """
    batch, x_rows, _ = x.data.shape
    _, y_rows, _ = y.data.shape
    pair_shape = (batch, x_rows, y_rows)

    dots = F.batch_matmul(x, y, transb=True)
    x_sq = F.reshape(F.sum(x * x, axis=2), (batch, x_rows, 1))
    x_sq = F.broadcast_to(x_sq, pair_shape)
    y_sq = F.reshape(F.sum(y * y, axis=2), (batch, 1, y_rows))
    y_sq = F.broadcast_to(y_sq, pair_shape)
    norm_product = F.exp(F.log(x_sq * y_sq + eps) / 2)
    return dots / norm_product
Example #20
0
	def log_pz(self, z, mean, ln_var):
		"""Return log p(z) under the standard normal prior, summed over axis 1.

		With ``type_pz == "gaussianmarg"`` the expectation under q(z) is
		computed analytically from ``mean`` and ``ln_var``; with
		``type_pz == "gaussian"`` the density is evaluated at the sample ``z``.

		Raises:
			ValueError: if ``self.type_pz`` is neither of the two supported values.
		"""
		if self.type_pz == "gaussianmarg":
			# \int q(z)logp(z)dz = -(J/2)*log2pi - (1/2)*sum_{j=1}^{J} (mu^2 + var)
			# See Appendix B [Auto-Encoding Variational Bayes](http://arxiv.org/abs/1312.6114)
			# See https://github.com/dpkingma/nips14-ssl/blob/master/anglepy/models/VAE_YZ_X.py line 106
			log_pz = -0.5 * (math.log(2.0 * math.pi) + mean * mean + F.exp(ln_var))
		elif self.type_pz == "gaussian":
			log_pz = -0.5 * math.log(2.0 * math.pi) - 0.5 * z ** 2
		else:
			# Fail with a clear message instead of an unbound-local error below.
			raise ValueError("unknown type_pz: {!r}".format(self.type_pz))
		return F.sum(log_pz, axis=1)
Example #21
0
	def gaussian_nll_keepbatch(self, x, mean, ln_var, clip=True):
		"""Return the Gaussian negative log-likelihood of ``x``, summed over axis 1.

		When ``clip`` is true, ``ln_var`` is first clipped to the range
		``[log(0.001), log(10)]``.
		"""
		if clip:
			ln_var = F.clip(ln_var, math.log(0.001), math.log(10))
		precision = F.exp(-ln_var)
		residual = x - mean
		quadratic = (residual * residual) * precision * 0.5
		log_normaliser = (math.log(2.0 * math.pi) + ln_var) * 0.5
		return F.sum(log_normaliser + quadratic, axis=1)
def gaussian_mixture_2d_ref(*inputs):
    """Reference computation of weighted 2-D Gaussian component values.

    ``inputs`` unpacks, in order, to ``w, m1, m2, s1, s2, c, x1, x2``. Each
    coordinate is centred with ``vec_sub_mat`` and divided by its scale, the
    two are combined using the correlation ``c``, and the result is weighted
    by ``w``.
    """
    w, m1, m2, s1, s2, c, x1, x2 = inputs
    scaled1 = vec_sub_mat(x1, m1, lhs_bwd=False) / s1
    scaled2 = vec_sub_mat(x2, m2, lhs_bwd=False) / s2
    quad = (scaled1 - c * scaled2)**2.0
    one_minus_c2 = 1.0 - c**2.0
    normaliser = 2.0 * numpy.pi * s1 * s2 * one_minus_c2 ** 0.5
    return w * functions.exp(- quad / (2.0 * one_minus_c2)) / normaliser
Example #23
0
 def _forward(self, img_batch):
     """Encode, sample and decode a batch; return the two loss terms and output.

     ``img_batch`` is divided by 255 before encoding. Returns
     ``(reconstruction_loss, kl_div, output)`` where ``kl_div`` is divided by
     ``img_batch.shape[1] * img_batch.shape[0]``.
     """
     batch = chainer.Variable(img_batch / 255.)
     # The encoder output is split in two along axis 1; the second half is
     # used as a log-variance (note the 0.5 factor when sampling).
     mean, log_var = F.split_axis(self._encode(batch), 2, 1)
     # Standard-normal noise with the same shape as the mean.
     noise = np.random.standard_normal(mean.data.shape).astype('float32')
     if self.flag_gpu:
         noise = cuda.to_gpu(noise)
     noise = chainer.Variable(noise)
     latent = noise * F.exp(0.5*log_var) + mean
     output = self._decode(latent)
     reconstruction_loss = F.mean_squared_error(output, batch)
     # KL term, scaled by the number of elements in the batch.
     n_elements = img_batch.shape[1] * img_batch.shape[0]
     kl_div = -0.5 * F.sum(1 + log_var - mean ** 2 - F.exp(log_var))
     kl_div /= n_elements
     return reconstruction_loss, kl_div, output
 def __call__(self, x):
     """Apply layers l1..l7 with leaky ReLU, then return exp(l9(h) - 13.0)."""
     hidden_layers = (self.l1, self.l2, self.l3, self.l4,
                      self.l5, self.l6, self.l7)
     h = x
     for layer in hidden_layers:
         h = F.leaky_relu(layer(h))
     return F.exp(self.l9(h)-13.0)
Example #25
0
    def vae_forward(self, noisy_h_data, h_data,n_layers_recog, n_layers_gen,
                  nonlinear_q='softplus', nonlinear_p='softplus', gpu=-1,train=True):
        """Run the denoising VAE: encode the noisy input, reconstruct the clean one.

        The recognition layers are looked up as ``vae_recog_<i>`` and the
        generator layers as ``vae_gen_<i>``; ``log_sigma`` produces the
        posterior scale from the last recognition hidden layer.

        :param noisy_h_data: array fed to the recognition network
        :param h_data: array the output is compared against
        :param n_layers_recog: number of hidden recognition layers
        :param n_layers_gen: number of hidden generator layers
        :param nonlinear_q: name of the recognition non-linearity
        :param nonlinear_p: name of the generator non-linearity
        :param gpu: when >= 0 the sampled noise is moved to the GPU
        :param train: forwarded to ``F.dropout``
        :return: reconstruction loss, KL term, output
        """
        noisy_inputs = Variable(noisy_h_data) #  For non-whole
        inputs = Variable(h_data)

        # set non-linear function
        nonlinear_dict = {'sigmoid': F.sigmoid, 'tanh': F.tanh, 'softplus': F.softplus, 'relu': F.relu,
                     'clipped_relu': F.clipped_relu, 'leaky_relu': F.leaky_relu}
        nonlinear_f_q = nonlinear_dict[nonlinear_q]
        nonlinear_f_p = nonlinear_dict[nonlinear_p]

        chain = [noisy_inputs]

        # compute q(z|x): hidden layers with dropout
        for i in range(n_layers_recog):
            chain.append(F.dropout(nonlinear_f_q(getattr(self, 'vae_recog_%i' % i)(chain[-1])),train=train))

        # Mean and (halved) log-scale both read the last hidden layer.
        recog_out = getattr(self, 'vae_recog_%i' % n_layers_recog)(chain[-1])

        log_sigma_out = 0.5 * (getattr(self, 'log_sigma')(chain[-1]))

        # Reparameterisation: z = mean + exp(log_sigma) * eps
        eps = np.random.normal(0, 1, (inputs.data.shape[0], log_sigma_out.data.shape[1])).astype('float32')
        if gpu >= 0:
            eps = cuda.to_gpu(eps)
        eps = Variable(eps)
        z   = recog_out + F.exp(log_sigma_out) * eps

        chain  += [recog_out, z]

        for i in range(n_layers_gen):
            chain.append(F.dropout(nonlinear_f_p(getattr(self, 'vae_gen_%i' % i)(chain[-1])),train=train))

        # The output layer is linear (no squashing non-linearity).
        chain.append(getattr(self, 'vae_gen_%i' % (n_layers_gen))(chain[-1]))
        output = chain[-1]

        rec_loss = F.mean_squared_error(output, inputs)
        # NOTE(review): log_sigma_out is already halved, yet it is used here
        # where the usual Gaussian KL formula takes the log-variance. Left
        # numerically unchanged -- confirm intent before altering.
        KLD = -0.5 * F.sum(1 + log_sigma_out - recog_out**2 - F.exp(log_sigma_out)) / (inputs.data.shape[0]*inputs.data.shape[1])

        return rec_loss, KLD, output
Example #26
0
    def kl_div(self, other):
        """Return KL(self || other) for two categorical distributions.

        Both distributions are given by their ``logits``; the divergence is
        summed over the last axis.
        """
        log_p = F.log_softmax(self.logits)
        log_q = F.log_softmax(other.logits)

        # Assume layout is N * A
        pointwise = F.exp(log_p) * (log_p - log_q)
        return F.sum(pointwise, axis=-1)
Example #27
0
    def encode(self, data, test=False):
        """Encode ``data`` and draw a latent sample.

        The encoder output is split in two along axis 1 into a mean and a
        log-variance. Returns ``(z, mean, ln_var)`` where
        ``z = noise * exp(0.5 * ln_var) + mean`` with standard-normal noise.
        """
        encoded = self.enc(data, test=test)
        mean, ln_var = F.split_axis(encoded, 2, 1)
        noise = Variable(
            np.random.standard_normal(mean.data.shape).astype('float32'))
        if self.flag_gpu:
            noise.to_gpu()
        z = noise * F.exp(0.5*ln_var) + mean

        return z, mean, ln_var
Example #28
0
    def logli(self, a):
        """Return the log-likelihood of ``a`` under this diagonal Gaussian.

        ``a`` is cast to float32 and standardised with ``self.means`` and
        ``self.log_stds``. The result sums, over the last axis, the negated
        log standard deviations and half the squared standardised values,
        and subtracts ``0.5 * n * log(2 * pi)`` with ``n`` the size of the
        last axis of ``self.means``.
        """
        actions = F.cast(a, np.float32)
        # Map back to a standard normal.
        standardised = (actions - self.means) * F.exp(-self.log_stds)

        log_scale_term = F.sum(self.log_stds, axis=-1)
        quadratic_term = 0.5 * F.sum(F.square(standardised), axis=-1)
        constant_term = 0.5 * self.means.shape[-1] * np.log(2 * np.pi)
        return - log_scale_term - quadratic_term - constant_term
    def forward_one_step(self, x_data, y_data, n_layers_recog, n_layers_gen, nonlinear_q='softplus', nonlinear_p='softplus', output_f = 'sigmoid', type_qx='gaussian', type_px='gaussian', gpu=-1):
        """Run one forward pass of the conditional VAE.

        ``x_data`` is encoded together with ``y_data``, a latent sample is
        drawn, and ``x_data`` is reconstructed from the sample and
        ``y_data``. ``type_qx`` and ``type_px`` are accepted but not used.

        Returns:
            tuple: ``(rec_loss, KLD, output)`` -- the mean squared error
            between the output and ``x_data``, the KL term divided by
            ``x_data.shape[0] * x_data.shape[1]``, and the output.
        """
        x = Variable(x_data)
        y = Variable(y_data)

        # Resolve the activation functions by name.
        hidden_activations = {'sigmoid': F.sigmoid, 'tanh': F.tanh, 'softplus': self.softplus, 'relu': F.relu}
        act_q = hidden_activations[nonlinear_q]
        act_p = hidden_activations[nonlinear_p]

        output_activations = {'sigmoid': F.sigmoid, 'identity': self.identity, 'tanh': F.tanh}
        act_out = output_activations[output_f]

        # Recognition network: q(z|x, y).
        h_q = act_q(self.recog_x(x) + self.recog_y(y))
        for i in range(n_layers_recog-1):
            h_q = act_q(getattr(self, 'recog_%i' % i)(h_q))

        q_mean = self.recog_mean(h_q)
        q_log_sigma = 0.5 * self.recog_log(h_q)

        # Reparameterisation: z = mean + exp(log_sigma) * eps.
        noise_shape = (x.data.shape[0], q_log_sigma.data.shape[1])
        eps = np.random.normal(0, 1, noise_shape).astype('float32')
        if gpu >= 0:
            eps = cuda.to_gpu(eps)
        eps = Variable(eps)
        z = q_mean + F.exp(q_log_sigma) * eps

        # Generator network: p(x|y, z).
        h_p = act_p(self.gen_y(y) + self.gen_z(z))
        for i in range(n_layers_gen-1):
            h_p = act_p(getattr(self, 'gen_%i' % i)(h_p))
        output = act_out(self.gen_out(h_p))

        rec_loss = F.mean_squared_error(output, x)
        n_elements = x_data.shape[0]*x_data.shape[1]
        KLD = -0.5 * F.sum(1 + q_log_sigma - q_mean**2 - F.exp(q_log_sigma)) / n_elements

        return rec_loss, KLD, output
Example #30
0
    def kl_div(self, other):
        """
        Compute KL(self || other) for two diagonal multivariate Gaussians (vectorized).

        Reference: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence#Kullback.E2.80.93Leibler_divergence_for_multivariate_normal_distributions

        Both distributions are described by ``means`` and ``log_stds``.
        Writing index 1 for ``self`` and index 2 for ``other``, with
        ``vars_k = exp(2 * log_stds_k)``, the general expression

            1/2 ( tr(S_2^{-1} S_1) + (m_2 - m_1)^T S_2^{-1} (m_2 - m_1) - n + ln(det(S_2) / det(S_1)) )

        reduces for diagonal covariances to

            sum( ((means_1 - means_2)^2 + vars_1 - vars_2) / (2 * vars_2) + (log_stds_2 - log_stds_1), axis=-1 )

        A constant 1e-8 is added to the denominator.

        :param other: the second distribution, exposing ``means`` and ``log_stds``
        :return: the KL divergences, summed over the last axis.
        """
        self_var = F.exp(2 * self.log_stds)
        other_var = F.exp(2 * other.log_stds)

        numerator = F.square(self.means - other.means) + self_var - other_var
        denominator = 2 * other_var + 1e-8
        per_dimension = numerator / denominator + other.log_stds - self.log_stds
        return F.sum(per_dimension, axis=-1)
Example #31
0
def main():
    """Train or evaluate a PPO agent on a Gym env using vectorized sub-envs.

    Parses the command-line options, seeds ChainerRL and every
    sub-environment, builds a Gaussian policy and a value function (two
    64-unit tanh hidden layers each) and then either evaluates the agent
    (``--demo``) or trains it with periodic evaluation.
    """
    import logging

    cli = argparse.ArgumentParser()
    cli.add_argument('--gpu', type=int, default=0,
                     help='GPU to use, set to -1 if no GPU.')
    cli.add_argument('--env', type=str, default='Hopper-v2',
                     help='OpenAI Gym MuJoCo env to perform algorithm on.')
    cli.add_argument('--num-envs', type=int, default=1,
                     help='Number of envs run in parallel.')
    cli.add_argument('--seed', type=int, default=0,
                     help='Random seed [0, 2 ** 32)')
    cli.add_argument('--outdir', type=str, default='results',
                     help='Directory path to save output files.'
                     ' If it does not exist, it will be created.')
    cli.add_argument('--steps', type=int, default=2 * 10**6,
                     help='Total number of timesteps to train the agent.')
    cli.add_argument('--eval-interval', type=int, default=100000,
                     help='Interval in timesteps between evaluations.')
    cli.add_argument('--eval-n-runs', type=int, default=100,
                     help='Number of episodes run for each evaluation.')
    cli.add_argument('--render', action='store_true',
                     help='Render env states in a GUI window.')
    cli.add_argument('--demo', action='store_true',
                     help='Just run evaluation, not training.')
    cli.add_argument('--load', type=str, default='',
                     help='Directory to load agent from.')
    cli.add_argument('--logger-level', type=int, default=logging.INFO,
                     help='Level of the root logger.')
    cli.add_argument('--monitor', action='store_true',
                     help='Wrap env with gym.wrappers.Monitor.')
    cli.add_argument('--log-interval', type=int, default=1000,
                     help='Interval in timesteps between outputting log'
                     ' messages during training')
    cli.add_argument('--update-interval', type=int, default=2048,
                     help='Interval in timesteps between model updates.')
    cli.add_argument('--epochs', type=int, default=10,
                     help='Number of epochs to update model for per PPO'
                     ' iteration.')
    cli.add_argument('--batch-size', type=int, default=64,
                     help='Minibatch size')
    args = cli.parse_args()

    logging.basicConfig(level=args.logger_level)

    # Seed ChainerRL itself.
    misc.set_random_seed(args.seed, gpus=(args.gpu, ))

    # Give every subprocess its own seed:
    #   seed=0, num_envs=4 -> [0, 1, 2, 3]
    #   seed=1, num_envs=4 -> [4, 5, 6, 7]
    process_seeds = np.arange(args.num_envs) + args.seed * args.num_envs
    assert process_seeds.max() < 2**32

    args.outdir = experiments.prepare_output_dir(args, args.outdir)

    def make_env(process_idx, test):
        """Build one seeded, wrapped env for the given subprocess index."""
        env = gym.make(args.env)
        # Train and test envs must not share a seed: test envs count down
        # from the top of the 32-bit range.
        train_seed = int(process_seeds[process_idx])
        if test:
            env.seed(2**32 - 1 - train_seed)
        else:
            env.seed(train_seed)
        # Cast observations to float32 because our model uses float32
        env = chainerrl.wrappers.CastObservationToFloat32(env)
        if args.monitor:
            env = chainerrl.wrappers.Monitor(env, args.outdir)
        if args.render:
            env = chainerrl.wrappers.Render(env)
        return env

    def make_batch_env(test):
        """Build a multiprocess vector env with ``args.num_envs`` workers."""
        env_factories = [
            functools.partial(make_env, idx, test)
            for idx in range(args.num_envs)
        ]
        return chainerrl.envs.MultiprocessVectorEnv(env_factories)

    # A throwaway env, used only to read the time limit and the spaces.
    probe_env = gym.make(args.env)
    timestep_limit = probe_env.spec.tags.get(
        'wrapper_config.TimeLimit.max_episode_steps')
    obs_space = probe_env.observation_space
    action_space = probe_env.action_space
    print('Observation space:', obs_space)
    print('Action space:', action_space)

    assert isinstance(action_space, gym.spaces.Box)

    # Observations are normalized by their running mean and variance.
    obs_normalizer = chainerrl.links.EmpiricalNormalization(
        obs_space.low.size, clip_threshold=5)

    # While the original paper initialized weights by normal distribution,
    # we use orthogonal initialization as the latest openai/baselines does.
    winit = chainerrl.initializers.Orthogonal(1.)
    winit_last = chainerrl.initializers.Orthogonal(1e-2)

    action_size = action_space.low.size
    gaussian_head = chainerrl.policies.GaussianHeadWithStateIndependentCovariance(
        action_size=action_size,
        var_type='diagonal',
        var_func=lambda x: F.exp(2 * x),  # Parameterize log std
        var_param_init=0,  # log std = 0 => std = 1
    )
    policy = chainer.Sequential(
        L.Linear(None, 64, initialW=winit),
        F.tanh,
        L.Linear(None, 64, initialW=winit),
        F.tanh,
        L.Linear(None, action_size, initialW=winit_last),
        gaussian_head,
    )

    vf = chainer.Sequential(
        L.Linear(None, 64, initialW=winit),
        F.tanh,
        L.Linear(None, 64, initialW=winit),
        F.tanh,
        L.Linear(None, 1, initialW=winit),
    )

    # The policy and the value function form one two-headed model.
    model = chainerrl.links.Branched(policy, vf)

    opt = chainer.optimizers.Adam(3e-4, eps=1e-5)
    opt.setup(model)

    agent = PPO(
        model,
        opt,
        obs_normalizer=obs_normalizer,
        gpu=args.gpu,
        update_interval=args.update_interval,
        minibatch_size=args.batch_size,
        epochs=args.epochs,
        clip_eps_vf=None,
        entropy_coef=0,
        standardize_advantages=True,
        gamma=0.995,
        lambd=0.97,
    )

    if args.load:
        agent.load(args.load)

    if args.demo:
        # Evaluation only: run the requested number of test episodes.
        eval_stats = experiments.eval_performance(
            env=make_batch_env(True),
            agent=agent,
            n_steps=None,
            n_episodes=args.eval_n_runs,
            max_episode_len=timestep_limit)
        print('n_runs: {} mean: {} median: {} stdev {}'.format(
            args.eval_n_runs, eval_stats['mean'], eval_stats['median'],
            eval_stats['stdev']))
        return

    experiments.train_agent_batch_with_evaluation(
        agent=agent,
        env=make_batch_env(False),
        eval_env=make_batch_env(True),
        outdir=args.outdir,
        steps=args.steps,
        eval_n_steps=None,
        eval_n_episodes=args.eval_n_runs,
        eval_interval=args.eval_interval,
        log_interval=args.log_interval,
        max_episode_len=timestep_limit,
        save_best_so_far_agent=False,
    )
Example #32
0
    def forward_one_step(self, x_data, state, continuous=True, nonlinear_q='tanh', nonlinear_p='tanh', output_f = 'sigmoid', gpu=-1):
        """Encode a whole sequence into a latent z, then decode it step by step.

        :param x_data: array indexed per time step (``x_data[t]``); every step
            is reshaped to ``(1, x_data.shape[1])`` before use.
        :param state: dict with the recurrent states ``'recog_h'`` (read and
            updated by the encoder) and ``'gen_h'`` (overwritten by the
            decoder). It is mutated in place and also returned.
        :param continuous: when ``== True`` the reconstruction loss is a mean
            squared error, otherwise a sigmoid cross entropy on int32 targets.
        :param nonlinear_q: name of the encoder activation
            ('sigmoid', 'tanh', 'softplus' or 'relu').
        :param nonlinear_p: name of the decoder activation (same choices).
        :param output_f: name of the activation applied to the decoder output.
        :param gpu: when ``>= 0`` the sampled noise is moved to the GPU and
            the outputs are copied back to the CPU.
        :return: tuple ``(output, rec_loss, KLD, state)``.
        """
        output = np.zeros( x_data.shape ).astype(np.float32)

        activations = {'sigmoid': F.sigmoid, 'tanh': F.tanh, 'softplus': self.softplus, 'relu': F.relu}
        encoder_act = activations[nonlinear_q]
        decoder_act = activations[nonlinear_p]
        output_act = activations[output_f]

        def step_input(t):
            # One time step as a single-row batch.
            return x_data[t].reshape((1, x_data.shape[1]))

        def store(t, var):
            # Copy a decoded step into the host-side output buffer.
            if gpu >= 0:
                output[t] = cuda.to_cpu(var.data)
            else:
                output[t] = var.data

        # q(z|x): run the recognition RNN over the whole sequence.
        for t in range(x_data.shape[0]):
            state['recog_h'] = encoder_act(
                self.recog_in_h(Variable(step_input(t))) + self.recog_h_h(state['recog_h']))

        q_mean = self.recog_mean( state['recog_h'] )
        q_log_sigma = 0.5 * self.recog_log_sigma( state['recog_h'] )

        # Reparameterization: z = mean + exp(log_sigma) * eps, eps ~ N(0, 1).
        noise = np.random.normal(0, 1, q_log_sigma.data.shape ).astype(np.float32)
        if gpu >= 0:
            noise = cuda.to_gpu(noise)
        z = q_mean + F.exp(q_log_sigma) * Variable(noise)

        # p(x|z): the first step is generated from z alone.
        hidden = decoder_act( self.z(z) )
        logits = self.output(hidden)
        prev_output = output_act(logits)
        state['gen_h'] = hidden
        store(0, prev_output)

        if continuous == True:
            rec_loss = F.mean_squared_error(prev_output, Variable(step_input(0)))
        else:
            rec_loss = F.sigmoid_cross_entropy(logits, Variable(step_input(0).astype(np.int32)))

        # Remaining steps come from the generator RNN.
        for t in range(1, x_data.shape[0]):
            hidden = decoder_act( self.gen_in_h(prev_output) + self.gen_h_h(state['gen_h']) )
            logits = self.output(hidden)
            state['gen_h'] = hidden

            if continuous == True:
                # NOTE(review): prev_output is not refreshed in this branch, so
                # every step is fed the first output - confirm this is intended.
                output_t = output_act(logits)
                rec_loss += F.mean_squared_error(output_t, Variable(step_input(t)))
            else:
                rec_loss += F.sigmoid_cross_entropy(logits, Variable(step_input(t).astype(np.int32)))
                output_t = output_act(logits)
                prev_output = output_t

            store(t, output_t)

        # NOTE(review): the weight is 0.0005 and q_log_sigma already carries the
        # 0.5 factor - confirm against the intended KL term.
        KLD = -0.0005 * F.sum(1 + q_log_sigma - q_mean**2 - F.exp(q_log_sigma))

        return output, rec_loss, KLD, state
Example #33
0
 def softplus(self, x):
     """Return the element-wise softplus of ``x``, i.e. ``log(1 + exp(x))``.

     Delegates to ``F.softplus`` instead of composing ``F.log`` and
     ``F.exp``: the naive form overflows to ``inf`` once ``exp(x)`` exceeds
     the float range, while the built-in function stays finite.

     :param x: input variable or array.
     :return: variable holding ``log(1 + exp(x))``.
     """
     return F.softplus(x)
Example #34
0
    def calculate_logistic_loss(self, y, t):
        """Negative log-likelihood of ``t`` under a mixture of discretized logistics.

        ``y`` is split along axis 1 into three equal parts: mixture logits,
        component means and component log-scales (clamped from below by
        ``self.log_scale_min``). ``t`` is broadcast to the shape of the means.

        :param y: network output with ``3 * nr_mix`` entries on axis 1.
        :param t: target values; values below -0.999 and above 0.999 are
            treated as the two edge bins.
        :return: scalar loss, the negated mean of the per-position log-sum-exp
            of the mixture log-probabilities.
        """
        xp = chainer.cuda.get_array_module(t)
        if xp != numpy:
            # Work on the device that holds the targets.
            xp.cuda.Device(t.device).use()

        nr_mix = y.shape[1] // 3
        logit_probs = y[:, :nr_mix]
        means = y[:, nr_mix:2 * nr_mix]
        raw_log_scales = y[:, 2 * nr_mix:3 * nr_mix]
        log_scales = F.maximum(
            raw_log_scales,
            self.scalar_to_tensor(raw_log_scales, self.log_scale_min))

        t = F.broadcast_to(t, means.shape)

        # Logistic CDF arguments at the upper and lower edge of the target bin.
        centered_t = t - means
        inv_std = F.exp(-log_scales)
        plus_in = inv_std * (centered_t + 1 / (self.quantize - 1))
        min_in = inv_std * (centered_t - 1 / (self.quantize - 1))

        # log CDF(upper) and log(1 - CDF(lower)), expressed through softplus.
        log_cdf_plus = plus_in - F.softplus(plus_in)
        log_one_minus_cdf_min = -F.softplus(min_in)

        # Probability mass of the bin itself.
        cdf_delta = F.sigmoid(plus_in) - F.sigmoid(min_in)

        is_lowest_bin = t.array < self.scalar_to_tensor(t, -0.999)
        is_highest_bin = t.array > self.scalar_to_tensor(t, 0.999)

        # Interior bins: log of the mass, floored at 1e-12 before the log.
        # (A log-pdf-at-midpoint fallback for tiny masses is not used here.)
        log_interior = F.log(
            F.maximum(cdf_delta, self.scalar_to_tensor(cdf_delta, 1e-12)))

        log_probs = F.where(
            is_lowest_bin,
            log_cdf_plus,
            F.where(is_highest_bin, log_one_minus_cdf_min, log_interior))

        # Weight the components by their log mixture probabilities.
        log_probs = log_probs + F.log_softmax(logit_probs)
        return -F.mean(F.logsumexp(log_probs, axis=1))
Example #35
0
 def var_func(x):
     """Return ``exp(x)`` squared.

     Presumably ``x`` is a log standard deviation and the result a
     variance - confirm against the caller.
     """
     exp_x = F.exp(x)
     return exp_x**2
Example #36
0
def main():
    """Train or evaluate a TRPO agent on a Gym environment.

    Parses the command-line options, seeds ChainerRL and the envs, builds a
    Gaussian policy and a value function (two 64-unit tanh hidden layers
    each), optionally loads saved or pretrained weights, and then either
    evaluates the agent (``--demo``) or trains it with periodic evaluation.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--gpu',
        type=int,
        default=0,
        help='GPU device ID. Set to -1 to use CPUs only.')
    cli.add_argument(
        '--env',
        type=str,
        default='Hopper-v2',
        help='Gym Env ID')
    cli.add_argument(
        '--seed',
        type=int,
        default=0,
        help='Random seed [0, 2 ** 32)')
    cli.add_argument(
        '--outdir',
        type=str,
        default='results',
        help='Directory path to save output files.'
             ' If it does not exist, it will be created.')
    cli.add_argument(
        '--steps',
        type=int,
        default=2 * 10 ** 6,
        help='Total time steps for training.')
    cli.add_argument(
        '--eval-interval',
        type=int,
        default=100000,
        help='Interval between evaluation phases in steps.')
    cli.add_argument(
        '--eval-n-runs',
        type=int,
        default=100,
        help='Number of episodes ran in an evaluation phase')
    cli.add_argument(
        '--render',
        action='store_true',
        default=False,
        help='Render the env')
    cli.add_argument(
        '--demo',
        action='store_true',
        default=False,
        help='Run demo episodes, not training')
    cli.add_argument(
        '--load-pretrained',
        action='store_true',
        default=False)
    cli.add_argument(
        '--pretrained-type',
        type=str,
        default="best",
        choices=['best', 'final'])
    cli.add_argument(
        '--load',
        type=str,
        default='',
        help='Directory path to load a saved agent data from'
             ' if it is a non-empty string.')
    cli.add_argument(
        '--trpo-update-interval',
        type=int,
        default=5000,
        help='Interval steps of TRPO iterations.')
    cli.add_argument(
        '--logger-level',
        type=int,
        default=logging.INFO,
        help='Level of the root logger.')
    cli.add_argument(
        '--monitor',
        action='store_true',
        help='Monitor the env by gym.wrappers.Monitor.'
             ' Videos and additional log will be saved.')
    args = cli.parse_args()

    logging.basicConfig(level=args.logger_level)

    # Seed ChainerRL.
    chainerrl.misc.set_random_seed(args.seed, gpus=(args.gpu,))

    args.outdir = chainerrl.experiments.prepare_output_dir(args, args.outdir)

    def make_env(test):
        """Build one seeded, wrapped env; test envs get a mirrored seed."""
        env = gym.make(args.env)
        # Train and test envs must not share a seed.
        if test:
            env.seed(2 ** 32 - 1 - args.seed)
        else:
            env.seed(args.seed)
        # Cast observations to float32 because our model uses float32
        env = chainerrl.wrappers.CastObservationToFloat32(env)
        if args.monitor:
            env = gym.wrappers.Monitor(env, args.outdir)
        if args.render:
            env = chainerrl.wrappers.Render(env)
        return env

    env = make_env(test=False)
    timestep_limit = env.spec.max_episode_steps
    obs_space = env.observation_space
    action_space = env.action_space
    print('Observation space:', obs_space)
    print('Action space:', action_space)

    assert isinstance(obs_space, gym.spaces.Box)

    # Observations are normalized by their running mean and variance.
    obs_normalizer = chainerrl.links.EmpiricalNormalization(
        obs_space.low.size, clip_threshold=5)

    # Orthogonal weight initialization is used as OpenAI Baselines does
    winit = chainerrl.initializers.Orthogonal(1.)
    winit_last = chainerrl.initializers.Orthogonal(1e-2)

    action_size = action_space.low.size
    gaussian_head = chainerrl.policies.GaussianHeadWithStateIndependentCovariance(
        action_size=action_size,
        var_type='diagonal',
        var_func=lambda x: F.exp(2 * x),  # Parameterize log std
        var_param_init=0,  # log std = 0 => std = 1
    )
    policy = chainer.Sequential(
        L.Linear(None, 64, initialW=winit),
        F.tanh,
        L.Linear(None, 64, initialW=winit),
        F.tanh,
        L.Linear(None, action_size, initialW=winit_last),
        gaussian_head,
    )

    vf = chainer.Sequential(
        L.Linear(None, 64, initialW=winit),
        F.tanh,
        L.Linear(None, 64, initialW=winit),
        F.tanh,
        L.Linear(None, 1, initialW=winit),
    )

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        for link in (policy, vf, obs_normalizer):
            link.to_gpu(args.gpu)

    # TRPO's policy is optimized via CG and line search, so it doesn't require
    # a chainer.Optimizer. Only the value function needs it.
    vf_opt = chainer.optimizers.Adam()
    vf_opt.setup(vf)

    # Draw both computational graphs into the output directory, using an
    # all-zero observation as input.
    fake_obs = chainer.Variable(
        policy.xp.zeros_like(obs_space.low, dtype=np.float32)[None],
        name='observation')
    for graph_name, network in (('policy', policy), ('vf', vf)):
        chainerrl.misc.draw_computational_graph(
            [network(fake_obs)], os.path.join(args.outdir, graph_name))

    # Hyperparameters in http://arxiv.org/abs/1709.06560
    agent = chainerrl.agents.TRPO(
        policy=policy,
        vf=vf,
        vf_optimizer=vf_opt,
        obs_normalizer=obs_normalizer,
        update_interval=args.trpo_update_interval,
        max_kl=0.01,
        conjugate_gradient_max_iter=20,
        conjugate_gradient_damping=1e-1,
        gamma=0.995,
        lambd=0.97,
        vf_epochs=5,
        entropy_coef=0,
    )

    # --load and --load-pretrained are mutually exclusive.
    assert not (args.load and args.load_pretrained)
    if args.load:
        agent.load(args.load)
    elif args.load_pretrained:
        agent.load(chainerrl.misc.download_model(
            "TRPO", args.env,
            model_type=args.pretrained_type)[0])

    if args.demo:
        # Evaluation only, on a freshly built test env.
        eval_stats = chainerrl.experiments.eval_performance(
            env=make_env(test=True),
            agent=agent,
            n_steps=None,
            n_episodes=args.eval_n_runs,
            max_episode_len=timestep_limit)
        print('n_runs: {} mean: {} median: {} stdev {}'.format(
            args.eval_n_runs, eval_stats['mean'], eval_stats['median'],
            eval_stats['stdev']))
        return

    chainerrl.experiments.train_agent_with_evaluation(
        agent=agent,
        env=env,
        eval_env=make_env(test=True),
        outdir=args.outdir,
        steps=args.steps,
        eval_n_steps=None,
        eval_n_episodes=args.eval_n_runs,
        eval_interval=args.eval_interval,
        train_max_episode_len=timestep_limit,
    )
Example #37
0
 def gaussian_kl_divergence_keepbatch(self, mean, ln_var):
     """KL divergence of N(mean, exp(ln_var)) from N(0, I), one value per row.

     Computes ``0.5 * sum(mean^2 + exp(ln_var) - ln_var - 1)`` over axis 1
     only, so axis 0 is kept.

     :param mean: means of the diagonal Gaussian.
     :param ln_var: log-variances of the diagonal Gaussian.
     :return: the divergence summed over axis 1.
     """
     per_element = mean * mean + F.exp(ln_var) - ln_var - 1
     return F.sum(per_element, axis=1) * 0.5
Example #38
0
def main():
    """Train or evaluate a TRPO agent on a Gym environment.

    Parses the command-line options, seeds ChainerRL and the envs, builds a
    policy (Gaussian for ``Box`` action spaces, softmax for ``Discrete``
    ones) and a value function, optionally loads a saved agent, and then
    either evaluates it (``--demo``) or trains it with periodic evaluation.

    Returns early, after printing a message, when the observation or action
    space is of an unsupported type.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=0,
                        help='GPU device ID. Set to -1 to use CPUs only.')
    parser.add_argument('--env', type=str, default='Hopper-v1',
                        help='Gym Env ID')
    parser.add_argument('--seed', type=int, default=0,
                        help='Random seed [0, 2 ** 32)')
    parser.add_argument('--outdir', type=str, default='results',
                        help='Directory path to save output files.'
                             ' If it does not exist, it will be created.')
    parser.add_argument('--steps', type=int, default=10 ** 6,
                        help='Total time steps for training.')
    parser.add_argument('--eval-interval', type=int, default=10000,
                        help='Interval between evaluation phases in steps.')
    parser.add_argument('--eval-n-runs', type=int, default=10,
                        help='Number of episodes ran in an evaluation phase')
    parser.add_argument('--render', action='store_true', default=False,
                        help='Render the env')
    parser.add_argument('--demo', action='store_true', default=False,
                        help='Run demo episodes, not training')
    parser.add_argument('--load', type=str, default='',
                        help='Directory path to load a saved agent data from'
                             ' if it is a non-empty string.')
    parser.add_argument('--trpo-update-interval', type=int, default=5000,
                        help='Interval steps of TRPO iterations.')
    parser.add_argument('--logger-level', type=int, default=logging.INFO,
                        help='Level of the root logger.')
    parser.add_argument('--monitor', action='store_true',
                        help='Monitor the env by gym.wrappers.Monitor.'
                             ' Videos and additional log will be saved.')
    args = parser.parse_args()

    logging.basicConfig(level=args.logger_level)

    # Set random seed
    chainerrl.misc.set_random_seed(args.seed, gpus=(args.gpu,))

    args.outdir = chainerrl.experiments.prepare_output_dir(args, args.outdir)

    def make_env(test):
        """Build one seeded, wrapped env; test envs get a mirrored seed."""
        env = gym.make(args.env)
        # Use different random seeds for train and test envs. The test seed
        # is mirrored inside [0, 2 ** 32): subtracting from 2 ** 32 - 1 keeps
        # the default seed 0 in range (2 ** 32 - 0 would fall outside it).
        env_seed = 2 ** 32 - 1 - args.seed if test else args.seed
        env.seed(env_seed)
        # Cast observations to float32 because our model uses float32
        env = chainerrl.wrappers.CastObservationToFloat32(env)
        if args.monitor:
            env = gym.wrappers.Monitor(env, args.outdir)
        if args.render:
            chainerrl.misc.env_modifiers.make_rendered(env)
        return env

    env = make_env(test=False)
    timestep_limit = env.spec.tags.get(
        'wrapper_config.TimeLimit.max_episode_steps')
    obs_space = env.observation_space
    action_space = env.action_space
    print('Observation space:', obs_space)
    print('Action space:', action_space)

    if not isinstance(obs_space, gym.spaces.Box):
        print("""\
This example only supports gym.spaces.Box observation spaces. To apply it to
other observation spaces, use a custom phi function that convert an observation
to numpy.ndarray of numpy.float32.""")  # NOQA
        return

    # Normalize observations based on their empirical mean and variance
    obs_normalizer = chainerrl.links.EmpiricalNormalization(
        obs_space.low.size)

    if isinstance(action_space, gym.spaces.Box):
        # Use a Gaussian policy for continuous action spaces
        policy = \
            chainerrl.policies.FCGaussianPolicyWithStateIndependentCovariance(
                obs_space.low.size,
                action_space.low.size,
                n_hidden_channels=64,
                n_hidden_layers=2,
                mean_wscale=0.01,
                nonlinearity=F.tanh,
                var_type='diagonal',
                var_func=lambda x: F.exp(2 * x),  # Parameterize log std
                var_param_init=0,  # log std = 0 => std = 1
            )
    elif isinstance(action_space, gym.spaces.Discrete):
        # Use a Softmax policy for discrete action spaces
        policy = chainerrl.policies.FCSoftmaxPolicy(
            obs_space.low.size,
            action_space.n,
            n_hidden_channels=64,
            n_hidden_layers=2,
            last_wscale=0.01,
            nonlinearity=F.tanh,
        )
    else:
        print("""\
TRPO only supports gym.spaces.Box or gym.spaces.Discrete action spaces.""")  # NOQA
        return

    # Use a value function to reduce variance
    vf = chainerrl.v_functions.FCVFunction(
        obs_space.low.size,
        n_hidden_channels=64,
        n_hidden_layers=2,
        last_wscale=0.01,
        nonlinearity=F.tanh,
    )

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        policy.to_gpu(args.gpu)
        vf.to_gpu(args.gpu)
        obs_normalizer.to_gpu(args.gpu)

    # TRPO's policy is optimized via CG and line search, so it doesn't require
    # a chainer.Optimizer. Only the value function needs it.
    vf_opt = chainer.optimizers.Adam()
    vf_opt.setup(vf)

    # Draw the computational graph and save it in the output directory.
    fake_obs = chainer.Variable(
        policy.xp.zeros_like(obs_space.low, dtype=np.float32)[None],
        name='observation')
    chainerrl.misc.draw_computational_graph(
        [policy(fake_obs)], os.path.join(args.outdir, 'policy'))
    chainerrl.misc.draw_computational_graph(
        [vf(fake_obs)], os.path.join(args.outdir, 'vf'))

    # Hyperparameters in http://arxiv.org/abs/1709.06560
    agent = chainerrl.agents.TRPO(
        policy=policy,
        vf=vf,
        vf_optimizer=vf_opt,
        obs_normalizer=obs_normalizer,
        update_interval=args.trpo_update_interval,
        conjugate_gradient_max_iter=20,
        conjugate_gradient_damping=1e-1,
        gamma=0.995,
        lambd=0.97,
        vf_epochs=5,
        entropy_coef=0,
    )

    if args.load:
        agent.load(args.load)

    if args.demo:
        # Evaluation only, on a freshly built test env.
        env = make_env(test=True)
        eval_stats = chainerrl.experiments.eval_performance(
            env=env,
            agent=agent,
            n_runs=args.eval_n_runs,
            max_episode_len=timestep_limit)
        print('n_runs: {} mean: {} median: {} stdev {}'.format(
            args.eval_n_runs, eval_stats['mean'], eval_stats['median'],
            eval_stats['stdev']))
    else:

        chainerrl.experiments.train_agent_with_evaluation(
            agent=agent,
            env=env,
            eval_env=make_env(test=True),
            outdir=args.outdir,
            steps=args.steps,
            eval_n_runs=args.eval_n_runs,
            eval_interval=args.eval_interval,
            max_episode_len=timestep_limit,
        )
    def __call__(self, image, generate=False):
        """Play ``self.n_turn`` rounds of the speaker/listener drawing game.

        In every turn the speaker compares the target image with the current
        canvas and emits a sampled word sequence; the listener interprets it
        and paints onto the canvas. The squared error between canvas and
        image is recorded per turn, and several optional auxiliary losses
        (REINFORCE term, full-turn, modification, orthogonality, importance)
        are accumulated on top of the last turn's loss.

        :param image: target image batch; ``image.data`` is used for its
            shape and size, so a chainer Variable is expected.
        :param generate: when true, return the generated artifacts instead of
            the loss.
        :return: if ``generate`` is true, a tuple of (sampled sentences per
            turn, log-probabilities per turn, canvases per turn clipped to
            [0, 1]), all as raw arrays; otherwise the accumulated loss.
        """
        n_turn, n_word = self.n_turn, self.n_word
        train = self.train
        # train = True
        # (behaviour goes wrong depending on the value of `train`...)

        # accum_loss is the main loss, sub_accum_loss collects auxiliary terms.
        accum_loss = 0.
        sub_accum_loss = 0.

        batchsize = image.data.shape[0]
        sentence_history = []
        log_prob_history = []
        canvas_history = []
        p_dists_history = []

        # Initialize canvas of Listener
        canvas = chainer.Variable(self.xp.zeros(image.data.shape, np.float32),
                                  volatile='auto')

        loss_list = []
        raw_loss_list = []

        # [Speaker]
        # Perceive
        hidden_image = self.speaker.perceive(image, n_turn, train=train)
        # bn_list[n_turn] is used for real image
        for turn in range(n_turn):
            # [Speaker]
            # Express the image x compared to canvas
            # Perceive
            hidden_canvas = self.speaker.perceive(canvas, turn, train=train)

            # Express
            thought = self.speaker.think(hidden_image,
                                         hidden_canvas,
                                         turn,
                                         train=train)

            sampled_word_idx_seq, log_probability, p_dists = self.speaker.speak(
                thought, n_word=n_word, train=train)

            # [Listener]
            # Interpret the expression & Paint it into canvas
            # Perceive (only canvas)
            hidden_canvas = self.listener.perceive(canvas, turn, train=train)

            # Interpret the expression with current situation (canvas)
            message_meaning = self.listener.listen(sampled_word_idx_seq,
                                                   turn,
                                                   train=train)

            concept = self.listener.think(hidden_canvas,
                                          message_meaning,
                                          turn,
                                          train=train)

            # Cheat ("zuru"): bypass the listener's interpretation and paint
            # from the speaker's thought directly.
            if self.zuru:
                # concept = F.dropout(thought, ratio=0.5, train=train)
                concept = thought

            # Paint
            # canvas = self.listener.painter(
            #    canvas, concept, turn, train=train)
            canvas += self.listener.painter(concept, turn, train=train)

            # Physical limitations of canvas (leaky to make gradient active)
            canvas = F.clip(canvas, 0., 1.) * 0.9 + canvas * 0.1

            # Save
            canvas_history.append(canvas)
            sentence_history.append(sampled_word_idx_seq)
            log_prob_history.append(log_probability)
            p_dists_history.append(p_dists)

            # Calculate communication loss
            raw_loss = (canvas - image)**2

            # Flatten all non-batch axes, then sum the squared error per sample.
            second = reduce(lambda a, b: a * b, raw_loss.shape[1:])
            raw_loss = F.reshape(raw_loss, (raw_loss.shape[0], second))
            raw_loss = F.sum(raw_loss, axis=1)
            raw_loss_list.append(raw_loss)

            # Mean squared error over every element of the batch.
            loss = F.sum(raw_loss) / image.data.size
            loss_list.append(loss)

            report({'l{}'.format(turn): loss}, self)
            report(
                {'p{}'.format(turn): self.xp.exp(log_probability.data.mean())},
                self)

        # Add the last loss
        accum_loss += loss_list[-1]
        report({'loss': accum_loss}, self)

        # Add (minus) reinforce
        #reward = (1. - raw_loss_list[-1]).data
        # Reward is the negated per-sample squared error of the final canvas,
        # detached from the graph.
        reward = (-raw_loss_list[-1]).data
        # Use the stored baseline when there is one, else the batch mean.
        baseline = self.baseline if not self.baseline is None \
            else self.xp.mean(reward)

        reinforce = F.sum(
            sum(log_prob_history) / n_turn * (reward - baseline)) / reward.size
        # Update the baseline as a running average (0.99 old, 0.01 new).
        self.baseline = self.baseline * 0.99 + self.xp.mean(reward) * 0.01 \
            if not self.baseline is None \
            else self.xp.mean(reward)
        accum_reinforce = reinforce
        # Note: the REINFORCE objective is what gets reported as 'reward'.
        report({'reward': accum_reinforce}, self)
        #sub_accum_loss -= accum_reinforce * 0.00001
        #sub_accum_loss -= accum_reinforce * 100.
        #sub_accum_loss -= accum_reinforce * 0.1
        sub_accum_loss -= accum_reinforce * 1.

        # Add loss at full turn
        if self.calc_full_turn:
            # Losses of all turns but the last, weighted less the earlier
            # the turn (decay ** turns-remaining).
            decay = 0.5
            accum_loss_full_turn = sum(loss_list[j] * decay**(n_turn - j - 1)
                                       for j in range(n_turn - 1))
            sub_accum_loss += accum_loss_full_turn
            report({'full': accum_loss_full_turn}, self)

        # Add loss of modification
        if self.calc_modification:
            # Hinge penalty whenever a turn's loss is not lower than the
            # previous turn's (detached) loss by at least `margin`.
            margin = 0.1
            accum_loss_modification = sum(
                F.relu(margin + loss_list[i] - loss_list[i - 1].data)
                for i in range(1, n_turn))
            sub_accum_loss += accum_loss_modification
            report({'mod': accum_loss_modification}, self)

        # Add loss to orthogonal matrix
        if self.calc_orthogonal_loss:

            def orthogonal_regularizer(M):
                # Sum of squared off-diagonal entries of the Gram matrix of
                # the normalized M (the diagonal is masked out).
                nM = F.normalize(M)
                MM = F.matmul(nM, F.transpose(nM))
                iden = self.xp.identity(MM.shape[0])
                return F.sum((MM - MM * iden)**2)

            orthogonal_loss = orthogonal_regularizer(
                self.speaker.language.definition.W) + \
                orthogonal_regularizer(
                    self.listener.language.definition.W)
            # self.calc_orthogonal_loss doubles as the weight of this term.
            sub_accum_loss += orthogonal_loss * self.calc_orthogonal_loss
            report({'ortho': orthogonal_loss}, self)

        # Add balancing vocabulary
        # p_dists_history is a list of lists; flatten it and stack on axis 0.
        concat_p = F.concat(sum(p_dists_history, []), axis=0)
        p_mean = F.sum(concat_p, axis=0) / concat_p.shape[0]
        report({'p_mean': p_mean}, self)  # is this meaningless?

        # NOTE(review): this is exp of the mean log-probability (no negation),
        # reported under the name 'perplexity' - confirm the intent.
        perplexity = F.exp(
            F.sum(sum(log_prob_history)) / len(log_prob_history) /
            batchsize).data
        report({'perplexity': perplexity}, self)

        if self.calc_importance_loss:

            def importance_regularizer(p):
                # Squared coefficient of variation (std / mean) of the
                # per-column sums of p.
                importance = F.sum(p, axis=0)
                mean_i = F.sum(importance) / importance.size
                mean_i_bc = F.broadcast_to(mean_i[None, ], importance.shape)
                std_i = (F.sum(
                    (importance - mean_i_bc)**2) / importance.size)**0.5
                cv = std_i / mean_i
                return cv**2

            importance_loss = importance_regularizer(concat_p)
            # self.calc_importance_loss doubles as the weight of this term.
            sub_accum_loss += importance_loss * self.calc_importance_loss

            report({'importance': importance_loss}, self)

        # At this point accum_loss still holds only the last turn's loss.
        report({'total': accum_loss}, self)

        # Merge main and sub loss
        accum_loss += sub_accum_loss
        self.sub_accum_loss = sub_accum_loss.data

        if generate:
            return [[i.data for i in s] for s in sentence_history], \
                [lp.data for lp in log_prob_history], \
                [F.clip(cv, 0., 1.).data for cv in canvas_history]
        else:
            return accum_loss
Example #40
0
 def prob(self, x):
     """Return ``F.exp`` applied to ``self.log_prob(x)``."""
     log_p = self.log_prob(x)
     return F.exp(log_p)
Example #41
0
    def rl_agent(self, env):
        """Build the policy, value function, optimizer and PPO agent.

        The networks, the branched model, the Adam optimizer and the agent
        are stored on ``self`` (``policy``, ``vf``, ``model``, ``opt``,
        ``agent``).

        Args:
            env: Environment; only ``env.action_space.low.size`` is read, to
                size the policy's last linear layer and the Gaussian head.

        Returns:
            The ``PPO`` agent, also available as ``self.agent``.
        """
        n_actions = env.action_space.low.size

        # Policy: BN -> 256 -> 128 -> n_actions, sigmoid after every linear
        # layer, followed by a Gaussian head.
        policy_layers = [
            L.BatchNormalization(axis=0),
            L.Linear(None, 256),
            F.sigmoid,
            L.Linear(None, 128),
            F.sigmoid,
            L.Linear(None, n_actions),
            F.sigmoid,
            chainerrl.policies.GaussianHeadWithStateIndependentCovariance(
                action_size=n_actions,
                var_type='diagonal',
                # Parameterize log std: variance = exp(2 * x).
                var_func=lambda x: F.exp(2 * x),
            ),
        ]
        self.policy = chainer.Sequential(*policy_layers)

        # Value function: same trunk layout with a single sigmoid output.
        vf_layers = [
            L.BatchNormalization(axis=0),
            L.Linear(None, 256),
            F.sigmoid,
            L.Linear(None, 128),
            F.sigmoid,
            L.Linear(None, 1),
            F.sigmoid,
        ]
        self.vf = chainer.Sequential(*vf_layers)

        # Combine a policy and a value function into a single model.
        self.model = chainerrl.links.Branched(self.policy, self.vf)

        self.opt = chainer.optimizers.Adam(alpha=3e-3, eps=1e-5)
        self.opt.setup(self.model)

        ppo_options = dict(
            gpu=-1,
            update_interval=64,
            minibatch_size=32,
            clip_eps_vf=None,
            entropy_coef=0.001,
        )
        self.agent = PPO(self.model, self.opt, **ppo_options)

        return self.agent
Example #42
0
File: sac.py Project: toy101/DSAC
 def __call__(self):
     """Exponentiate the stored log-temperature and return the result.

     The temperature is returned as a chainer.Variable.
     """
     temperature = F.exp(self.log_temperature)
     return temperature
Example #43
0
 def exp(self):
     """Return ``F.exp`` of the stored ``_lagrange_multiplier``."""
     multiplier = self._lagrange_multiplier
     return F.exp(multiplier)
Example #44
0
def sigmoid_cross_entropy(x, z):
    """Return the sum of ``relu(x) - x * z + log(1 + exp(-|x|))``.

    Args:
        x: Input values (presumably logits -- confirm against callers).
        z: Values multiplied element-wise with ``x`` (presumably the
            targets -- confirm against callers).

    Returns:
        The element-wise expression summed over all elements by ``F.sum``.
    """
    linear_part = F.relu(x) - x * z
    log_part = F.log(1 + F.exp(-abs(x)))
    return F.sum(linear_part + log_part)
Example #45
0
    def __call__(self, input_x, t):
        """Compute the box / confidence training loss for one batch.

        Runs ``self.predictor`` on ``input_x``, splits the output into
        x, y, w, h and confidence maps per anchor box, builds regression
        targets from the ground truth in ``t`` and returns the squared error
        weighted by per-cell learning scales.

        Args:
            input_x: Batch passed to ``self.predictor``.
            t: Per-sample ground truth. ``t[batch][i]`` is read with the keys
                "x", "y", "w", "h" (and "label" for the debug print). All
                entries are used for the IOU masking, but only
                ``t[batch][0]`` is used when assigning regression targets.

        Returns:
            The loss computed as ``F.sum(...) / 2``.

        Note:
            Helper arrays are moved with ``to_gpu()`` and values are read
            back with ``.data.get()``, so this presumably requires a GPU
            setup -- confirm. Debug information is printed on every call and
            ``self.seen`` is incremented by the batch size.
        """
        output = self.predictor(input_x)
        batch_size, _, grid_h, grid_w = output.shape
        self.seen += batch_size
        x, y, w, h, conf = F.split_axis(F.reshape(
            output, (batch_size, self.predictor.n_boxes, 5, grid_h, grid_w)),
                                        (1, 2, 3, 4),
                                        axis=2)
        x = F.sigmoid(x)  # activation for x
        y = F.sigmoid(y)  # activation for y
        conf = F.sigmoid(conf)  # activation for conf

        # Train w and h toward 0 (e^w and e^h approach 1 -> scale factor 1
        # for the responsible bbox).
        tw = np.zeros(w.shape, dtype=np.float32)
        th = np.zeros(h.shape, dtype=np.float32)

        # Train the activated x and y toward 0.5.
        tx = np.tile(0.5, x.shape).astype(np.float32)
        ty = np.tile(0.5, y.shape).astype(np.float32)

        # Learning scale for the bbox error of cells without an object center
        # is basically 0.1 (and 0 once ``seen`` reaches ``unstable_seen``).
        if self.seen < self.unstable_seen:
            box_learning_scale = np.tile(0.1, x.shape).astype(np.float32)
        else:
            box_learning_scale = np.tile(0, x.shape).astype(np.float32)

        # The confidence target is basically 0; boxes whose IOU exceeds
        # thresh are not trained; only the best box of a grid cell that
        # contains an object is pulled toward the true IOU.
        tconf = np.zeros(conf.shape, dtype=np.float32)
        conf_learning_scale = np.tile(0.1, conf.shape).astype(np.float32)

        # Compute the IOU between every bbox and every truth box
        # (computed per batch element).
        x_shift = Variable(
            np.broadcast_to(np.arange(grid_w, dtype=np.float32), x.shape[1:]))
        y_shift = Variable(
            np.broadcast_to(
                np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1),
                y.shape[1:]))
        w_anchor = Variable(
            np.broadcast_to(
                np.reshape(
                    np.array(self.anchors, dtype=np.float32)[:, 0],
                    (self.predictor.n_boxes, 1, 1, 1)), w.shape[1:]))
        h_anchor = Variable(
            np.broadcast_to(
                np.reshape(
                    np.array(self.anchors, dtype=np.float32)[:, 1],
                    (self.predictor.n_boxes, 1, 1, 1)), h.shape[1:]))
        x_shift.to_gpu()
        y_shift.to_gpu()
        w_anchor.to_gpu()
        h_anchor.to_gpu()
        best_ious = []
        for batch in range(batch_size):
            n_truth_boxes = len(t[batch])
            # Predicted boxes in coordinates normalized by the grid size.
            box_x = (x[batch] + x_shift) / grid_w
            box_y = (y[batch] + y_shift) / grid_h
            box_w = F.exp(w[batch]) * w_anchor / grid_w
            box_h = F.exp(h[batch]) * h_anchor / grid_h

            ious = []
            for truth_index in range(n_truth_boxes):
                # Broadcast each truth coordinate to the predicted-box shape.
                truth_box_x = Variable(
                    np.broadcast_to(
                        np.array(t[batch][truth_index]["x"], dtype=np.float32),
                        box_x.shape))
                truth_box_y = Variable(
                    np.broadcast_to(
                        np.array(t[batch][truth_index]["y"], dtype=np.float32),
                        box_y.shape))
                truth_box_w = Variable(
                    np.broadcast_to(
                        np.array(t[batch][truth_index]["w"], dtype=np.float32),
                        box_w.shape))
                truth_box_h = Variable(
                    np.broadcast_to(
                        np.array(t[batch][truth_index]["h"], dtype=np.float32),
                        box_h.shape))
                truth_box_x.to_gpu()
                truth_box_y.to_gpu()
                truth_box_w.to_gpu()
                truth_box_h.to_gpu()
                ious.append(
                    multi_box_iou(
                        Box(box_x, box_y, box_w, box_h),
                        Box(truth_box_x, truth_box_y, truth_box_w,
                            truth_box_h)).data.get())
            ious = np.array(ious)
            # Best IOU over all truth boxes for every predicted box.
            best_ious.append(np.max(ious, axis=0))
        best_ious = np.array(best_ious)

        # For anchors whose IOU exceeds the threshold, do not push conf down
        # to 0 (grid cells around a truth box keep their conf as is).
        tconf[best_ious > self.thresh] = conf.data.get()[
            best_ious > self.thresh]
        conf_learning_scale[best_ious > self.thresh] = 0

        # Individually adjust only the anchor boxes that contain an object.
        abs_anchors = self.anchors / np.array([grid_w, grid_h])
        for batch in range(batch_size):
            truth_box = t[batch][0]
            # Grid cell (column, row) that contains the truth box center.
            truth_w = int(float(truth_box["x"]) * grid_w)
            truth_h = int(float(truth_box["y"]) * grid_h)
            truth_n = 0
            best_iou = 0.0
            # Pick the anchor whose shape best matches the truth box.
            for anchor_index, abs_anchor in enumerate(abs_anchors):
                iou = box_iou(
                    Box(0, 0, float(truth_box["w"]), float(truth_box["h"])),
                    Box(0, 0, abs_anchor[0], abs_anchor[1]))
                if best_iou < iou:
                    best_iou = iou
                    truth_n = anchor_index

            # For the anchor that contains an object, pull the center toward
            # the true coordinates instead of 0.5, pull the anchor scale
            # toward the true scale instead of 1, and set the learning scale
            # to 1.
            box_learning_scale[batch, truth_n, :, truth_h, truth_w] = 1.0
            tx[batch, truth_n, :, truth_h,
               truth_w] = float(truth_box["x"]) * grid_w - truth_w
            ty[batch, truth_n, :, truth_h,
               truth_w] = float(truth_box["y"]) * grid_h - truth_h
            tw[batch, truth_n, :, truth_h, truth_w] = np.log(
                float(truth_box["w"]) / abs_anchors[truth_n][0])
            th[batch, truth_n, :, truth_h, truth_w] = np.log(
                float(truth_box["h"]) / abs_anchors[truth_n][1])

            # Observe the IOU of the current prediction with the truth box;
            # it becomes the confidence target for this cell.
            full_truth_box = Box(float(truth_box["x"]), float(truth_box["y"]),
                                 float(truth_box["w"]), float(truth_box["h"]))
            predicted_box = Box(
                (x[batch][truth_n][0][truth_h][truth_w].data.get() + truth_w) /
                grid_w,
                (y[batch][truth_n][0][truth_h][truth_w].data.get() + truth_h) /
                grid_h,
                np.exp(w[batch][truth_n][0][truth_h][truth_w].data.get()) *
                abs_anchors[truth_n][0],
                np.exp(h[batch][truth_n][0][truth_h][truth_w].data.get()) *
                abs_anchors[truth_n][1])
            predicted_iou = box_iou(full_truth_box, predicted_box)
            tconf[batch, truth_n, :, truth_h, truth_w] = predicted_iou
            conf_learning_scale[batch, truth_n, :, truth_h, truth_w] = 10.0

            # debug prints
            print("best confidences of each grid:")
            for i in range(grid_h):
                for j in range(grid_w):
                    print("%2d" %
                          (int(conf[batch, :, :, i, j].data.max() * 100)),
                          end=" ")
                print()

            print(x[batch, truth_n, :, truth_h,
                    truth_w].data, y[batch, truth_n, :, truth_h, truth_w].data,
                  w[batch, truth_n, :, truth_h,
                    truth_w].data, h[batch, truth_n, :, truth_h, truth_w].data)
            print(tx[batch, truth_n, :, truth_h,
                     truth_w], ty[batch, truth_n, :, truth_h, truth_w],
                  tw[batch, truth_n, :, truth_h,
                     truth_w], th[batch, truth_n, :, truth_h, truth_w])
            print(
                "best default iou: %.2f   predicted iou: %.2f   confidence: %.2f   class: %s"
                % (best_iou, predicted_iou,
                   conf[batch][truth_n][0][truth_h][truth_w].data,
                   t[batch][0]["label"]))
        print("seen = %d" % self.seen)

        # Wrap the targets and learning scales and move them next to the
        # predictions.
        tx, ty, tw, th, tconf = Variable(tx), Variable(ty), Variable(
            tw), Variable(th), Variable(tconf)
        box_learning_scale, conf_learning_scale = Variable(
            box_learning_scale), Variable(conf_learning_scale)
        tx.to_gpu()
        ty.to_gpu()
        tw.to_gpu()
        th.to_gpu()
        tconf.to_gpu()
        box_learning_scale.to_gpu()
        conf_learning_scale.to_gpu()

        # Element-wise squared errors, combined with their learning scales.
        x_loss = (tx - x)**2
        y_loss = (ty - y)**2
        w_loss = (tw - w)**2
        h_loss = (th - h)**2
        c_loss = (tconf - conf)**2
        loss = F.sum((x_loss + y_loss + w_loss + h_loss) * box_learning_scale +
                     c_loss * conf_learning_scale) / 2
        print("x_loss: %f  y_loss: %f  w_loss: %f  h_loss: %f  c_loss: %f" %
              (F.sum(x_loss).data, F.sum(y_loss).data, F.sum(w_loss).data,
               F.sum(h_loss).data, F.sum(c_loss).data))
        return loss
Example #46
0
def log_prob_from_logit(x):
    """Turn logits into log-probabilities along axis 1.

    The per-row maximum is subtracted before exponentiating, and the
    resulting log normalizer is repeated across axis 1 and subtracted
    from ``x``.

    Args:
        x: Logits; axis 1 is the axis that is normalized.

    Returns:
        ``x`` minus the log-sum-exp of ``x`` over axis 1.
    """
    n_cols = x.shape[1]
    row_max = F.max(x, 1, keepdims=True)
    shifted = x - F.repeat(row_max, n_cols, 1)
    log_norm = row_max + F.log(F.sum(F.exp(shifted), 1, keepdims=True))
    return x - F.repeat(log_norm, n_cols, 1)
Example #47
0
    def update(self, done):
        """Build returns and advantages, add the losses and step the optimizer.

        Args:
            done: If true, the return is seeded with 0 and a 0 is appended to
                ``self.V_preds``. Otherwise the return is seeded with the
                last value prediction and the last entries of
                ``self.R_preds`` and ``self.rewards`` are dropped.

        Side effects:
            Mutates ``self.V_preds`` / ``self.R_preds`` / ``self.rewards`` /
            ``self.actions``, accumulates into ``self.loss``, prints the
            loss, then calls ``cleargrads``, ``backward``,
            ``optimizer.update`` and ``unchain_backward``.
        """
        if done:
            # no bootstrap
            R_rev = [0]
            A_rev = [0]
            self.V_preds.append(0)
        else:
            # with bootstrap
            R_rev = [self.V_preds[-1]]
            A_rev = [0]
            self.R_preds = self.R_preds[:-1]  # drop the bootstrap entry
            self.rewards = self.rewards[:-1]

        # Compute the cumulative (discounted) reward, walking backwards.
        r_rev = self.rewards[::-1]
        #print(r_rev)
        for r in r_rev:
            R_rev.append(r + GAMMA * R_rev[-1])
        # Drop the seed value and restore chronological order.
        R = np.array(R_rev[1:][::-1], dtype=Variable)

        # Compute the advantage from the discounted sum of TD residuals.
        N = len(r_rev)
        assert len(self.V_preds) == N + 1
        for i in range(N):
            delta = r_rev[i] + GAMMA * self.V_preds[N - i] - self.V_preds[N - i
                                                                          - 1]
            A_rev.append(delta + GAMMA * LAMBDA * A_rev[-1])
        A = np.array(A_rev[1:][::-1])
        #print(R)
        #print(A)
        #exit()

        # MBP loss: squared error of both the value predictions and the
        # return predictions against the computed returns.
        V_preds = np.array(self.V_preds[:-1])
        R_preds = np.array(self.R_preds)
        #print(V_preds.shape)
        #print(R_preds.shape)
        #print(R.shape)
        #print(A.shape)
        #print(V_preds)
        #print(R_preds)
        #print(R)
        assert len(R) == len(R_preds) == len(V_preds) == len(A)
        R_loss = (np.sum((V_preds - R)**2) + np.sum((R_preds - R)**2)) / 2
        self.loss += ALPHA_RETURN * R_loss

        # Policy gradient
        A_ = 0
        H = 0
        self.actions = self.actions[1:]  # drop the initial-value entry
        for i in range(N):
            log_pi = self.log_pies[i]
            #print(log_pi)
            #print(self.actions[i])
            #print(A[i])
            # Advantage times the log-probability selected by the mask
            # ``actions[i] == 1`` (presumably a one-hot action -- confirm).
            A_ += A[i][0] * log_pi[self.actions[i] == 1][0]
            #print(F.exp(log_pi))
            #print(log_pi)
            #print(np.dot(F.exp(log_pi), log_pi))
            # Entropy term -sum(pi * log pi), scaled by ALPHA_ENTROPY.
            H += -ALPHA_ENTROPY * np.dot(F.exp(log_pi), log_pi)
        self.loss -= A_ + H  # gradient ascent

        # update
        print("  loss: {}".format(self.loss))
        self.cleargrads()
        self.loss.backward()
        self.optimizer.update()
        # Probably unnecessary, since the loss is rebuilt from scratch?
        self.loss.unchain_backward()
Example #48
0
    def mkFilter(self, mean_x, mean_y, ln_var, ln_stride, ln_gamma):
        """Build the attention filters for one minibatch.

        One set of filters is built per sample and shared between the
        ``self.C`` colour maps. Nothing is returned; the results are stored
        on ``self``.

        Args (shapes as stated by the original author's notes):
            mean_x: (B, 1) chainer.Variable, filter-grid centre along x.
            mean_y: (B, 1) chainer.Variable, filter-grid centre along y.
            ln_var: (B, 1) chainer.Variable, log of the Gaussian width.
            ln_stride: (B, 1) chainer.Variable, log of the grid stride.
            ln_gamma: (B, 1) chainer.Variable, log of the intensity.

        Sets:
            self.Fx: (C*B, P, W) horizontal filter bank.
            self.Fy: (C*B, P, H) vertical filter bank.
            self.Gamma: (C*B, P, P) intensity, broadcast over the patch.

        Here B is the batch size, P is ``self.patchsize``, H / W are
        ``self.height`` / ``self.width`` and C is ``self.C``.
        """
        eps = 1e-8
        P = self.patchsize
        B = mean_x.data.shape[0]
        H = self.height
        W = self.width

        # Rescale the centres from (mean + 1) * 0.5 * (size + 1).
        mean_x = 0.5 * (W + 1.0) * (mean_x + 1.0)  # (B,1)
        mean_y = 0.5 * (H + 1.0) * (mean_y + 1.0)  # (B,1)
        # NOTE(review): ``var`` divides the offset before squaring below, so
        # it acts as a standard deviation rather than a variance -- confirm
        # that this is intended.
        var = F.exp(ln_var)
        stride = (self.L_edge - 1.0) / (P - 1.0) * F.exp(ln_stride)
        gamma = F.exp(ln_gamma)

        mu_x = F.broadcast_to(mean_x, (P, B, 1))  # (B,1) -> (P,B,1)
        mu_x = F.transpose(mu_x, (1, 0, 2))  #       -> (B,P,1)
        mu_y = F.broadcast_to(mean_y, (P, B, 1))  # (B,1) -> (P,B,1)
        mu_y = F.transpose(mu_y, (1, 0, 2))  #       -> (B,P,1)
        stride = F.broadcast_to(stride, (P, B, 1))  # (B,1) -> (P,B,1)
        stride = F.transpose(stride, (1, 0, 2))  #       -> (B,P,1)
        var_x = F.broadcast_to(var, (P, W, B, 1))  # (B,1) -> (P,W,B,1)
        var_x = F.transpose(var_x, (2, 0, 1, 3))  #       -> (B,P,W,1)
        var_y = F.broadcast_to(var, (P, H, B, 1))  # (B,1) -> (P,H,B,1)
        var_y = F.transpose(var_y, (2, 0, 1, 3))  #       -> (B,P,H,1)

        # Centre of every filter row: grid centre plus patch offset * stride.
        mu_x = mu_x + F.broadcast_to(self.Parray,
                                     (B, P, 1)) * stride  # (B,P,1)
        mu_y = mu_y + F.broadcast_to(self.Parray,
                                     (B, P, 1)) * stride  # (B,P,1)

        # Offset of every pixel coordinate from every filter centre.
        mu_x = F.transpose(F.broadcast_to(mu_x, (W, B, P, 1)),
                           (1, 2, 0, 3))
        mu_x = F.broadcast_to(self.Warray,
                              (B, P, W)) - F.reshape(mu_x, (B, P, W))
        mu_y = F.transpose(F.broadcast_to(mu_y, (H, B, P, 1)),
                           (1, 2, 0, 3))
        mu_y = F.broadcast_to(self.Harray,
                              (B, P, H)) - F.reshape(mu_y, (B, P, H))
        var_x = F.reshape(var_x, (B, P, W))  # (B,P,W,1) -> (B,P,W)
        var_y = F.reshape(var_y, (B, P, H))  # (B,P,H,1) -> (B,P,H)

        x_square = -0.5 * (mu_x / var_x)**2  # (B,P,W)
        y_square = -0.5 * (mu_y / var_y)**2  # (B,P,H)
        x_gauss = F.exp(x_square)
        y_gauss = F.exp(y_square)

        # Row-wise normalizers, clamped to eps to avoid dividing by ~0.
        xsum = F.sum(x_gauss, 2)  # (B,P)
        ysum = F.sum(y_gauss, 2)  # (B,P)
        Zx_prev = F.transpose(F.broadcast_to(xsum, (W, B, P)), (1, 2, 0))
        enable = Variable(Zx_prev.data > eps)
        Zx = F.where(enable, Zx_prev,
                     XP.fnonzeros(Zx_prev.data.shape, val=1.0) * eps)
        Zy_prev = F.transpose(F.broadcast_to(ysum, (H, B, P)), (1, 2, 0))
        enable = Variable(Zy_prev.data > eps)
        Zy = F.where(enable, Zy_prev,
                     XP.fnonzeros(Zy_prev.data.shape, val=1.0) * eps)
        Fx = x_gauss / Zx
        Fy = y_gauss / Zy

        gamma_ = F.broadcast_to(gamma,
                                (P, P, self.C, B, 1))  # (B,1) -> (P,P,C,B,1)
        Gamma = F.reshape(F.transpose(gamma_, (4, 3, 2, 0, 1)),
                          (self.C * B, P, P))  #       -> (C*B,P,P)

        # Repeat the per-sample filters for every colour map.
        Fx_ = F.broadcast_to(Fx, (self.C, B, P, W))
        Fy_ = F.broadcast_to(Fy, (self.C, B, P, H))
        Fx = F.reshape(F.transpose(Fx_, (1, 0, 2, 3)), (self.C * B, P, W))
        Fy = F.reshape(F.transpose(Fy_, (1, 0, 2, 3)), (self.C * B, P, H))

        self.Fx = Fx
        self.Fy = Fy
        self.Gamma = Gamma
Example #49
0
def normalize_2d(x):
    """Normalize ``exp(x[0])`` so its last two axes sum to one.

    The denominator is tiled to the actual size of the last two axes of
    ``x[0]`` instead of a fixed 160x210 grid, so any spatial size works;
    for 160x210 inputs the result is unchanged.

    Args:
        x: Indexable whose first element ``x[0]`` holds the values to
            normalize (at least two axes).

    Returns:
        ``exp(x[0])`` divided by its sum over the last two axes.
    """
    exp = F.exp(x[0])
    sums = F.sum(F.sum(exp, axis=-1), axis=-1)
    expanded = F.expand_dims(F.expand_dims(sums, axis=-1), axis=-1)
    # Repeat the per-map sum over the real trailing dimensions.
    height, width = exp.shape[-2:]
    denominator = F.tile(expanded, (1, height, width))
    return exp / denominator
def multibox_focal_loss(mb_locs, mb_confs, gt_mb_locs, gt_mb_labels, k):
    r"""Compute the multibox localization loss and a focal confidence loss.

    The localization loss is the Huber loss of the SSD multibox loss [#]_,
    summed over the positive default boxes. The confidence loss is a
    sigmoid focal loss (``alpha = 0.75``, ``gamma = 2``) over all boxes and
    classes, used in place of softmax cross entropy with hard negative
    mining. Both are divided by the number of positive boxes.

    .. [#] Wei Liu, Dragomir Anguelov, Dumitru Erhan,
       Christian Szegedy, Scott Reed, Cheng-Yang Fu, Alexander C. Berg.
       SSD: Single Shot MultiBox Detector. ECCV 2016.

    Args:
        mb_locs (chainer.Variable or array): The offsets and scales
            for predicted bounding boxes.
            Its shape is :math:`(B, K, 4)`,
            where :math:`B` is the number of samples in the batch and
            :math:`K` is the number of default bounding boxes.
        mb_confs (chainer.Variable or array): The class logits of predicted
            bounding boxes.
            Its shape is :math:`(B, K, n\_class)`.
            This function assumes the first class is background (negative).
        gt_mb_locs (chainer.Variable or array): The offsets and scales
            for ground truth bounding boxes.
            Its shape is :math:`(B, K, 4)`.
        gt_mb_labels (array): The classes of ground truth bounding boxes.
            Its shape is :math:`(B, K)`. It is used as a raw array
            (``.sum()``, ``.astype``, fancy indexing), so presumably a
            ``chainer.Variable`` is not accepted here -- confirm.
        k (float): Unused. It was the hard negative mining ratio and is
            kept so existing callers keep working.

    Returns:
        tuple of chainer.Variable:
        ``(loc_loss, focal_loss)``. When there is no positive box, both
        elements are the same zero-valued scalar variable.
    """
    mb_locs = chainer.as_variable(mb_locs)
    mb_confs = chainer.as_variable(mb_confs)
    gt_mb_locs = chainer.as_variable(gt_mb_locs)

    xp = chainer.cuda.get_array_module(gt_mb_locs.array)

    positive = gt_mb_labels > 0
    n_positive = positive.sum()

    if n_positive == 0:
        z = chainer.Variable(xp.zeros((), dtype=np.float32))
        return z, z

    # Localization: Huber loss per box, kept only for positive boxes.
    loc_loss = F.huber_loss(mb_locs, gt_mb_locs, 1, reduce='no')
    loc_loss = F.sum(loc_loss, axis=-1)
    loc_loss *= positive.astype(loc_loss.dtype)
    loc_loss = F.sum(loc_loss) / n_positive

    alpha = 0.75
    gamma = 2

    # One-hot encode the labels to shape (B, K, n_class).
    t = gt_mb_labels.reshape(gt_mb_labels.shape[0] * gt_mb_labels.shape[1], )
    class_num = mb_confs.shape[2]  # class_num includes back ground class
    t = F.cast(chainer.as_variable(xp.eye(class_num)[t]), loc_loss.dtype)
    t = t.reshape(gt_mb_labels.shape[0], gt_mb_labels.shape[1], class_num)

    # Focal weight: pt = p if t > 0 else 1 - p, and
    # w = (alpha if t > 0 else 1 - alpha) * (1 - pt) ** gamma.
    p = F.sigmoid(mb_confs)
    pt = F.where(t.array > 0, p, 1 - p)
    w = (1 - pt)**gamma
    w = F.where(t.array > 0, alpha * w, (1 - alpha) * w)

    # Binary cross entropy on logits, following the PyTorch implementation
    # of binary_cross_entropy_with_logits:
    # https://pytorch.org/docs/master/_modules/torch/nn/functional.html#binary_cross_entropy_with_logits
    max_val = F.clip(-mb_confs, x_min=0.0, x_max=10.0e+12)
    focal_loss = mb_confs - mb_confs * t + max_val + F.log(
        F.exp(-max_val) + F.exp(-mb_confs - max_val))
    focal_loss = F.sum(focal_loss * w) / n_positive

    return loc_loss, focal_loss
    def __call__(self, input_x, t, train=True):
        """Compute the FCN loss/prediction or the YOLO training loss.

        When ``self.FCN`` is set, only the FCN head is used: the softmax
        cross entropy against ``t`` is reported and returned when ``train``
        is true, otherwise the softmax of the FCN output is returned.
        Otherwise the YOLO loss (box, confidence and class terms) is built
        from the ground truth in ``t``, reported and returned.

        Args:
            input_x: Batch passed to ``self.predictor``.
            t: Ground truth. In YOLO mode ``t[batch]`` is a sequence of rows
                read as ``(class, x, y, w, h)``; rows whose first entry is
                10.0 are skipped (presumably padding -- confirm).
            train: Forwarded to the predictor; selects the FCN branch output.

        Returns:
            The FCN loss, the FCN softmax output, or the total YOLO loss.

        Note:
            Helper arrays are moved with ``to_gpu()`` and values are read
            back with ``.data.get()``, so the YOLO path presumably requires
            a GPU setup -- confirm. ``self.seen`` is incremented by the
            batch size.
        """
        output_fcn, output_yolo = self.predictor(input_x, train=train)
        if self.FCN:
            if train:
                loss_fcn = F.softmax_cross_entropy(output_fcn, t)
                reporter.report({'loss': loss_fcn}, self)
                return loss_fcn
            else:
                loss = F.softmax(output_fcn)
                return loss
        batch_size, _, grid_h, grid_w = output_yolo.shape
        self.seen += batch_size
        x, y, w, h, conf, prob = F.split_axis(
            F.reshape(
                output_yolo,
                (batch_size, self.predictor.n_boxes,
                 self.predictor.n_classes_yolo + 5, grid_h, grid_w)),
            (1, 2, 3, 4, 5),
            axis=2)
        x = F.sigmoid(x)
        y = F.sigmoid(y)
        conf = F.sigmoid(conf)
        prob = F.transpose(prob, (0, 2, 1, 3, 4))
        prob = F.softmax(prob)

        # Train w and h toward 0 (e^w and e^h approach 1 -> scale factor 1
        # for the responsible bbox).
        tw = np.zeros(w.shape, dtype=np.float32)
        th = np.zeros(h.shape, dtype=np.float32)
        # Train the activated x and y toward 0.5.
        tx = np.tile(0.5, x.shape).astype(np.float32)
        ty = np.tile(0.5, y.shape).astype(np.float32)

        if self.seen < self.unstable_seen:
            box_learning_scale = np.tile(0.1, x.shape).astype(np.float32)
        else:
            box_learning_scale = np.tile(0, x.shape).astype(np.float32)

        # The confidence target is basically 0; boxes whose IOU exceeds
        # thresh are not trained; only the best box of a grid cell that
        # contains an object is pulled toward the true IOU.
        tconf = np.zeros(conf.shape, dtype=np.float32)
        conf_learning_scale = np.tile(0.1, conf.shape).astype(np.float32)

        # Cells other than the best anchor are not trained (the squared
        # error against a copy of the prediction is 0).
        tprob = prob.data.copy()

        # Compute the IOU between every bbox and every truth box
        # (computed per batch element).
        x_shift = Variable(
            np.broadcast_to(np.arange(grid_w, dtype=np.float32), x.shape[1:]))
        y_shift = Variable(
            np.broadcast_to(
                np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1),
                y.shape[1:]))
        w_anchor = Variable(
            np.broadcast_to(
                np.reshape(
                    np.array(self.anchors, dtype=np.float32)[:, 0],
                    (self.predictor.n_boxes, 1, 1, 1)), w.shape[1:]))
        h_anchor = Variable(
            np.broadcast_to(
                np.reshape(
                    np.array(self.anchors, dtype=np.float32)[:, 1],
                    (self.predictor.n_boxes, 1, 1, 1)), h.shape[1:]))
        x_shift.to_gpu()
        y_shift.to_gpu()
        w_anchor.to_gpu()
        h_anchor.to_gpu()

        # The truth rows are only read element by element on the host, so
        # bring them to the CPU once instead of once per truth box.
        t = chainer.cuda.to_cpu(t)

        best_ious = []
        for batch in range(batch_size):
            # Count the real truth rows (rows marked with class 10.0 are
            # skipped everywhere below as well).
            n_truth_boxes = int(sum(box[0] != 10.0 for box in t[batch]))
            box_x = (x[batch] + x_shift) / grid_w
            box_y = (y[batch] + y_shift) / grid_h
            box_w = F.exp(w[batch]) * w_anchor / grid_w
            box_h = F.exp(h[batch]) * h_anchor / grid_h

            ious = []
            for truth_index in range(n_truth_boxes):
                truth_box_x = Variable(
                    np.broadcast_to(
                        np.array(t[batch][truth_index][1], dtype=np.float32),
                        box_x.shape))
                truth_box_y = Variable(
                    np.broadcast_to(
                        np.array(t[batch][truth_index][2], dtype=np.float32),
                        box_y.shape))
                truth_box_w = Variable(
                    np.broadcast_to(
                        np.array(t[batch][truth_index][3], dtype=np.float32),
                        box_w.shape))
                truth_box_h = Variable(
                    np.broadcast_to(
                        np.array(t[batch][truth_index][4], dtype=np.float32),
                        box_h.shape))
                truth_box_x.to_gpu()
                truth_box_y.to_gpu()
                truth_box_w.to_gpu()
                truth_box_h.to_gpu()
                ious.append(
                    multi_box_iou(
                        Box(box_x, box_y, box_w, box_h),
                        Box(truth_box_x, truth_box_y, truth_box_w,
                            truth_box_h)).data.get())
            if ious:
                ious = np.array(ious)
                best_ious.append(np.max(ious, axis=0))
            else:
                # No truth box for this sample: use an all-zero IOU map of
                # the per-sample shape so that ``best_ious`` stays a regular
                # array (a bare 0 next to arrays makes the list ragged).
                best_ious.append(np.zeros(conf.shape[1:], dtype=np.float32))
        best_ious = np.array(best_ious)

        # For anchors whose IOU exceeds the threshold, do not push conf down
        # to 0 (grid cells around a truth box keep their conf as is).
        tconf[best_ious > self.thresh] = conf.data.get()[
            best_ious > self.thresh]
        conf_learning_scale[best_ious > self.thresh] = 0

        # Individually adjust x, y, w, h, conf and prob only for the anchor
        # boxes that contain an object.
        abs_anchors = self.anchors / np.array([grid_w, grid_h])
        for batch in range(batch_size):
            for truth_box in t[batch]:
                if truth_box[0] == 10.0:  # skipped marker row
                    continue
                # Grid cell (column, row) that contains the box center.
                truth_w = int(float(truth_box[1]) * grid_w)
                truth_h = int(float(truth_box[2]) * grid_h)
                truth_n = 0
                best_iou = 0.0
                # Pick the anchor whose shape best matches the truth box.
                for anchor_index, abs_anchor in enumerate(abs_anchors):
                    iou = box_iou(
                        Box(0, 0, float(truth_box[3]), float(truth_box[4])),
                        Box(0, 0, abs_anchor[0], abs_anchor[1]))
                    if best_iou < iou:
                        best_iou = iou
                        truth_n = anchor_index

                # For the anchor that contains an object, pull the center
                # toward the true coordinates instead of 0.5, pull the
                # anchor scale toward the true scale instead of 1, and set
                # the learning scale to 1.
                box_learning_scale[batch, truth_n, :, truth_h, truth_w] = 1.0
                tx[batch, truth_n, :, truth_h,
                   truth_w] = float(truth_box[1]) * grid_w - truth_w
                ty[batch, truth_n, :, truth_h,
                   truth_w] = float(truth_box[2]) * grid_h - truth_h
                tw[batch, truth_n, :, truth_h, truth_w] = np.log(
                    float(truth_box[3]) / abs_anchors[truth_n][0])
                th[batch, truth_n, :, truth_h, truth_w] = np.log(
                    float(truth_box[4]) / abs_anchors[truth_n][1])
                # One-hot class target for this anchor and cell.
                tprob[batch, :, truth_n, truth_h, truth_w] = 0
                tprob[batch, int(truth_box[0]), truth_n, truth_h, truth_w] = 1

                # Observe the IOU of the current prediction with the truth
                # box; it becomes the confidence target for this cell.
                full_truth_box = Box(float(truth_box[1]), float(truth_box[2]),
                                     float(truth_box[3]), float(truth_box[4]))
                predicted_box = Box(
                    (x[batch][truth_n][0][truth_h][truth_w].data.get() +
                     truth_w) / grid_w,
                    (y[batch][truth_n][0][truth_h][truth_w].data.get() +
                     truth_h) / grid_h,
                    np.exp(w[batch][truth_n][0][truth_h][truth_w].data.get()) *
                    abs_anchors[truth_n][0],
                    np.exp(h[batch][truth_n][0][truth_h][truth_w].data.get()) *
                    abs_anchors[truth_n][1])
                predicted_iou = box_iou(full_truth_box, predicted_box)
                tconf[batch, truth_n, :, truth_h, truth_w] = predicted_iou
                conf_learning_scale[batch, truth_n, :, truth_h, truth_w] = 10.0

        # Loss computation
        tx, ty, tw, th, tconf, tprob = Variable(tx), Variable(ty), Variable(
            tw), Variable(th), Variable(tconf), Variable(tprob)
        box_learning_scale, conf_learning_scale = Variable(
            box_learning_scale), Variable(conf_learning_scale)
        tx.to_gpu()
        ty.to_gpu()
        tw.to_gpu()
        th.to_gpu()
        tconf.to_gpu()
        tprob.to_gpu()
        box_learning_scale.to_gpu()
        conf_learning_scale.to_gpu()

        x_loss = F.sum((tx - x)**2 * box_learning_scale) / 2
        y_loss = F.sum((ty - y)**2 * box_learning_scale) / 2
        w_loss = F.sum((tw - w)**2 * box_learning_scale) / 2
        h_loss = F.sum((th - h)**2 * box_learning_scale) / 2
        c_loss = F.sum((tconf - conf)**2 * conf_learning_scale) / 2
        p_loss = F.sum((tprob - prob)**2) / 2

        # Each term is already a scalar, so its data can be reported as is.
        reporter.report({'x_loss': x_loss.data}, self)
        reporter.report({'y_loss': y_loss.data}, self)
        reporter.report({'w_loss': w_loss.data}, self)
        reporter.report({'h_loss': h_loss.data}, self)
        reporter.report({'c_loss': c_loss.data}, self)
        reporter.report({'p_loss': p_loss.data}, self)

        loss_yolo = x_loss + y_loss + w_loss + h_loss + c_loss + p_loss
        reporter.report({'loss': loss_yolo}, self)
        return loss_yolo
    def forward_one_step(self,
                         x_data,
                         y_data,
                         n_layers_recog,
                         n_layers_gen,
                         nonlinear_q='softplus',
                         nonlinear_p='softplus',
                         output_f='sigmoid',
                         type_qx='gaussian',
                         type_px='gaussian',
                         gpu=-1):
        """Run one encode / sample / decode pass and return the losses.

        Args:
            x_data: Input array, wrapped in a ``Variable``; its first two
                dimensions are used to normalize the KL term.
            y_data: Conditioning array, wrapped in a ``Variable``.
            n_layers_recog: Number of recognition layers; layers
                ``recog_0`` .. ``recog_{n-2}`` are applied after the first.
            n_layers_gen: Number of generator layers; layers
                ``gen_0`` .. ``gen_{n-2}`` are applied after the first.
            nonlinear_q: Activation name for the recognition network
                ('sigmoid', 'tanh', 'softplus' or 'relu').
            nonlinear_p: Activation name for the generator network.
            output_f: Output activation name ('sigmoid', 'identity', 'tanh').
            type_qx: Unused in this method.
            type_px: Unused in this method.
            gpu: If ``>= 0`` the sampled noise is moved to the GPU.

        Returns:
            Tuple ``(rec_loss, KLD, output)``: the mean squared
            reconstruction error, the KL divergence term and the
            reconstruction.

        Raises:
            KeyError: If an activation name is not one of the known keys.
        """
        x = Variable(x_data)
        y = Variable(y_data)

        # set non-linear function
        nonlinear = {
            'sigmoid': F.sigmoid,
            'tanh': F.tanh,
            'softplus': self.softplus,
            'relu': F.relu
        }
        nonlinear_f_q = nonlinear[nonlinear_q]
        nonlinear_f_p = nonlinear[nonlinear_p]

        output_activation = {
            'sigmoid': F.sigmoid,
            'identity': self.identity,
            'tanh': F.tanh
        }
        output_a_f = output_activation[output_f]

        # compute q(z|x, y)
        hidden_q = [nonlinear_f_q(self.recog_x(x) + self.recog_y(y))]

        for i in range(n_layers_recog - 1):
            hidden_q.append(
                nonlinear_f_q(getattr(self, 'recog_%i' % i)(hidden_q[-1])))

        q_mean = self.recog_mean(hidden_q[-1])
        # ``recog_log`` is halved to obtain the log standard deviation used
        # for sampling, so its raw output is the log-variance.
        q_log_var = self.recog_log(hidden_q[-1])
        q_log_sigma = 0.5 * q_log_var

        eps = np.random.normal(
            0, 1,
            (x.data.shape[0], q_log_sigma.data.shape[1])).astype('float32')
        if gpu >= 0:
            eps = cuda.to_gpu(eps)

        eps = Variable(eps)
        # Reparameterization: z = mu + sigma * eps.
        z = q_mean + F.exp(q_log_sigma) * eps

        # compute p(x|y, z)
        hidden_p = [nonlinear_f_p(self.gen_y(y) + self.gen_z(z))]

        for i in range(n_layers_gen - 1):
            hidden_p.append(
                nonlinear_f_p(getattr(self, 'gen_%i' % i)(hidden_p[-1])))

        hidden_p.append(output_a_f(self.gen_out(hidden_p[-1])))
        output = hidden_p[-1]

        rec_loss = F.mean_squared_error(output, x)
        # KL(N(mu, sigma^2) || N(0, 1)) needs the log-variance and the
        # variance, not the halved ``q_log_sigma`` used for sampling.
        KLD = -0.5 * F.sum(1 + q_log_var - q_mean**2 -
                           F.exp(q_log_var)) / (x_data.shape[0] *
                                                x_data.shape[1])

        return rec_loss, KLD, output
Example #53
0
def binary_cross_entropy_with_logits(x, t):
    """Return the summed binary cross-entropy between logits and targets.

    The loss is evaluated directly on the logits ``x`` using the
    max-subtraction form of log(1 + exp(-x)), so the exponentials never
    receive a positive argument.

    Args:
        x: Logits (anything accepted by the ``F`` functions used below).
        t: Targets, combined with ``x`` element-wise.

    Returns:
        The result of ``F.sum`` over the element-wise loss.
    """
    # shift = max(-x, 0): the amount factored out of the log-sum-exp below.
    shift = F.clip(-x, x_min=0., x_max=np.inf)
    log_sum = F.log(F.exp(-shift) + F.exp(-x - shift))
    return F.sum(x - x * t + shift + log_sum)
    def generate(self,
                 sample_x,
                 sample_y,
                 n_layers_recog,
                 n_layers_gen,
                 nonlinear_q='relu',
                 nonlinear_p='relu',
                 output_f='sigmoid',
                 gpu=-1):
        """Encode ``(sample_x, sample_y)`` and decode the latent for every label.

        The recognition network produces a latent sample ``z`` from the given
        input pair; the generator is then run once per label with a one-hot
        label vector and that same ``z``.

        Args:
            sample_x: Input array fed to ``self.recog_x``; ``shape[1]`` is the
                width of each generated row.
            sample_y: Label array fed to ``self.recog_y``; ``shape[1]`` is the
                number of labels that are enumerated.
            n_layers_recog: Number of recognition layers; links
                ``recog_0 .. recog_{n-2}`` are applied after the input layer.
            n_layers_gen: Number of generator layers; links
                ``gen_0 .. gen_{n-2}`` are applied after the input layer.
            nonlinear_q: Key into the activation table for the recognition
                network ('sigmoid', 'tanh', 'softplus' or 'relu').
            nonlinear_p: Same, for the generator network.
            output_f: Output activation ('sigmoid', 'identity' or 'tanh').
            gpu: When ``>= 0`` the arrays created here are moved to the GPU.

        Returns:
            A float32 NumPy array of shape
            ``(sample_y.shape[1], sample_x.shape[1])``; row ``k`` is the
            generator output for label ``k``.

        Raises:
            KeyError: If an activation name is not in the tables below.

        NOTE(review): each generator output is written into a single row of
        the result, so this presumably expects a batch of one sample --
        confirm against the caller.
        """
        x = Variable(sample_x)
        y = Variable(sample_y)

        # set non-linear function
        nonlinear = {
            'sigmoid': F.sigmoid,
            'tanh': F.tanh,
            'softplus': self.softplus,
            'relu': F.relu
        }
        nonlinear_f_q = nonlinear[nonlinear_q]
        nonlinear_f_p = nonlinear[nonlinear_p]

        output_activation = {
            'sigmoid': F.sigmoid,
            'identity': self.identity,
            'tanh': F.tanh
        }
        output_a_f = output_activation[output_f]

        # compute q(z | x, y); only the most recent hidden layer is needed.
        hidden = nonlinear_f_q(self.recog_x(x) + self.recog_y(y))
        for i in range(n_layers_recog - 1):
            hidden = nonlinear_f_q(getattr(self, 'recog_%i' % i)(hidden))

        q_mean = self.recog_mean(hidden)
        # The link output is halved before being exponentiated below.
        q_log_sigma = 0.5 * self.recog_log(hidden)

        eps = np.random.normal(
            0, 1,
            (x.data.shape[0], q_log_sigma.data.shape[1])).astype('float32')
        if gpu >= 0:
            eps = cuda.to_gpu(eps)

        # Reparameterised sample: z = mean + sigma * eps.
        z = q_mean + F.exp(q_log_sigma) * Variable(eps)

        n_labels = sample_y.shape[1]
        outputs = np.zeros((n_labels, sample_x.shape[1]), dtype=np.float32)

        for label in range(n_labels):
            # A separate local keeps the ``sample_y`` argument untouched.
            one_hot = np.zeros((1, n_labels), dtype=np.float32)
            one_hot[0][label] = 1.
            if gpu >= 0:
                # The label must live on the same device as the links and z.
                one_hot = cuda.to_gpu(one_hot)

            # compute p(x | y, z)
            hidden = nonlinear_f_p(
                self.gen_y(Variable(one_hot)) + self.gen_z(z))
            for i in range(n_layers_gen - 1):
                hidden = nonlinear_f_p(getattr(self, 'gen_%i' % i)(hidden))

            output = output_a_f(self.gen_out(hidden))

            # ``outputs`` is a host array; to_cpu is a no-op for NumPy data.
            outputs[label] = cuda.to_cpu(output.data)

        return outputs
Example #55
0
    times = []
    for i in range(10):
        W.cleargrad()
        b.cleargrad()
        x.cleargrad()
        log_sigma2.cleargrad()
        xp.random.seed(777)

        start = time.time()

        log_alpha = F.clip(log_sigma2 - F.log(W * W + 1e-8), -8., 8.)
        clip_mask = (log_alpha.data > loga_threshold)
        _W = (1. - clip_mask) * W
        mu = F.linear(x, _W)
        si = F.sqrt(F.linear(x * x, F.exp(log_alpha) * _W * _W) + 1e-8)
        normal_noise = xp.random.standard_normal(mu.shape).astype('f')
        y = mu + si * normal_noise
        if b is not None:
            y = F.bias(y, b)

        F.sum(y).backward()
        vs2 = [y.data,
               W.grad,
               b.grad,
               x.grad,
               log_sigma2.grad, ]
        times.append(time.time() - start)

    print('composition', numpy.mean(times[5:]))
    for v1, v2 in zip(vs1, vs2):
    def __call__(self, input_x, t):
        """Run the predictor on ``input_x`` and return the total training loss.

        The predictor output is reshaped to
        ``(batch, n_boxes, n_classes + 5, grid_h, grid_w)`` and split into
        x, y, w, h, confidence and class-probability maps. Targets and
        per-element learning scales are built with NumPy on the host, moved
        to the GPU, and compared with the predictions by (weighted) squared
        error.

        Args:
            input_x: Input variable passed to ``self.predictor``; its
                ``volatile`` flag is copied to every Variable created here.
            t: Per-sample collection of ground-truth boxes. Each box is
                indexed with the keys "x", "y", "w", "h" and "label".
                NOTE(review): the coordinates are multiplied by the grid
                size to obtain a cell index, so they are presumably
                normalised to [0, 1) -- confirm against the data loader.

        Returns:
            The sum of the x, y, w, h, confidence and probability losses.

        Side effects:
            Adds the batch size to ``self.seen`` when
            ``self.predictor.train`` is true, and prints the six loss terms.

        NOTE(review): ``.to_gpu()`` and ``.data.get()`` are called
        unconditionally, so this presumably only works with the model on a
        GPU -- confirm.
        """
        isVola = input_x.volatile
        output = self.predictor(input_x)
        batch_size, _, grid_h, grid_w = output.shape
        if self.predictor.train == True:
            self.seen += batch_size
        x, y, w, h, conf, prob = F.split_axis(F.reshape(output, (batch_size, self.predictor.n_boxes, self.predictor.n_classes+5, grid_h, grid_w)), (1, 2, 3, 4, 5), axis=2)
        x = F.sigmoid(x) # activation for x
        y = F.sigmoid(y) # activation for y
        conf = F.sigmoid(conf) # activation for conf
        prob = F.transpose(prob, (0, 2, 1, 3, 4))
        prob = F.softmax(prob) # activation for the class probabilities


        # Prepare the training targets
        tw = np.zeros(w.shape, dtype=np.float32) # train w and h toward 0 (e^w and e^h approach 1 -> scale factor 1 for the responsible bbox)
        th = np.zeros(h.shape, dtype=np.float32)
        tx = np.tile(0.5, x.shape).astype(np.float32) # train the activated x and y toward 0.5
        ty = np.tile(0.5, y.shape).astype(np.float32)

        if self.seen < self.unstable_seen: # default learning scale of the bbox error for boxes without an object center is 0.1
            box_learning_scale = np.tile(0.1, x.shape).astype(np.float32)
        else:
            box_learning_scale = np.tile(0, x.shape).astype(np.float32)

        tconf = np.zeros(conf.shape, dtype=np.float32) # confidence truth is 0 by default; boxes whose IOU is above thresh are not trained; only the best box of a grid cell containing an object is pulled toward its true IOU
        conf_learning_scale = np.tile(0.1, conf.shape).astype(np.float32)

        tprob = prob.data.copy() # anchors other than the best anchor are not trained (squared error against itself = 0)

        # Compute the IOU between every predicted bbox and the truths (one batch item at a time)
        x_shift = Variable(np.broadcast_to(np.arange(grid_w, dtype=np.float32), x.shape[1:]), volatile=isVola)
        y_shift = Variable(np.broadcast_to(np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1), y.shape[1:]), volatile=isVola)
        w_anchor = Variable(np.broadcast_to(np.reshape(np.array(self.anchors, dtype=np.float32)[:, 0], (self.predictor.n_boxes, 1, 1, 1)), w.shape[1:]), volatile=isVola)
        h_anchor = Variable(np.broadcast_to(np.reshape(np.array(self.anchors, dtype=np.float32)[:, 1], (self.predictor.n_boxes, 1, 1, 1)), h.shape[1:]), volatile=isVola)
        x_shift.to_gpu(), y_shift.to_gpu(), w_anchor.to_gpu(), h_anchor.to_gpu()
        best_ious = []
        for batch in range(batch_size):
            n_truth_boxes = len(t[batch])
            # Predicted boxes for this sample, divided by the grid size.
            box_x = (x[batch] + x_shift) * 1.0 / grid_w
            box_y = (y[batch] + y_shift) * 1.0 / grid_h
            box_w = F.exp(w[batch]) * w_anchor * 1.0 / grid_w
            box_h = F.exp(h[batch]) * h_anchor * 1.0 / grid_h

            ious = []
            for truth_index in range(n_truth_boxes):
                # Broadcast the single truth box to the shape of the predictions.
                truth_box_x = Variable(np.broadcast_to(np.array(t[batch][truth_index]["x"], dtype=np.float32), box_x.shape), volatile=isVola)
                truth_box_y = Variable(np.broadcast_to(np.array(t[batch][truth_index]["y"], dtype=np.float32), box_y.shape), volatile=isVola)
                truth_box_w = Variable(np.broadcast_to(np.array(t[batch][truth_index]["w"], dtype=np.float32), box_w.shape), volatile=isVola)
                truth_box_h = Variable(np.broadcast_to(np.array(t[batch][truth_index]["h"], dtype=np.float32), box_h.shape), volatile=isVola)
                truth_box_x.to_gpu(), truth_box_y.to_gpu(), truth_box_w.to_gpu(), truth_box_h.to_gpu()
                ious.append(multi_box_iou(Box(box_x, box_y, box_w, box_h), Box(truth_box_x, truth_box_y, truth_box_w, truth_box_h)).data.get())
            ious = np.array(ious)
            # Keep, per predicted box, the highest IOU over all truth boxes.
            best_ious.append(np.max(ious, axis=0))
        best_ious = np.array(best_ious)

        # For anchors whose IOU exceeds the threshold, do not push conf down to 0 (grid cells around a truth keep their conf as is).
        tconf[best_ious > self.thresh] = conf.data.get()[best_ious > self.thresh]
        conf_learning_scale[best_ious > self.thresh] = 0

        # Only for anchor boxes that contain an object, adjust x, y, w, h, conf and prob individually
        abs_anchors = self.anchors / np.array([grid_w, grid_h])
        for batch in range(batch_size):
            for truth_box in t[batch]:
                # Despite the names, truth_w / truth_h are the grid column /
                # row indices of the cell containing the box center.
                truth_w = int(float(truth_box["x"]) * grid_w)
                truth_h = int(float(truth_box["y"]) * grid_h)
                truth_n = 0
                best_iou = 0.0
                # Pick the anchor whose size best matches the truth box size.
                for anchor_index, abs_anchor in enumerate(abs_anchors):
                    iou = box_iou(Box(0, 0, float(truth_box["w"]), float(truth_box["h"])), Box(0, 0, abs_anchor[0], abs_anchor[1]))
                    if best_iou < iou:
                        best_iou = iou
                        truth_n = anchor_index

                # For the anchor that contains the object, pull the center toward the true coordinates instead of 0.5, pull the anchor scale toward the true scale instead of 1, and set the learning scale to 1.
                box_learning_scale[batch, truth_n, :, truth_h, truth_w] = 1.0
                tx[batch, truth_n, :, truth_h, truth_w] = float(truth_box["x"]) * grid_w - truth_w
                ty[batch, truth_n, :, truth_h, truth_w] = float(truth_box["y"]) * grid_h - truth_h
                tw[batch, truth_n, :, truth_h, truth_w] = np.log(float(truth_box["w"]) * 1.0 / abs_anchors[truth_n][0])
                th[batch, truth_n, :, truth_h, truth_w] = np.log(float(truth_box["h"]) * 1.0 / abs_anchors[truth_n][1])
                # One-hot class target for the responsible anchor.
                tprob[batch, :, truth_n, truth_h, truth_w] = 0
                tprob[batch, int(truth_box["label"]), truth_n, truth_h, truth_w] = 1

                # Observe the IOU between the truth and the current prediction
                full_truth_box = Box(float(truth_box["x"]), float(truth_box["y"]), float(truth_box["w"]), float(truth_box["h"]))
                predicted_box = Box(
                    (x[batch][truth_n][0][truth_h][truth_w].data.get() + truth_w) * 1.0 / grid_w,
                    (y[batch][truth_n][0][truth_h][truth_w].data.get() + truth_h) * 1.0 / grid_h,
                    np.exp(w[batch][truth_n][0][truth_h][truth_w].data.get()) * abs_anchors[truth_n][0],
                    np.exp(h[batch][truth_n][0][truth_h][truth_w].data.get()) * abs_anchors[truth_n][1]
                )
                predicted_iou = box_iou(full_truth_box, predicted_box)
                # The confidence target is the observed IOU, weighted heavily.
                tconf[batch, truth_n, :, truth_h, truth_w] = predicted_iou
                conf_learning_scale[batch, truth_n, :, truth_h, truth_w] = 10.0

            # debug prints (``maps`` is only read by the commented-out code below)
            maps = F.transpose(prob[batch], (2, 3, 1, 0)).data
#            print("best confidences and best conditional probability and predicted class of each grid:")
#            for i in range(grid_h):
#                for j in range(grid_w):
#                    print("%2d" % (int(conf[batch, :, :, i, j].data.max() * 100)), end=" ")
#                print("     ", end="")
#                for j in range(grid_w):
#                    print("%2d" % (maps[i][j][int(maps[i][j].max(axis=1).argmax())].argmax()), end=" ")
#                print("     ", end="")
#                for j in range(grid_w):
#                    print("%2d" % (maps[i][j][int(maps[i][j].max(axis=1).argmax())].max()*100), end=" ")
#                print()
#
#            print("best default iou: %.2f   predicted iou: %.2f   confidence: %.2f   class: %s" % (best_iou, predicted_iou, conf[batch][truth_n][0][truth_h][truth_w].data, t[batch][0]["label"]))
#            print("-------------------------------")
        #print("seen = %d" % self.seen)

        # Compute the loss
        tx, ty, tw, th, tconf, tprob = Variable(tx, volatile=isVola), Variable(ty, volatile=isVola), Variable(tw, volatile=isVola), Variable(th, volatile=isVola), Variable(tconf, volatile=isVola), Variable(tprob, volatile=isVola)
        box_learning_scale, conf_learning_scale = Variable(box_learning_scale, volatile=isVola), Variable(conf_learning_scale, volatile=isVola)
        tx.to_gpu(), ty.to_gpu(), tw.to_gpu(), th.to_gpu(), tconf.to_gpu(), tprob.to_gpu()
        box_learning_scale.to_gpu()
        conf_learning_scale.to_gpu()

        # Halved sums of squared errors; box and conf terms are weighted by their learning scales.
        x_loss = F.sum((tx - x) ** 2 * box_learning_scale) / 2.0
        y_loss = F.sum((ty - y) ** 2 * box_learning_scale) / 2.0
        w_loss = F.sum((tw - w) ** 2 * box_learning_scale) / 2.0
        h_loss = F.sum((th - h) ** 2 * box_learning_scale) / 2.0
        c_loss = F.sum((tconf - conf) ** 2 * conf_learning_scale) / 2.0
        p_loss = F.sum((tprob - prob) ** 2) / 2.0
        print("x_loss: %f  y_loss: %f  w_loss: %f  h_loss: %f  c_loss: %f   p_loss: %f" %
            (F.sum(x_loss).data, F.sum(y_loss).data, F.sum(w_loss).data, F.sum(h_loss).data, F.sum(c_loss).data, F.sum(p_loss).data)
        )

        loss = x_loss + y_loss + w_loss + h_loss + c_loss + p_loss
        return loss
Example #57
0
    def __call__(self, x, rel_y, neighbor_entities, neighbor_dict, assign,
                 entities, relations, RC, EC, t, assignEtoN):
        """Aggregate neighbour representations per entity with attention weights.

        When ``self.layer == 0`` the work is delegated to ``self.easy_case``.
        Otherwise, for every entity, each assigned neighbour is concatenated
        with the relation representation and sent through the head-side or
        tail-side link depending on the direction of the relation. An
        attention link scores ``[neighbour, entity, relation]``, and the
        transformed neighbours are combined as an exp-weighted average.

        Args:
            x: Neighbour representations, split along axis 0 into
                ``len(neighbor_dict)`` pieces.
            rel_y: Relation representations, split along axis 0 into
                ``len(RC)`` pieces.
            neighbor_entities: Only forwarded to ``easy_case``.
            neighbor_dict: Maps a neighbour id to its piece index in ``x``.
            assign: Only forwarded to ``easy_case``.
            entities: Entity ids, in the same order as the pieces of ``t``.
            relations: Maps ``(head, tail)`` pairs to a relation index.
            RC: Its length gives the number of pieces in ``rel_y``.
            EC: Not used by this method.
            t: Entity representations, split along axis 0 into
                ``len(entities)`` pieces.
            assignEtoN: For each entity position, the neighbour ids to pool.

        Returns:
            The per-entity pooled results concatenated along axis 0.

        NOTE(review): the attention score is repeated to a hard-coded width
        of 200, presumably the hidden dimension -- confirm.
        """
        if self.layer == 0:
            return self.easy_case(x, neighbor_entities, neighbor_dict, assign,
                                  entities, relations)

        n_neighbors = len(neighbor_dict)
        if n_neighbors == 1:
            neighbor_rows = [x]
        else:
            neighbor_rows = F.split_axis(x, n_neighbors, axis=0)

        n_entities = len(entities)
        if n_entities == 1:
            entity_rows = [t]
        else:
            entity_rows = F.split_axis(t, n_entities, axis=0)

        relation_rows = F.split_axis(rel_y, len(RC), axis=0)

        pooled = []
        for idx, entity in enumerate(entities):
            entity_row = entity_rows[idx]
            head_inputs = []
            tail_inputs = []
            att_inputs = []
            # One (is_head, position) pair per neighbour, recording which
            # link output it will be read from and at which index.
            slots = []

            for neighbor in assignEtoN[idx]:
                neighbor_row = neighbor_rows[neighbor_dict[neighbor]]

                # Even code: (entity, neighbour) is a known relation;
                # odd code: the relation is stored as (neighbour, entity).
                if (entity, neighbor) in relations:
                    code = relations[(entity, neighbor)] * 2
                else:
                    code = relations[(neighbor, entity)] * 2 + 1
                relation_row = relation_rows[code // 2]

                # Input of the attention link: [neighbour, entity, relation].
                att_inputs.append(
                    F.concat((F.concat((neighbor_row, entity_row), axis=1),
                              relation_row), axis=1))

                pair = F.concat((neighbor_row, relation_row), axis=1)
                if code % 2 == 0:
                    slots.append((True, len(head_inputs)))
                    head_inputs.append(pair)
                else:
                    slots.append((False, len(tail_inputs)))
                    tail_inputs.append(pair)

            head_outputs = []
            tail_outputs = []

            if head_inputs:
                head_batch = F.concat(head_inputs, axis=0)
                head_batch = getattr(self, self.forwardH[0][0])(head_batch)
                head_outputs = F.split_axis(head_batch, len(head_inputs),
                                            axis=0)

            if tail_inputs:
                tail_batch = F.concat(tail_inputs, axis=0)
                tail_batch = getattr(self, self.forwardT[0][0])(tail_batch)
                tail_outputs = F.split_axis(tail_batch, len(tail_inputs),
                                            axis=0)

            att_batch = F.concat(att_inputs, axis=0)
            att_batch = getattr(self, self.AttL[0][0])(att_batch)
            att_scores = F.split_axis(att_batch, len(att_inputs), axis=0)

            weights = []
            features = []
            for pos, (is_head, source_pos) in enumerate(slots):
                # Spread the score across the feature width, then exp it.
                spread = F.reshape(F.repeat(att_scores[pos], 200), [-1, 200])
                weights.append(F.exp(spread))
                source = head_outputs if is_head else tail_outputs
                features.append(source[source_pos])

            weighted = [feat * w for feat, w in zip(features, weights)]
            pooled.append(sum(weighted) / (sum(weights)))

        return F.concat(pooled, axis=0)
Example #58
0
 def __call__(self, x):
     """Apply the network in a shifted log domain and map back with exp.

     The input is replaced by ``log(x) + 13.0``, passed through ``l1``,
     ``l2`` and ``l3`` with leaky-ReLU activations, and the output of
     ``l9`` is shifted back by 13.0 before being exponentiated.
     """
     offset = 13.0
     hidden = F.log(x) + offset
     for layer in (self.l1, self.l2, self.l3):
         hidden = F.leaky_relu(layer(hidden))
     return F.exp(self.l9(hidden) - offset)
 def __call__(self, x):
     """Convolve ``x`` and multiply the result by ``exp(self.scale * 3.0)``.

     The gain is broadcast to the shape of the convolution output before
     the element-wise multiplication.
     """
     features = self.conv(x)
     gain = cf.exp(self.scale * 3.0)
     return features * cf.broadcast_to(gain, features.shape)
 def decode_cupy(self, z):
     """Decode ``z`` in test mode without backprop and return a raw array.

     ``z`` is transposed before being wrapped in a Variable, and the
     exponentiated decoder output is transposed back before it is returned.
     """
     with chainer.using_config('train', False), chainer.no_backprop_mode():
         latent = chainer.Variable(z.T)
         log_power = self.decode(latent)
         # exp(log(power)) = power
         power = chf.exp(log_power).data.T
     return power