def make_img(self, x, l, s, num_lm, random=0):
    """Pick a location/size pair and sample the corresponding image patch.

    Args:
        x: Input images, forwarded unchanged to the patch sampler.
        l: Variable whose ``.data`` holds the predicted locations.
        s: Variable whose ``.data`` holds the predicted sizes.
        num_lm: Number of samples, forwarded to the patch sampler.
        random: ``0`` uses the clipped predictions as-is; any other value
            perturbs them with Gaussian noise before clipping.

    Returns:
        Tuple ``(xm, lm, sm)``: the sampled patches and the location and
        size actually used, the latter two wrapped in ``Variable``.
    """
    if random == 0:
        # Deterministic path: only clamp the predictions into [0, 1].
        sm = Variable(xp.clip(s.data, 0, 1))
        lm = Variable(xp.clip(l.data, 0, 1))
    else:
        # Draw order (location noise first, then size noise) is kept so a
        # seeded RNG yields the same samples as before.
        loc_noise = xp.random.normal(
            0, 1, size=l.data.shape).astype(xp.float32)
        size_noise = xp.random.normal(
            0, 1, size=s.data.shape).astype(xp.float32)

        # NOTE(review): size noise is scaled by self.var and location noise
        # by self.vars -- confirm this pairing is the intended one.
        noisy_size = xp.clip(
            (s.data + xp.sqrt(self.var) * size_noise), 0, 1
        ).astype(xp.float32)
        # Location noise is additionally scaled by 10 ** (size - 1).
        noisy_loc = xp.clip(
            l.data + xp.power(10, noisy_size - 1) * loc_noise
            * xp.sqrt(self.vars),
            0, 1)

        sm = Variable(noisy_size)
        lm = Variable(noisy_loc.astype(xp.float32))

    # Choose the sampler implementation that matches the configured device.
    if self.use_gpu:
        sampler = make_sampled_image.generate_xm_rgb_gpu
    else:
        sampler = make_sampled_image.generate_xm_rgb
    xm = sampler(lm.data, sm.data, x, num_lm, g_size=self.gsize)
    return xm, lm, sm
def use_model(self, x, t):
    """Run the model for ``self.n_step`` steps without sampling noise.

    Every step calls ``make_img`` with ``random=0`` and feeds the result to
    ``recurrent_forward``; the location, size and patch used at each step
    are recorded.

    Args:
        x: Input batch; ``x.shape[0]`` is taken as the number of samples.
        t: Target array multiplied element-wise with the final output.

    Returns:
        After the last step, the tuple
        ``(xp.sum(y.data * t, axis=1), l_list, 10 ** (s_list - 1), x_list)``
        where the three lists are indexed by step. ``None`` when
        ``self.n_step`` is 0, because the loop body never runs.
    """
    self.reset()
    batch = x.shape[0]
    steps = self.n_step

    # Per-step history buffers.
    s_list = xp.empty((steps, batch, 1))
    l_list = xp.empty((steps, batch, 2))
    x_list = xp.empty((steps, batch, 3, self.gsize, self.gsize))

    l, s, _ = self.first_forward(x, batch)
    for step in range(steps):
        xm, lm, sm = self.make_img(x, l, s, batch, random=0)
        next_l, next_s, y, _ = self.recurrent_forward(xm, lm, sm)

        s_list[step] = sm.data
        l_list[step] = lm.data
        x_list[step] = xm.data

        if step + 1 == steps:
            # Last step: score the output and convert the stored sizes
            # with 10 ** (size - 1) before returning.
            scores = xp.sum(y.data * t, axis=1)
            return scores, l_list, xp.power(10, s_list - 1), x_list

        # Otherwise feed the new predictions into the next step.
        l, s = next_l, next_s
    return None
def reconstruct_audio(compressed, reference):
    """Combine a magnitude derived from ``compressed`` with the phase of ``reference``.

    Args:
        compressed: Array indexed as ``[:, 0, :, :]`` after the element-wise
            transform (assumes a 4-D array -- TODO confirm layout).
        reference: Array whose entries 0 and 1 along its second axis are
            used as real and imaginary parts (assumes 4-D -- TODO confirm).

    Returns:
        Complex array on the host: transformed magnitude times the
        unit-modulus phase of ``reference``, with axes reversed by ``.T``.
    """
    # Raise to the power 1/0.3, take the square root, keep index 0 of axis 1.
    magnitude = xp.sqrt(xp.power(compressed, 1 / 0.3))[:, 0, :, :]
    magnitude = chainer.cuda.to_cpu(magnitude).T

    # ``.T`` reverses the axes, so the original second axis is now axis 2.
    ref_host = chainer.cuda.to_cpu(reference).T
    ref_complex = ref_host[:, :, 0, :] + ref_host[:, :, 1, :] * 1j

    # exp(j * angle) keeps only the phase of the reference.
    unit_phase = np.exp(1.0j * np.angle(ref_complex))
    return magnitude * unit_phase
def cul_loss(self, y, target, l, s, lm, sm):
    """Compute the classification loss and the location/size penalty.

    Args:
        y: Model output, multiplied element-wise with ``target``.
        target: Target array/Variable matching ``y``.
        l: Predicted locations; split into two halves along axis 1.
        s: Predicted sizes.
        lm: Locations actually used; split like ``l``.
        sm: Sizes actually used (a Variable; its ``.data`` sets the scale).

    Returns:
        Tuple ``(loss, size_p)`` where ``loss`` is ``-sum(y * target)`` and
        ``size_p`` is ``(sm - s)**2 / self.vars`` plus the scaled squared
        location error.
    """
    # Scale is taken from the raw data, so no gradient flows through it.
    scale = xp.power(10, sm.data - 1)

    pred_a, pred_b = F.split_axis(l, indices_or_sections=2, axis=1)
    used_a, used_b = F.split_axis(lm, indices_or_sections=2, axis=1)
    diff_a = pred_a - used_a
    diff_b = pred_b - used_b

    # Squared location error, normalised by self.var and the squared scale.
    ln_p = (diff_a * diff_a + diff_b * diff_b) / self.var / scale / scale / 2

    # Size term, normalised by self.vars, added to the location term.
    size_diff = sm - s
    size_p = size_diff * size_diff / self.vars + ln_p

    loss = -F.sum(y * target)
    return loss, size_p
def generate_xm_in_gpu(lm, sm, img, num_lm, g_size, img_size=112):
    """Extract one ``g_size`` x ``g_size`` nearest-pixel patch per image.

    For sample ``k`` the sampling window is centred on ``lm[k]`` and has a
    side length of ``10 ** (sm[k] - 1)``, both expressed as fractions of
    ``img_size``. Sample coordinates are truncated to integer pixel indices
    and clamped to the image bounds.

    Args:
        lm: Window centres, indexed as ``lm[k][0]`` (first image axis) and
            ``lm[k][1]`` (second image axis).
        sm: Window sizes, one value per sample; shape ``(num_lm,)`` and
            ``(num_lm, 1)`` are both accepted.
        img: Images reshapeable to ``(num_lm, img_size * img_size)``.
        num_lm: Number of samples.
        g_size: Patch side length in pixels.
        img_size: Side length of the square input images.

    Returns:
        float32 array of shape ``(num_lm, 1, g_size, g_size)``.
    """
    # Allocate directly as float32 instead of float64 followed by a copy.
    xm = xp.empty((num_lm, g_size * g_size), dtype=xp.float32)
    img_buf = img.reshape((num_lm, img_size * img_size))

    # Flatten so zm[k] is a scalar. With a (num_lm, 1) input, zm[k] would be
    # a length-1 array and array-aware linspace implementations would return
    # a (g_size, 1) result, breaking the index arithmetic below.
    zm = xp.power(10, sm - 1).reshape(-1)

    def _axis_indices(center, half):
        # Evenly spaced pixel indices across the window, clamped in-bounds.
        coords = xp.linspace(center - half, center + half, g_size)
        coords *= img_size
        return xp.clip(coords, 0, img_size - 1).astype(xp.int32)

    for k in range(num_lm):
        half = zm[k] / 2
        rows = _axis_indices(lm[k][0], half)
        cols = _axis_indices(lm[k][1], half)
        # Row-major flat index for every (row, col) pair of the patch grid.
        flat = img_size * xp.repeat(rows, g_size) + xp.tile(cols, g_size)
        xm[k] = img_buf[k][flat]
    return xm.reshape(num_lm, 1, g_size, g_size)
def cul_loss(self, y, target, l, s, lm, sm):
    """Compute the classification loss and the location/size penalty.

    Args:
        y: Model output, multiplied element-wise with ``target``.
        target: Target array/Variable matching ``y``.
        l: Predicted locations; split into two halves along axis 1.
        s: Predicted sizes.
        lm: Locations actually used; split like ``l``.
        sm: Sizes actually used (a Variable; its ``.data`` sets the scale).

    Returns:
        Tuple ``(loss, size_p)`` where ``loss`` is ``-sum(y * target)`` and
        ``size_p`` is ``(sm - s)**2 / self.vars`` plus the scaled squared
        location error.
    """
    # Scale is taken from the raw data, so no gradient flows through it.
    window = xp.power(10, sm.data - 1)

    pred_0, pred_1 = F.split_axis(l, indices_or_sections=2, axis=1)
    used_0, used_1 = F.split_axis(lm, indices_or_sections=2, axis=1)
    delta_0 = pred_0 - used_0
    delta_1 = pred_1 - used_1

    # Squared location error, normalised by self.var and the squared scale.
    ln_p = (
        (delta_0 * delta_0 + delta_1 * delta_1)
        / self.var / window / window / 2
    )

    # Size term, normalised by self.vars, added to the location term.
    delta_s = sm - s
    size_p = delta_s * delta_s / self.vars + ln_p

    loss = -F.sum(y * target)
    # NOTE: a reward-weighted term (a 0/1 correctness reward minus a batch-mean
    # baseline, scaled by self.r and multiplied by size_p) was previously
    # added to ``loss`` here. It is disabled, so only the classification
    # loss is returned alongside ``size_p``.
    return loss, size_p