def batch_pit_loss_faster(ys, ts, label_delay=0):
    """
    PIT loss over mini-batch.

    Args:
      ys: B-length list of predictions
      ts: B-length list of labels
      label_delay: not used in this implementation

    Returns:
      loss: (1,)-shape mean cross entropy over mini-batch
      labels: B-length list of permuted labels
    """
    n_speakers = ts[0].shape[1]
    xp = chainer.backend.get_array_module(ys[0])
    # (B, T, C)
    ys = F.pad_sequence(ys, padding=-1)

    losses = []
    for shift in range(n_speakers):
        # rolled along with speaker-axis
        ts_roll = [xp.roll(t, -shift, axis=1) for t in ts]
        ts_roll = F.pad_sequence(ts_roll, padding=-1)
        # loss: (B, T, C)
        loss = F.sigmoid_cross_entropy(ys, ts_roll, reduce='no')
        # sum over time: (B, C)
        loss = F.sum(loss, axis=1)
        losses.append(loss)
    # losses: (B, C, C)
    losses = F.stack(losses, axis=2)
    # losses[b, i, j] is a loss between
    # `i`-th speaker in y and `(i+j)%C`-th speaker in t

    perms = xp.array(
        list(permutations(range(n_speakers))),
        dtype='i',
    )
    # y_ind: [0,1,2,3]
    y_ind = xp.arange(n_speakers, dtype='i')
    # perms   -> relation to t_inds       -> t_inds
    # 0,1,2,3 -> 0+j=0,1+j=1,2+j=2,3+j=3 -> 0,0,0,0
    # 0,1,3,2 -> 0+j=0,1+j=1,2+j=3,3+j=2 -> 0,0,1,3
    t_inds = xp.mod(perms - y_ind, n_speakers)

    losses_perm = []
    for t_ind in t_inds:
        losses_perm.append(
            F.mean(losses[:, y_ind, t_ind], axis=1))
    # losses_perm: (B, Perm)
    losses_perm = F.stack(losses_perm, axis=1)

    min_loss = F.sum(F.min(losses_perm, axis=1))
    n_frames = np.sum([t.shape[0] for t in ts])
    min_loss = min_loss / n_frames

    min_indices = xp.argmin(losses_perm.array, axis=1)
    labels_perm = [t[:, perms[idx]] for t, idx in zip(ts, min_indices)]

    return min_loss, labels_perm

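# Hedged usage sketch (not from the original repo): a toy two-utterance batch for
# batch_pit_loss_faster above. Assumes the snippet's own imports are in place
# (numpy as np, chainer, chainer.functions as F, itertools.permutations).
import numpy as np

ys = [np.random.randn(100, 2).astype(np.float32),               # (T, C) pre-activations
      np.random.randn(80, 2).astype(np.float32)]
ts = [np.random.randint(0, 2, size=(100, 2)).astype(np.int32),  # (T, C) labels in {0, 1}
      np.random.randint(0, 2, size=(80, 2)).astype(np.int32)]
loss, permuted_labels = batch_pit_loss_faster(ys, ts)
print(loss.array, [t.shape for t in permuted_labels])
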
def EMD(self, z):
    """
    Earth mover distance between z and a standard normal sample.

    :param z: (n, dim_z) batch of latent vectors
    :return: scalar Variable
    """
    xp = cuda.get_array_module(z)
    dim_z = z.shape[1]
    n = z.shape[0]
    t = xp.random.normal(size=(n * 10, dim_z)).astype("float32")
    dot = F.matmul(z, t, transb=True)
    dist = F.sum(z**2, axis=1, keepdims=True) - 2 * dot + xp.sum(t**2, axis=1)
    return F.mean(F.min(dist, axis=0)) + F.mean(F.min(dist, axis=1))

def clipped_loss(x, t):
    diff = x - t
    abs_loss = abs(diff)
    squared_loss = diff ** 2
    abs_loss = F.expand_dims(abs_loss, 1)
    squared_loss = F.expand_dims(squared_loss, 1)
    return F.sum(F.min(F.concat((abs_loss, squared_loss), axis=1), axis=1))

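# Hedged usage sketch for clipped_loss above: per element it takes
# min(|x - t|, (x - t)**2) and sums, i.e. a crude Huber-like clipping.
# Assumes chainer.functions is imported as F, as in the snippet.
import numpy as np
import chainer

x = chainer.Variable(np.array([0.3, 2.0, -1.5], dtype=np.float32))
t = np.array([0.0, 0.0, 0.0], dtype=np.float32)
loss = clipped_loss(x, t)   # scalar Variable
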
def pit_loss(pred, label, label_delay=0):
    """
    Permutation-invariant training (PIT) cross entropy loss function.

    Args:
      pred: (T,C)-shaped pre-activation values
      label: (T,C)-shaped labels in {0,1}
      label_delay: if label_delay == 5:
            pred: 0 1 2 3 4 | 5 6 ... 99 100 |
           label: x x x x x | 0 1 ... 94  95 | 96 97 98 99 100
           calculated area: | <------------> |

    Returns:
      min_loss: (1,)-shape mean cross entropy
      label_perms[min_index]: permutated labels
    """
    # label permutations along the speaker axis
    label_perms = [
        label[..., list(p)] for p in permutations(range(label.shape[-1]))
    ]
    losses = F.stack([
        F.sigmoid_cross_entropy(
            pred[label_delay:, ...],
            l[:len(l) - label_delay, ...]) for l in label_perms])
    xp = cuda.get_array_module(losses)
    min_loss = F.min(losses) * (len(label) - label_delay)
    min_index = cuda.to_cpu(xp.argmin(losses.data))
    return min_loss, label_perms[min_index]

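# Hedged usage sketch for pit_loss above with random (T, C) data; real use would
# pass network pre-activations. Assumes numpy as np, chainer.functions as F,
# chainer.cuda and itertools.permutations are imported as in the snippet.
import numpy as np

T, C = 50, 2
pred = np.random.randn(T, C).astype(np.float32)
label = np.random.randint(0, 2, size=(T, C)).astype(np.int32)
min_loss, best_label = pit_loss(pred, label, label_delay=0)
print(min_loss.array, best_label.shape)
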
def check_backward(self, x_data, y_grad):
    gradient_check.check_backward(
        lambda x: functions.min(x, self.axis, self.keepdims),
        x_data, y_grad, dtype='d', **self.check_backward_options)

def _compute_target_q_value(self, batch):
    with chainer.using_config('train', False), \
            chainer.using_config('enable_backprop', False):
        (_, _, r, s_next, non_terminal) = batch
        r = F.reshape(r, shape=(*r.shape, 1))
        non_terminal = F.reshape(non_terminal, shape=(*non_terminal.shape, 1))

        s_next_rep = F.repeat(x=s_next, repeats=self._num_action_samples, axis=0)
        a_next_rep = self._vae._decode(s_next_rep)
        perturbed_action = self._target_perturbator(s_next_rep, a_next_rep)
        q_values = F.stack([
            q_target(s_next_rep, perturbed_action)
            for q_target in self._target_q_ensembles
        ])
        assert q_values.shape == (self._num_q_ensembles,
                                  self._batch_size * self._num_action_samples,
                                  1)
        weighted_q_minmax = self._lambda * F.min(q_values, axis=0) \
            + (1 - self._lambda) * F.max(q_values, axis=0)
        assert weighted_q_minmax.shape == (
            self._batch_size * self._num_action_samples, 1)

        next_q_value = F.max(
            F.reshape(weighted_q_minmax, shape=(self._batch_size, -1)),
            axis=1, keepdims=True)
        assert next_q_value.shape == (self._batch_size, 1)
        target_q_value = r + self._gamma * next_q_value * non_terminal
        target_q_value.unchain()
        assert target_q_value.shape == (self._batch_size, 1)
    return target_q_value

def check_forward(self, x_data):
    x = chainer.Variable(x_data)
    y = functions.min(x, axis=self.axis, keepdims=self.keepdims)
    self.assertEqual(y.data.dtype, numpy.float32)
    y_expect = self.y_expect
    self.assertEqual(y.data.shape, y_expect.shape)
    testing.assert_allclose(y_expect, y.data)

def check_forward(self, x_data, axis=None, keepdims=False):
    x = chainer.Variable(x_data)
    y = functions.min(x, axis=axis, keepdims=keepdims)
    self.assertEqual(y.data.dtype, numpy.float32)
    y_expect = self.x.min(axis=axis, keepdims=keepdims)
    self.assertEqual(y.data.shape, y_expect.shape)
    gradient_check.assert_allclose(y_expect, y.data)

def check_forward(self, x_data, axis=None, keepdims=False):
    x = chainer.Variable(x_data)
    y = functions.min(x, axis=axis, keepdims=keepdims)
    self.assertEqual(y.data.dtype, numpy.float32)
    y_expect = self.x.min(axis=axis, keepdims=keepdims)
    self.assertEqual(y.data.shape, y_expect.shape)
    testing.assert_allclose(y_expect, y.data)

def check_backward(self, x_data, y_grad, axis=None, keepdims=False):
    gradient_check.check_backward(
        lambda x: functions.min(x, axis=axis, keepdims=keepdims),
        x_data, y_grad, dtype='d', **self.check_backward_options)

def prelu(self, inp, parameter):
    x = F.reshape(inp, (inp.shape[0], 1, inp.shape[1]))
    zeros = self.xp.zeros_like(x.data)
    c = F.transpose(F.concat((x, zeros), axis=1), (0, 2, 1))
    return F.max(c, axis=2) \
        + F.broadcast_to(parameter, inp.shape) * F.min(c, axis=2)

def chamfer_distance(pc1, pc2):
    '''
    Input:
        pc1: float chainer Variable in shape (B,N,C), the first point cloud
        pc2: float chainer Variable in shape (B,M,C), the second point cloud
        (note: the code below reads the point counts from shape[2] and sums
         squared differences over axis 1, i.e. a channel-first layout)
    Output:
        dist1: float chainer Variable in shape (B,N), distance from first to second
        idx1: int32 chainer Variable in shape (B,N), nearest neighbor from first to second
        dist2: float chainer Variable in shape (B,M), distance from second to first
        idx2: int32 chainer Variable in shape (B,M), nearest neighbor from second to first
        (idx1 and idx2 are currently returned as 0; the argmin lines are commented out)
    '''
    dist1, idx1, dist2, idx2 = 0, 0, 0, 0
    N = pc1.shape[2]
    M = pc2.shape[2]
    """
    dist = Variable(np.zeros((N, M)))
    for i in range(N):
        for j in range(M):
            dist[i, j] = functions.sum((pc1[0, :, i, 0] - pc2[0, :, j, 0]) ** 2)
    dist1 = functions.min(dist, axis=1)
    dist2 = functions.min(dist, axis=1)
    """
    # insert an extra dimension into the matrix (expand_dims), then
    # repeat each dimension by the given reps; here the expanded axis is repeated M (resp. N) times.
    pc1_expand_tile = functions.tile(functions.expand_dims(pc1, 3), (1, 1, 1, M, 1))
    pc2_expand_tile = functions.tile(functions.expand_dims(pc2, 2), (1, 1, N, 1, 1))
    # pc1_expand_tile = functions.tile(pc1, (1, 1, M, 1))
    # pc2_expand_tile = functions.tile(pc2, (1, N, 1, 1))
    # pc1_expand_tile shape == pc2_expand_tile shape
    # pc_diff is the difference between pc1 and pc2 in the coordinate system.
    # print(pc1_expand_tile.shape)
    # print(pc2_expand_tile.shape)
    pc_diff = pc1_expand_tile - pc2_expand_tile
    pc_dist = functions.sum(pc_diff**2, axis=1)
    dist1 = functions.min(pc_dist, axis=1)
    # idx1 = functions.argmin(pc_dist, axis=1)
    dist2 = functions.min(pc_dist, axis=2)
    # idx2 = functions.argmin(pc_dist, axis=2)
    return dist1, idx1, dist2, idx2

def occupancy_grid_3d(points, *, pitch, origin, dims, threshold=1):
    d_IP, d_JP, d_KP = OccupancyGrid3D(pitch=pitch, origin=origin, dims=dims)(points)
    d_IJKP = F.sqrt(d_IP**2 + d_JP**2 + d_KP**2)
    d_IJK = F.min(d_IJKP, axis=3)
    m_IJK = F.relu(threshold - d_IJK)
    m_IJK = F.minimum(m_IJK, m_IJK.array * 0 + 1)
    return m_IJK

def normalize_linearly(self, h):
    """Normalize h linearly in [0, 1] over dimensions."""
    h_max = F.max(h, axis=1, keepdims=True)
    h_min = F.min(h, axis=1, keepdims=True)
    h_norm = (h - h_min) / (h_max - h_min + 1e-10)
    return h_norm

def normalize_linearly(self, h):
    """Normalize h linearly over dimensions in [0, 1]."""
    h_max = F.max(h, axis=1, keepdims=True)
    h_min = F.min(h, axis=1, keepdims=True)
    h_norm = (h - h_min) / (h_max - h_min + 1e-10)
    return h_norm

def check_backward(self, x_data, y_grad, axis=None, keepdims=False):
    gradient_check.check_backward(
        lambda x: functions.min(x, axis=axis, keepdims=keepdims),
        x_data, y_grad, eps=1e-4, rtol=1e-3, atol=1e-3)

def __call__(self, x, t, index):
    h = self.predict(x)
    self.history = np.append(
        self.history, np.array([np.mean(h.data, axis=0)]), axis=0)
    h = F.select_item(h, index)  # choose the action[index] in each column
    error_abs = abs(h - t)
    error = F.concat(
        (F.expand_dims(error_abs ** 2, 1), F.expand_dims(error_abs, 1)), axis=1)
    # 1 < error_abs <=> error_abs ** 2 > error_abs; error_abs < 1 <=> error_abs ** 2 < error_abs
    self.loss = F.sum(F.min(error, axis=1)) / np.float32(len(error_abs))
    return self.loss

def check_backward(self, x_data, y_grad, axis=None, keepdims=False):
    x = chainer.Variable(x_data)
    y = functions.min(x, axis=axis, keepdims=keepdims)
    y.grad = y_grad
    y.backward()
    func = y.creator
    f = lambda: func.forward((x.data.copy(),))
    gx, = gradient_check.numerical_grad(f, (x.data,), (y.grad,), eps=1e-5)
    gradient_check.assert_allclose(gx, x.grad, rtol=1e-3, atol=1e-3)

def check_backward(self, x_data, y_grad, axis=None, keepdims=False):
    x = chainer.Variable(x_data)
    y = functions.min(x, axis=axis, keepdims=keepdims)
    y.grad = y_grad
    y.backward()
    func = y.creator
    f = lambda: func.forward((x.data.copy(), ))
    gx, = gradient_check.numerical_grad(f, (x.data, ), (y.grad, ), eps=1e-5)
    gradient_check.assert_allclose(gx, x.grad, rtol=1e-3, atol=1e-3)

def batched_triangle_reduce_(b, t, p, n, id):
    xp = chainer.backend.get_array_module(b)
    BB, _, H, W = b.shape[:4]
    kb = xp.sum(b, axis=0).astype(xp.bool)
    kt = F.min(t, axis=0)
    kp = p[0, :, :, :]
    kn = n[0, :, :, :]
    kid = id[0, :, :, :]
    for i in range(1, BB):
        bb = (kt.data >= t[i, :, :, :].data)
        kp = F.where(bb, p[i, :, :, :], kp)
        kn = F.where(bb, n[i, :, :, :], kn)
        kid = F.where(bb, id[i, :, :, :], kid)
    b = chainer.as_variable(kb.reshape(1, 1, H, W))
    t = kt.reshape(1, 1, H, W)
    p = kp.reshape(1, 3, H, W)
    n = kn.reshape(1, 3, H, W)
    id = kid.reshape(1, 1, H, W)
    return b, t, p, n, id

def __call__(self, template, speech, length_of_template, length_of_speech):
    self.nodes = []
    for i in range(length_of_template + 1):
        for j in range(length_of_speech + 1):
            self.nodes.append(V(ar(0.0, dtype=np.float32)))
    for i in range(length_of_template + 1):
        # print("(" + str(i) + ",1)", end=' ')
        for j in range(length_of_speech + 1):
            if i != 0 and j != 0:
                self.nodes[i * (length_of_speech + 1) + j] = F.min(F.stack([
                    self.nodes[(i - 1) * (length_of_speech + 1) + j],
                    self.nodes[i * (length_of_speech + 1) + j - 1],
                    self.nodes[(i - 1) * (length_of_speech + 1) + j - 1],
                ])) + F.sqrt(F.sum(dist(template[i - 1], speech[j - 1])) + 1e-8)
    # print(self.nodes[-length_of_speech - 1:])
    result_temp = self.nodes[-length_of_speech:]
    result = []
    t1 = []
    t2 = []
    t3 = []
    for i in range(len(result_temp)):
        t1.append(F.expand_dims(result_temp[i], axis=0))
        t2.append(F.expand_dims(t1[i], axis=0))
        t3.append(F.linear(t2[i], self.W, self.b))
        result.append(F.sigmoid(t3[i]))
    y = F.hstack(result)
    return y[0]

def get_bow_reps(self, ids, xs, xs_embed, position_info, x_spans,
                 shell_spans, x_position_info):
    assert len(x_spans[0]) == len(shell_spans[0])

    x_spans = [[[shell_span[0].tolist(), x_span[1].tolist()]
                for x_span, shell_span in zip(x_spans_in_para,
                                              shell_spans_in_para)]
               for x_spans_in_para, shell_spans_in_para in zip(x_spans,
                                                               shell_spans)]

    # (all_n_spans, 1) paragraph_type
    eye = self.xp.identity(4, dtype=self.xp.float32)
    para_type = [
        i.tolist() - self.max_n_spans * 2
        for i in self.xp.vstack(x_position_info)[:, 2]
    ]
    para_type = self.xp.vstack([eye[i] for i in para_type])

    # (batchsize, max_n_tokens, word_vec)
    xs_embed = chaFunc.pad_sequence(xs_embed, padding=-1)
    # (batchsize, n_spans, max_n_tokens, word_vec)
    xs_embed = [
        chaFunc.tile(xs_embed[i], (len(spans), 1, 1))
        for i, spans in enumerate(x_spans)
    ]
    # (all_spans_in_batch, max_n_tokens, word_vec)
    xs_embed = chaFunc.vstack(xs_embed)

    # (batchsize, max_n_tokens, word_id)
    xs = chaFunc.pad_sequence(xs, padding=0)
    # (batchsize, n_spans, max_n_tokens, word_id)
    xs = [
        self.xp.tile(xs[i].data, (len(spans), 1))
        for i, spans in enumerate(x_spans)
    ]
    # (all_spans_in_batch, max_n_tokens, word_id)
    xs = self.xp.vstack(xs)

    # (all_spans_in_batch, max_n_tokens, word_vec)
    mask_xs_embed_bool = self.xp.zeros(xs_embed.shape).astype(self.xp.bool)
    # (all_spans_in_batch, word_vec) the length of each span
    len_spans = self.xp.zeros(
        (xs_embed.shape[0], xs_embed.shape[2])).astype(self.xp.float32)
    # (all_spans_in_batch, (start, end))
    x_spans = np.vstack(x_spans)

    xs_ids = []
    eye = self.xp.identity(len(self.vocab), dtype=self.xp.float32)
    for i, span in enumerate(x_spans):
        mask_xs_embed_bool[i][int(span[0]):int(span[1])] = True
        xs_ids.append(self.xp.sum(eye[xs[i][int(span[0]):int(span[1])]], 0))
        len_spans[i].fill(span[1] - span[0])

    # max pooling
    mask_xs_embed = self.xp.zeros(xs_embed.shape).astype(self.xp.float32)
    mask_xs_embed.fill(-self.xp.inf)
    max_pooling_xs = chaFunc.max(
        chaFunc.where(mask_xs_embed_bool, xs_embed, mask_xs_embed), 1)

    # average pooling
    mask_xs_embed = self.xp.zeros(xs_embed.shape).astype(self.xp.float32)
    avg_pooling_xs = chaFunc.sum(
        chaFunc.where(mask_xs_embed_bool, xs_embed, mask_xs_embed),
        1) / len_spans

    # min pooling
    mask_xs_embed = self.xp.zeros(xs_embed.shape).astype(self.xp.float32)
    mask_xs_embed.fill(self.xp.inf)
    min_pooling_xs = chaFunc.min(
        chaFunc.where(mask_xs_embed_bool, xs_embed, mask_xs_embed), 1)

    # (all_n_spans, max_n_tokens, vocab_size)
    xs_ids = self.xp.vstack(xs_ids)

    # (all_n_spans, feature_vector)
    if self.use_elmo:
        # We found that pooling-based features with ELMo do not significantly
        # contribute to the performance, so we only use discrete BoW features
        # for span-based models with ELMo.
        bow_reps = chaFunc.concat([xs_ids])
    else:
        bow_reps = chaFunc.concat(
            [max_pooling_xs, min_pooling_xs, avg_pooling_xs, xs_ids])

    assert bow_reps.shape[-1] == self.bow_feature_size

    bow_reps = chaFunc.sigmoid(self.BowFCLayer(bow_reps))
    bow_reps = chaFunc.dropout(bow_reps, self.dropout)

    return bow_reps

def calc_distance(est_theta, theta):
    # weak regularization to the distribution of estimated thetas
    dist = F.sum(est_theta ** 2, axis=1) + (theta ** 2).sum(axis=1).T \
        - 2 * F.matmul(est_theta, theta, transb=True)
    return F.mean(F.min(dist, axis=0)) + F.mean(F.min(dist, axis=1))

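# Hedged usage sketch for calc_distance above: est_theta as a chainer Variable,
# theta as a plain float32 array, both (n, dim). The result is a scalar Variable
# used as an approximate pairwise-distance regularizer. Assumes chainer and
# chainer.functions as F are imported as in the snippet.
import numpy as np
import chainer

est_theta = chainer.Variable(np.random.randn(16, 8).astype(np.float32))
theta = np.random.randn(16, 8).astype(np.float32)
reg = calc_distance(est_theta, theta)
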
def test_pos_neg_duplicate_axis(self):
    x_data = numpy.random.uniform(-1, 1, (3, 2, 4)).astype(numpy.float32)
    x = chainer.Variable(x_data)
    with self.assertRaises(ValueError):
        functions.min(x, axis=(1, -2))

def get_mask(gcam, sigma=.5, w=8):
    gcam = (gcam - F.min(gcam).data) / (F.max(gcam) - F.min(gcam)).data
    mask = F.squeeze(F.sigmoid(w * (gcam - sigma)))
    return mask

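# Hedged usage sketch for get_mask above on a random Grad-CAM-like activation map;
# the map is min-max normalised and pushed through a steep sigmoid around `sigma`.
# Assumes chainer.functions is imported as F, as in the snippet.
import numpy as np
import chainer

gcam = chainer.Variable(np.random.rand(1, 7, 7).astype(np.float32))
mask = get_mask(gcam, sigma=.5, w=8)   # values in (0, 1), shape (7, 7) after squeeze
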
def test_duplicate_axis(self):
    with self.assertRaises(ValueError):
        functions.min(self.x, (0, 0))

def f(x):
    return functions.min(x, self.axis, self.keepdims)

def f(x):
    x = functions.min(x, self.axis, self.keepdims)
    return x * x

def test_invalid_axis_type_in_tuple(self):
    with self.assertRaises(TypeError):
        functions.min(self.x, (1, 'x'))

def test_invalid_axis_type(self):
    with self.assertRaises(TypeError):
        functions.min(self.x, [0])

def batch_pit_n_speaker_loss(ys, ts, n_speakers_list):
    """
    PIT loss over mini-batch.

    Args:
      ys: B-length list of predictions (pre-activations)
      ts: B-length list of labels
      n_speakers_list: list of n_speakers in batch

    Returns:
      loss: (1,)-shape mean cross entropy over mini-batch
      labels: B-length list of permuted labels
    """
    max_n_speakers = ts[0].shape[1]
    xp = chainer.backend.get_array_module(ys[0])
    # (B, T, C)
    ys = F.pad_sequence(ys, padding=-1)

    losses = []
    for shift in range(max_n_speakers):
        # rolled along with speaker-axis
        ts_roll = [xp.roll(t, -shift, axis=1) for t in ts]
        ts_roll = F.pad_sequence(ts_roll, padding=-1)
        # loss: (B, T, C)
        loss = F.sigmoid_cross_entropy(ys, ts_roll, reduce='no')
        # sum over time: (B, C)
        loss = F.sum(loss, axis=1)
        losses.append(loss)
    # losses: (B, C, C)
    losses = F.stack(losses, axis=2)
    # losses[b, i, j] is a loss between
    # `i`-th speaker in y and `(i+j)%C`-th speaker in t

    perms = xp.array(
        list(permutations(range(max_n_speakers))),
        dtype='i',
    )
    # y_ind: [0,1,2,3]
    y_ind = xp.arange(max_n_speakers, dtype='i')
    # perms   -> relation to t_inds       -> t_inds
    # 0,1,2,3 -> 0+j=0,1+j=1,2+j=2,3+j=3 -> 0,0,0,0
    # 0,1,3,2 -> 0+j=0,1+j=1,2+j=3,3+j=2 -> 0,0,1,3
    t_inds = xp.mod(perms - y_ind, max_n_speakers)

    losses_perm = []
    for t_ind in t_inds:
        losses_perm.append(
            F.mean(losses[:, y_ind, t_ind], axis=1))
    # losses_perm: (B, Perm)
    losses_perm = F.stack(losses_perm, axis=1)

    # masks: (B, Perms)
    def select_perm_indices(num, max_num):
        perms = list(permutations(range(max_num)))
        sub_perms = list(permutations(range(num)))
        return [
            [x[:num] for x in perms].index(perm)
            for perm in sub_perms
        ]

    masks = xp.full_like(losses_perm.array, xp.inf)
    for i, t in enumerate(ts):
        n_speakers = n_speakers_list[i]
        indices = select_perm_indices(n_speakers, max_n_speakers)
        masks[i, indices] = 0
    losses_perm += masks

    min_loss = F.sum(F.min(losses_perm, axis=1))
    n_frames = np.sum([t.shape[0] for t in ts])
    min_loss = min_loss / n_frames

    min_indices = xp.argmin(losses_perm.array, axis=1)
    labels_perm = [t[:, perms[idx]] for t, idx in zip(ts, min_indices)]
    labels_perm = [
        t[:, :n_speakers]
        for t, n_speakers in zip(labels_perm, n_speakers_list)
    ]

    return min_loss, labels_perm

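# Hedged usage sketch for batch_pit_n_speaker_loss above: labels are padded to the
# maximum speaker count (3 here) and n_speakers_list gives the true count per
# utterance. Assumes the snippet's imports (numpy as np, chainer,
# chainer.functions as F, itertools.permutations).
import numpy as np

ys = [np.random.randn(100, 3).astype(np.float32),
      np.random.randn(80, 3).astype(np.float32)]
ts = [np.random.randint(0, 2, size=(100, 3)).astype(np.int32),
      np.random.randint(0, 2, size=(80, 3)).astype(np.int32)]
ts[1][:, 2] = 0   # second utterance has only 2 active speakers
loss, labels = batch_pit_n_speaker_loss(ys, ts, n_speakers_list=[3, 2])
print(loss.array, [t.shape for t in labels])   # shapes: (100, 3) and (80, 2)
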
def _policy_update(self, batch):
    status = {}
    vae_status = self._train_vae(batch)
    status.update(vae_status)

    (s, a, _, _, _) = batch
    _, raw_sampled_actions = self._vae._decode_multiple(
        s, decode_num=self._num_mmd_samples)
    pi_actions, raw_pi_actions = self._pi._sample_multiple(
        s, sample_num=self._num_mmd_samples)
    if self._kernel_type == 'gaussian':
        mmd_loss = self._compute_gaussian_mmd(
            raw_sampled_actions, raw_pi_actions, sigma=self._mmd_sigma)
    elif self._kernel_type == 'laplacian':
        mmd_loss = self._compute_laplacian_mmd(
            raw_sampled_actions, raw_pi_actions, sigma=self._mmd_sigma)
    else:
        raise ValueError('Unknown kernel: {}'.format(self._kernel_type))
    assert mmd_loss.shape == (self._batch_size, 1)

    s_hat = F.expand_dims(s, axis=0)
    s_hat = F.repeat(s_hat, repeats=self._num_mmd_samples, axis=0)
    s_hat = F.reshape(
        s_hat, shape=(self._batch_size * self._num_mmd_samples, s.shape[-1]))
    a_hat = F.transpose(pi_actions, axes=(1, 0, 2))
    a_hat = F.reshape(
        a_hat, shape=(self._batch_size * self._num_mmd_samples, a.shape[-1]))

    q_values = F.stack([q(s_hat, a_hat) for q in self._q_ensembles])
    assert q_values.shape == (self._num_q_ensembles,
                              self._batch_size * self._num_mmd_samples,
                              1)
    q_values = F.reshape(q_values,
                         shape=(self._num_q_ensembles,
                                self._num_mmd_samples,
                                self._batch_size,
                                1))
    q_values = F.mean(q_values, axis=1)
    assert q_values.shape == (self._num_q_ensembles, self._batch_size, 1)
    q_min = F.min(q_values, axis=0)

    if self._use_stddev:
        q_stddev = self._compute_stddev(x=q_values, axis=0, keepdims=False)
        assert q_min.shape == q_stddev.shape
    else:
        q_stddev = 0.0
    assert q_min.shape == (self._batch_size, 1)

    if self._num_iterations > self._warmup_iterations:
        pi_loss = F.mean(-q_min + q_stddev * self._stddev_coeff
                         + self._lagrange_multiplier.exp() * mmd_loss)
    else:
        pi_loss = F.mean(self._lagrange_multiplier.exp() * mmd_loss)

    # Dual gradient descent
    # Update actor
    self._pi_optimizer.target.cleargrads()
    pi_loss.backward()
    self._pi_optimizer.update()

    # Just for maintaining consistency with original code
    if self._use_stddev:
        q_stddev.unchain()

    # Update lagrange multiplier
    lagrange_loss = -F.mean(-q_min + q_stddev * self._stddev_coeff
                            + self._lagrange_multiplier.exp()
                            * (mmd_loss - self._epsilon))
    self._lagrange_optimizer.target.cleargrads()
    lagrange_loss.backward()
    self._lagrange_optimizer.update()

    pi_loss.unchain_backward()
    lagrange_loss.unchain_backward()

    # Clip lagrange multiplier in range
    self._lagrange_multiplier.clip(-5.0, 10.0)

    xp = chainer.backend.get_array_module(pi_loss)
    status['pi_loss'] = xp.array(pi_loss.array)
    status['mmd_loss'] = xp.mean(xp.array(mmd_loss.array))
    status['lagrange_loss'] = xp.array(lagrange_loss.array)
    status['lagrange_multiplier'] = xp.array(self._lagrange_multiplier().array)

    return status

def __call__(self, batch_graph, targets=None):
    """
    This method performs forward calculation.

    Parameters
    ----------
    batch_graph : list of Graph
        contains Graphs in minibatch
    targets : targets
        this parameter is only used in the regression task

    Returns
    -------
    In classification task : (batchsize, num_classes) matrix
        giving the probability of which class each graph is in.
    In regression task : (batchsize, 1) matrix
        giving the predicted treewidth of each graph.
    """
    # set the array module based on the device in use
    xp = self.device.xp

    # concatenate the node_features
    X_concat = chainer.Variable(xp.concatenate(
        [xp.array(graph.node_features) for graph in batch_graph], axis=0))
    # if you use GPU, you must transfer X_concat onto the GPU.
    X_concat.to_device(self.device)

    # make graph pooling matrix and neighbors pooling matrix
    graph_pool = self.__preprocess_graphpool(batch_graph)
    if self.neighbor_pooling_type == "max":
        padded_neighbor_list = self.__preprocess_neighbors_maxpool(batch_graph)
    else:
        Adj_block = self.__preprocess_neighbors_sumavepool(batch_graph)

    # list of hidden representation at each layer (including input feature vectors)
    hidden_rep = [X_concat]
    h = X_concat

    # perform Aggregating and Combining node features
    for layer in range(self.num_layers - 1):
        # perform max neighbor pooling
        if self.neighbor_pooling_type == "max":
            # padding minimum value vector
            padded_h = F.concat(
                (h, F.min(h, axis=0).reshape(1, h.shape[1])), axis=0)
            # make (F-dim, max_deg * nodes) matrix to perform max aggregation
            pooled_mat = F.sparse_matmul(
                padded_h.transpose(), padded_neighbor_list).transpose()
            # make 3D tensor
            pooled_tensor = F.reshape(
                pooled_mat,
                (padded_neighbor_list.shape[0] - 1,
                 int(padded_neighbor_list.shape[1]
                     / (padded_neighbor_list.shape[0] - 1)),
                 h.shape[1]))
            # take max
            pooled = F.max(pooled_tensor, axis=1)
        # perform sum or average neighbor pooling
        else:
            pooled = F.sparse_matmul(Adj_block, h)
            if self.neighbor_pooling_type == "average":
                degree = F.sparse_matmul(
                    Adj_block,
                    xp.ones((Adj_block.shape[0], 1), dtype=xp.float32))
                pooled = pooled / degree

        # input aggregated vectors into MLP
        pooled_rep = self.mlps[layer](pooled)
        h = self.batch_norms[layer](pooled_rep)
        h = F.relu(h)
        hidden_rep.append(h)

    # perform Readout of node features
    score_over_layer = 0
    for layer, h in enumerate(hidden_rep):
        # perform max readout
        if self.graph_pooling_type == "max":
            # padding minimum value
            padded_h = F.concat(
                (h, F.min(h, axis=0).reshape(1, h.shape[1])), axis=0)
            # make (F-dim, max|V| * batchsize) matrix to perform max aggregation
            pooled_mat = F.sparse_matmul(
                padded_h.transpose(), graph_pool).transpose()
            # make 3D tensor
            pooled_tensor = F.reshape(
                pooled_mat,
                (len(batch_graph),
                 int(graph_pool.shape[1] / len(batch_graph)),
                 h.shape[1]))
            # take max
            pooled_h = F.max(pooled_tensor, axis=1)
        # sum or average readout
        else:
            pooled_h = F.sparse_matmul(graph_pool, h)
        score_over_layer += F.dropout(
            self.linears_prediction[layer](pooled_h), self.final_dropout)

    # final layers in regression task
    if self.task_type == "Regression":
        h = self.final_l2(score_over_layer)
        h = F.relu(h)
        score_over_layer = self.final_l1(h)
        if targets is None:
            return score_over_layer
        else:
            self.loss = F.mean_squared_error(
                targets.reshape(-1, 1), score_over_layer)  # MSE Loss
            self.abs_loss = F.mean_absolute_error(
                targets.reshape(-1, 1), score_over_layer)  # MAE Loss
            self.abs_max_loss = F.max(F.absolute_error(
                targets.reshape(-1, 1), score_over_layer))  # Max Absolute Error
            chainer.reporter.report({'loss': self.loss}, self)
            chainer.reporter.report({'abs_loss': self.abs_loss}, self)
            chainer.reporter.report({'abs_max_loss': self.abs_max_loss}, self)
            # return the MSE loss. If you want to use another loss,
            # please change this statement.
            return self.loss

    return score_over_layer

def update_core(self):
    gen = self.models['gen']
    dis = self.models['dis']
    enc = self.models['enc']
    gen_optimizer = self.get_optimizer('opt_gen')
    xp = enc.xp

    x, gt, c = self.get_batch(xp)
    if self.input_size is not None:
        _x = []
        for img in x.data.get():
            _x.append(chainercv.transforms.resize(
                img, (self.input_size, self.input_size)))
        x = Variable(xp.asarray(_x))

    # obtain initial z by encoder
    if enc.n_classes != 0:
        z = enc(x, y=c)
    else:
        z = enc(x)

    # fast updating
    with chainer.using_config('train', False):
        # out_noab : reconstruction results without auxiliary network
        outs, fast_losses, out_noab, zeta, z_prime = gen(
            batchsize=len(z), z=z, y=c, gt=gt)

    lmd_pixel = 0.05
    fast_losses.append(reconstruction_loss(dis, outs[-1], gt)
                       + lmd_pixel * pixel_loss(outs[-1], gt))

    loss = 0
    weights = [20, 2.0, 1.0]
    for i in range(0, len(outs)):
        loss += fast_losses[i] * weights[i]

    # reconstruction loss as an autoencoder
    lmd_ae = 100
    # lmd_ae = 0
    ae_loss = F.mean_squared_error(z, z_prime) * z.shape[0]
    loss += lmd_ae * ae_loss

    # sparse regularization
    # lmd_sparse = 0.000
    # sparse_loss = lmd_sparse * F.sum(F.absolute(zeta))
    # loss += sparse_loss

    gen.cleargrads()
    # double backprop
    loss.backward()
    gen_optimizer.update()

    # reporting
    report = dict()
    for i, loss_i in enumerate(fast_losses):
        report["loss{}".format(i + 1)] = loss_i
    report["loss_ae"] = ae_loss
    report["loss_noab"] = reconstruction_loss(dis, out_noab, gt) \
        + lmd_pixel * pixel_loss(out_noab, gt)
    report["fast_alpha"] = gen.fast_alpha().data.mean()
    report["fast_benefit"] = report["loss{}".format(len(fast_losses))] \
        - report["loss1"]
    report["min_slope"] = F.min(gen.preluW())
    report["max_slope"] = F.max(gen.preluW())
    report["min_slope_middle"] = F.min(gen.preluMiddleW())
    report["max_slope_middle"] = F.max(gen.preluMiddleW())
    chainer.reporter.report(report)

    if not gen.learned_lr:
        gen._fast_alpha = min(
            gen.limit_fast_alpha,
            gen.initial_fast_alpha + gen.step_fast_alpha * self.iteration)

def update_core(self):
    # TODO:
    # log parametrisation of radius?
    epsilon = 1e-10
    opt = self.get_optimizer('main')
    r = F.relu(self.coords.W[:, 0])                           # radius
    x = self.coords.W[:, 1:(self.args.dim + 1)]               # anchor
    c = self.coords.W[:, (self.args.dim + 1):]                # sphere centre
    a, b = self.converter(self.get_iterator('main').next())   # edge
    loss = 0

    # anchor loss
    if self.args.lambda_anchor > 0:  # DANCAR
        v, = self.converter(self.get_iterator('anchor').next())
        loss_anc = F.average(F.relu(
            F.sqrt(F.sum((c[v] - x[v])**2, axis=1) + epsilon)
            - r[v] + self.args.margin))
        chainer.report({'loss_anc': loss_anc}, self.coords)
        loss += self.args.lambda_anchor * loss_anc
    else:
        x = c

    # positive sample: a contains b
    if self.args.lambda_pos > 0:
        d = F.sqrt(F.sum((c[a] - x[b])**2, axis=1) + epsilon)
        loss_pos = F.average(
            F.relu(self.args.margin + d + self.args.dag * r[b] - r[a]))
        chainer.report({'loss_pos': loss_pos}, self.coords)
        loss += self.args.lambda_pos * loss_pos

    # negative sample
    if self.args.lambda_neg > 0:
        v, = self.converter(self.get_iterator('vertex').next())
        if self.args.batchsize_negative > 0:
            na, nb = [], []
            for u in v:
                # sample non-edges. accurate but slow
                nnbor = set(nx.non_neighbors(self.graph, u))
                for q in random.sample(
                        nnbor,
                        min(self.args.batchsize_negative, len(list(nnbor)))):
                    na.append(u)
                    nb.append(q)
            na = np.array(na)
            nb = np.array(nb)
        else:
            # random vertex pairs
            na = v
            nb = np.roll(v, 1)
        d = F.sqrt(F.sum((c[na] - x[nb])**2, axis=1) + epsilon)
        loss_neg = F.average(F.relu(self.args.margin - (d - r[na])))
        # loss_neg = F.average(F.relu(self.args.margin - (d + self.args.dag * r[nb] - r[na])))
        chainer.report({'loss_neg': loss_neg}, self.coords)
        loss += self.args.lambda_neg * loss_neg

    # radius should be similar
    if self.args.lambda_uniform_radius > 0:
        loss_uniform_radius = (F.max(r) - F.min(r))**2
        chainer.report({'loss_rad': loss_uniform_radius}, self.coords)
        loss += self.args.lambda_uniform_radius * loss_uniform_radius

    # update the coordinates
    self.coords.cleargrads()
    loss.backward()
    opt.update(loss=loss)
    self.coords.W.array[:, 0] = self.coords.xp.clip(
        self.coords.W.array[:, 0], a_min=0.01, a_max=None)
