Example #1
def batch_pit_loss_faster(ys, ts, label_delay=0):
    """
    PIT loss over mini-batch.
    Args:
      ys: B-length list of predictions
      ts: B-length list of labels
    Returns:
      loss: (1,)-shape mean cross entropy over mini-batch
      labels: B-length list of permuted labels
    """

    n_speakers = ts[0].shape[1]
    xp = chainer.backend.get_array_module(ys[0])
    # (B, T, C)
    ys = F.pad_sequence(ys, padding=-1)

    losses = []
    for shift in range(n_speakers):
        # rolled along with speaker-axis
        ts_roll = [xp.roll(t, -shift, axis=1) for t in ts]
        ts_roll = F.pad_sequence(ts_roll, padding=-1)
        # loss: (B, T, C)
        loss = F.sigmoid_cross_entropy(ys, ts_roll, reduce='no')
        # sum over time: (B, C)
        loss = F.sum(loss, axis=1)
        losses.append(loss)
    # losses: (B, C, C)
    losses = F.stack(losses, axis=2)
    # losses[b, i, j] is a loss between
    # `i`-th speaker in y and `(i+j)%C`-th speaker in t

    perms = xp.array(
        list(permutations(range(n_speakers))),
        dtype='i',
    )
    # y_ind: [0,1,2,3]
    y_ind = xp.arange(n_speakers, dtype='i')
    #  perms  -> relation to t_inds      -> t_inds
    # 0,1,2,3 -> 0+j=0,1+j=1,2+j=2,3+j=3 -> 0,0,0,0
    # 0,1,3,2 -> 0+j=0,1+j=1,2+j=3,3+j=2 -> 0,0,1,3
    t_inds = xp.mod(perms - y_ind, n_speakers)

    losses_perm = []
    for t_ind in t_inds:
        losses_perm.append(F.mean(losses[:, y_ind, t_ind], axis=1))
    # losses_perm: (B, Perm)
    losses_perm = F.stack(losses_perm, axis=1)

    min_loss = F.sum(F.min(losses_perm, axis=1))
    n_frames = np.sum([t.shape[0] for t in ts])
    min_loss = min_loss / n_frames

    min_indices = xp.argmin(losses_perm.array, axis=1)
    labels_perm = [t[:, perms[idx]] for t, idx in zip(ts, min_indices)]

    return min_loss, labels_perm
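A minimal sketch (plain NumPy, values checked by hand for n_speakers = 3) of the index arithmetic used above; the names mirror the function but the snippet is only illustrative:

import numpy as np
from itertools import permutations

n_speakers = 3
perms = np.array(list(permutations(range(n_speakers))), dtype='i')
y_ind = np.arange(n_speakers, dtype='i')
# t_inds[p, i] is the shift j such that speaker i in y is paired with
# speaker (i + j) % n_speakers in t under permutation p.
t_inds = np.mod(perms - y_ind, n_speakers)
print(t_inds[1])  # perm (0, 2, 1) -> [0 1 2]: pairs (0,0), (1,2), (2,1)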
Example #2
 def EMD(self, z):
     """
     earth mover distance between z and standard normal
     :param z:
     :return:
     """
     xp = cuda.get_array_module(z)
     dim_z = z.shape[1]
     n = z.shape[0]
     t = xp.random.normal(size=(n * 10, dim_z)).astype("float32")
     dot = F.matmul(z, t, transb=True)
     dist = F.sum(z**2, axis=1, keepdims=True) - 2 * dot + xp.sum(t**2,
                                                                  axis=1)
     return F.mean(F.min(dist, axis=0)) + F.mean(F.min(dist, axis=1))
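The same F.min reduction can be exercised standalone; a minimal sketch with illustrative shapes (4 codes of dimension 2 against 40 reference samples), mirroring the computation above and relying on the same broadcasting behaviour:

import numpy as np
import chainer.functions as F

z = np.random.randn(4, 2).astype(np.float32)    # illustrative latent codes
t = np.random.randn(40, 2).astype(np.float32)   # reference standard-normal samples
dot = F.matmul(z, t, transb=True)               # (4, 40)
dist = F.sum(z**2, axis=1, keepdims=True) - 2 * dot + np.sum(t**2, axis=1)
loss = F.mean(F.min(dist, axis=0)) + F.mean(F.min(dist, axis=1))
print(loss.array)  # scalar; shrinks as z approaches a standard normal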
Example #3
def clipped_loss(x, t):
    diff = x - t
    abs_loss = abs(diff)
    squared_loss = diff ** 2
    abs_loss = F.expand_dims(abs_loss, 1)
    squared_loss = F.expand_dims(squared_loss, 1)
    return F.sum(F.min(F.concat((abs_loss, squared_loss), axis=1), axis=1))
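A toy check of the element-wise min(|d|, d**2) behaviour (inputs are made up; assumes the module's F import as above):

import numpy as np

x = np.array([0.2, 3.0], dtype=np.float32)
t = np.array([0.0, 0.0], dtype=np.float32)
# per element: min(0.2, 0.04) + min(3.0, 9.0) = 0.04 + 3.0
print(clipped_loss(x, t).array)  # ~3.04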
Example #4
def pit_loss(pred, label, label_delay=0):
    """
    Permutation-invariant training (PIT) cross entropy loss function.

    Args:
      pred:  (T,C)-shaped pre-activation values
      label: (T,C)-shaped labels in {0,1}
      label_delay: if label_delay == 5:
           pred: 0 1 2 3 4 | 5 6 ... 99 100 |
          label: x x x x x | 0 1 ... 94  95 | 96 97 98 99 100
          calculated area: | <------------> |

    Returns:
      min_loss: (1,)-shape mean cross entropy
      label_perms[min_index]: permutated labels
    """
    # label permutations along the speaker axis
    label_perms = [
        label[..., list(p)] for p in permutations(range(label.shape[-1]))
    ]
    losses = F.stack([
        F.sigmoid_cross_entropy(pred[label_delay:, ...],
                                l[:len(l) - label_delay, ...])
        for l in label_perms
    ])
    xp = cuda.get_array_module(losses)
    min_loss = F.min(losses) * (len(label) - label_delay)
    min_index = cuda.to_cpu(xp.argmin(losses.data))

    return min_loss, label_perms[min_index]
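A usage sketch on made-up 2-speaker data (assumes the module-level imports that pit_loss relies on, i.e. F, cuda and permutations):

import numpy as np

pred = np.array([[ 5., -5.],
                 [ 5., -5.],
                 [-5.,  5.],
                 [-5.,  5.]], dtype=np.float32)   # (T, C) pre-activations
label = np.array([[0, 1],
                  [0, 1],
                  [1, 0],
                  [1, 0]], dtype=np.int32)        # columns swapped w.r.t. pred
loss, best_label = pit_loss(pred, label)
print(loss.array)    # small: the swapped permutation matches pred
print(best_label)    # label with its speaker columns reordered to match pred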
Example #5
 def check_backward(self, x_data, y_grad):
     gradient_check.check_backward(
         lambda x: functions.min(x, self.axis, self.keepdims),
         x_data,
         y_grad,
         dtype='d',
         **self.check_backward_options)
Example #6
    def _compute_target_q_value(self, batch):
        with chainer.using_config('train', False), \
                chainer.using_config('enable_backprop', False):
            (_, _, r, s_next, non_terminal) = batch
            r = F.reshape(r, shape=(*r.shape, 1))
            non_terminal = F.reshape(non_terminal,
                                     shape=(*non_terminal.shape, 1))

            s_next_rep = F.repeat(x=s_next,
                                  repeats=self._num_action_samples,
                                  axis=0)
            a_next_rep = self._vae._decode(s_next_rep)
            perturbed_action = self._target_perturbator(s_next_rep, a_next_rep)
            q_values = F.stack([
                q_target(s_next_rep, perturbed_action)
                for q_target in self._target_q_ensembles
            ])
            assert q_values.shape == (self._num_q_ensembles, self._batch_size *
                                      self._num_action_samples, 1)

            weighted_q_minmax = self._lambda * F.min(q_values, axis=0) \
                + (1 - self._lambda) * F.max(q_values, axis=0)
            assert weighted_q_minmax.shape == (self._batch_size *
                                               self._num_action_samples, 1)
            next_q_value = F.max(F.reshape(weighted_q_minmax,
                                           shape=(self._batch_size, -1)),
                                 axis=1,
                                 keepdims=True)
            assert next_q_value.shape == (self._batch_size, 1)
            target_q_value = r + self._gamma * next_q_value * non_terminal
            target_q_value.unchain()
            assert target_q_value.shape == (self._batch_size, 1)
        return target_q_value
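The weighted min/max ensemble trick above can be seen in isolation; a toy sketch with an invented 2-member ensemble over 3 (state, action) pairs:

import numpy as np
import chainer.functions as F

q_values = np.array([[[1.0], [2.0], [3.0]],
                     [[2.0], [0.0], [5.0]]], dtype=np.float32)  # (ensemble, N, 1)
lam = 0.75
weighted = lam * F.min(q_values, axis=0) + (1 - lam) * F.max(q_values, axis=0)
print(weighted.array.ravel())  # [1.25 0.5  3.5 ]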
Example #7
 def check_forward(self, x_data):
     x = chainer.Variable(x_data)
     y = functions.min(x, axis=self.axis, keepdims=self.keepdims)
     self.assertEqual(y.data.dtype, numpy.float32)
     y_expect = self.y_expect
     self.assertEqual(y.data.shape, y_expect.shape)
     testing.assert_allclose(y_expect, y.data)
Example #8
 def check_forward(self, x_data, axis=None, keepdims=False):
     x = chainer.Variable(x_data)
     y = functions.min(x, axis=axis, keepdims=keepdims)
     self.assertEqual(y.data.dtype, numpy.float32)
     y_expect = self.x.min(axis=axis, keepdims=keepdims)
     self.assertEqual(y.data.shape, y_expect.shape)
     gradient_check.assert_allclose(y_expect, y.data)
Example #9
 def check_forward(self, x_data, axis=None, keepdims=False):
     x = chainer.Variable(x_data)
     y = functions.min(x, axis=axis, keepdims=keepdims)
     self.assertEqual(y.data.dtype, numpy.float32)
     y_expect = self.x.min(axis=axis, keepdims=keepdims)
     self.assertEqual(y.data.shape, y_expect.shape)
     testing.assert_allclose(y_expect, y.data)
Example #10
 def check_backward(self, x_data, y_grad, axis=None, keepdims=False):
     gradient_check.check_backward(
         lambda x: functions.min(x, axis=axis, keepdims=keepdims),
         x_data,
         y_grad,
         dtype='d',
         **self.check_backward_options)
Example #11
 def prelu(self, inp, parameter):
     x = F.reshape(inp, (inp.shape[0], 1, inp.shape[1]))
     zeros = self.xp.zeros_like(x.data)
     c = F.transpose(F.concat((x, zeros), axis=1), (0, 2, 1))
     return F.max(
         c,
         axis=2) + F.broadcast_to(parameter, inp.shape) * F.min(c, axis=2)
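The concat/transpose trick above is just PReLU written as max(x, 0) + a * min(x, 0); a toy check of that closed form (slope and input values are illustrative):

import numpy as np
import chainer.functions as F

x = np.array([[-2.0, 3.0]], dtype=np.float32)
a = np.float32(0.25)
y = F.maximum(x, np.zeros_like(x)) + a * F.minimum(x, np.zeros_like(x))
print(y.array)  # [[-0.5  3. ]]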
Example #12
def chamfer_distance(pc1, pc2):
    '''
    Input:
        pc1: float chainer Variable of shape (B,N,C), the first point cloud
        pc2: float chainer Variable of shape (B,M,C), the second point cloud
    Output:
        dist1: float chainer Variable of shape (B,N), distance from first to second
        idx1: int32 chainer Variable of shape (B,N), nearest neighbor from first to second
        dist2: float chainer Variable of shape (B,M), distance from second to first
        idx2: int32 chainer Variable of shape (B,M), nearest neighbor from second to first
    '''

    dist1, idx1, dist2, idx2 = 0, 0, 0, 0

    N = pc1.shape[2]
    M = pc2.shape[2]
    """
    dist = Variable(np.zeros((N,M)))
    for i in range(N):
        for j in range(M):
            dist[i,j] = functions.sum((pc1[0,:,i,0] - pc2[0,:,j,0]) ** 2)
    dist1 = functions.min(dist,axis=1)
    dist2 = functions.min(dist,axis=1)
    """

    # insert an extra dimension into the matrix at the given axis
    # then repeat each dimension by the specified counts; in the case above, the newly inserted axis is repeated M times.
    pc1_expand_tile = functions.tile(functions.expand_dims(pc1, 3),
                                     (1, 1, 1, M, 1))
    pc2_expand_tile = functions.tile(functions.expand_dims(pc2, 2),
                                     (1, 1, N, 1, 1))
    #pc1_expand_tile = functions.tile(pc1,(1,1,M,1))
    #pc2_expand_tile = functions.tile(pc2,(1,N,1,1))
    #pc1_expand_tile shape = pc2_expand_tile shape

    #pc_diff is difference between pc1 and pc2 in coordinate system.
    #print(pc1_expand_tile.shape)
    #print(pc2_expand_tile.shape)
    pc_diff = pc1_expand_tile - pc2_expand_tile
    pc_dist = functions.sum(pc_diff**2, axis=1)

    dist1 = functions.min(pc_dist, axis=1)
    #idx1 = functions.argmin(pc_dist, axis=1)
    dist2 = functions.min(pc_dist, axis=2)
    #idx2 = functions.argmin(pc_dist, axis=2)

    return dist1, idx1, dist2, idx2
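A stripped-down sketch of the same chamfer idea for a single pair of clouds, using a plain (N, M) distance matrix rather than the tiled layout above (shapes are illustrative):

import numpy as np
from chainer import functions

pc1 = np.random.rand(5, 3).astype(np.float32)   # N = 5 points in 3-D
pc2 = np.random.rand(7, 3).astype(np.float32)   # M = 7 points in 3-D
diff = pc1[:, None, :] - pc2[None, :, :]        # (N, M, 3)
pc_dist = functions.sum(diff ** 2, axis=2)      # (N, M) squared distances
dist1 = functions.min(pc_dist, axis=1)          # (N,) nearest pc2 point for each pc1 point
dist2 = functions.min(pc_dist, axis=0)          # (M,) nearest pc1 point for each pc2 point
chamfer = functions.mean(dist1) + functions.mean(dist2)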
Example #13
def occupancy_grid_3d(points, *, pitch, origin, dims, threshold=1):
    d_IP, d_JP, d_KP = OccupancyGrid3D(pitch=pitch, origin=origin,
                                       dims=dims)(points)
    d_IJKP = F.sqrt(d_IP**2 + d_JP**2 + d_KP**2)
    d_IJK = F.min(d_IJKP, axis=3)
    m_IJK = F.relu(threshold - d_IJK)
    m_IJK = F.minimum(m_IJK, m_IJK.array * 0 + 1)
    return m_IJK
Example #14
def normalize_linearly(self, h):
    """Normalize h linearly in [0, 1] over dimensions 
    """
    h_max = F.max(h, axis=1, keepdims=True)
    h_min = F.min(h, axis=1, keepdims=True)
    h_norm = (h - h_min) / (h_max - h_min + 1e-10)

    return h_norm
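A quick check with made-up values (each row gets rescaled to [0, 1] along axis 1):

import numpy as np
import chainer
import chainer.functions as F

h = chainer.Variable(np.array([[1.0, 3.0, 5.0]], dtype=np.float32))
h_max = F.max(h, axis=1, keepdims=True)
h_min = F.min(h, axis=1, keepdims=True)
h_norm = (h - h_min) / (h_max - h_min + 1e-10)
print(h_norm.array)  # ~[[0.  0.5 1. ]]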
Example #15
 def normalize_linearly(self, h):
     """Normalize h linearly over dimensions in [0, 1]
     """
     h_max = F.max(h, axis=1, keepdims=True)
     h_min = F.min(h, axis=1, keepdims=True)
     h_norm = (h - h_min) / (h_max - h_min + 1e-10)
     
     return h_norm
Example #16
 def check_backward(self, x_data, y_grad, axis=None, keepdims=False):
     gradient_check.check_backward(
         lambda x: functions.min(x, axis=axis, keepdims=keepdims),
         x_data,
         y_grad,
         eps=1e-4,
         rtol=1e-3,
         atol=1e-3)
Example #17
    def __call__(self, x, t, index):
        h = self.predict(x)
        self.history = np.append(self.history, np.array([np.mean(h.data, axis=0)]), axis=0)

        h = F.select_item(h, index)             # choose the action[index] in each column
        error_abs = abs(h - t)
        error = F.concat((F.expand_dims(error_abs ** 2, 1), F.expand_dims(error_abs, 1)), axis=1)
        # error_abs > 1 <=> error_abs ** 2 > error_abs,  error_abs < 1 <=> error_abs ** 2 < error_abs
        self.loss = F.sum(F.min(error, axis=1)) / np.float32(len(error_abs))
        return self.loss
Example #18
    def check_backward(self, x_data, y_grad, axis=None, keepdims=False):
        x = chainer.Variable(x_data)
        y = functions.min(x, axis=axis, keepdims=keepdims)

        y.grad = y_grad
        y.backward()

        func = y.creator
        f = lambda: func.forward((x.data.copy(),))
        gx, = gradient_check.numerical_grad(f, (x.data,), (y.grad,), eps=1e-5)
        gradient_check.assert_allclose(gx, x.grad, rtol=1e-3, atol=1e-3)
Example #19
    def check_backward(self, x_data, y_grad, axis=None, keepdims=False):
        x = chainer.Variable(x_data)
        y = functions.min(x, axis=axis, keepdims=keepdims)

        y.grad = y_grad
        y.backward()

        func = y.creator
        f = lambda: func.forward((x.data.copy(), ))
        gx, = gradient_check.numerical_grad(f, (x.data, ), (y.grad, ),
                                            eps=1e-5)
        gradient_check.assert_allclose(gx, x.grad, rtol=1e-3, atol=1e-3)
Example #20
def batched_triangle_reduce_(b, t, p, n, id):
    xp = chainer.backend.get_array_module(b)
    BB, _, H, W = b.shape[:4]
    kb = xp.sum(b, axis=0).astype(bool)
    kt = F.min(t, axis=0)
    kp = p[0, :, :, :]
    kn = n[0, :, :, :]
    kid = id[0, :, :, :]

    for i in range(1, BB):
        bb = (kt.data >= t[i, :, :, :].data)
        kp = F.where(bb, p[i, :, :, :], kp)
        kn = F.where(bb, n[i, :, :, :], kn)
        kid = F.where(bb, id[i, :, :, :], kid)

    b = chainer.as_variable(kb.reshape(1, 1, H, W))
    t = kt.reshape(1, 1, H, W)
    p = kp.reshape(1, 3, H, W)
    n = kn.reshape(1, 3, H, W)
    id = kid.reshape(1, 1, H, W)

    return b, t, p, n, id
Example #21
 def __call__(self, template, speech, length_of_template, length_of_speech):
     self.nodes=[]
     for i in range(length_of_template+1):
         for j in range(length_of_speech+1):
             self.nodes.append(V(ar(0.0,dtype=np.float32)))
     for i in range(length_of_template+1):
         #print("("+str(i)+",1)",end=' ')
         for j in range(length_of_speech+1):
             if(i!=0 and j!=0):
                 self.nodes[i*(length_of_speech+1)+j] = F.min(F.stack([
                     self.nodes[(i-1)*(length_of_speech+1)+j],
                     self.nodes[i*(length_of_speech+1)+j-1],
                     self.nodes[(i-1)*(length_of_speech+1)+j-1],
                 ])) + F.sqrt(F.sum(dist(template[i-1], speech[j-1])) + 1e-8)
     #print(self.nodes[-length_of_speech-1:])
     result_temp=self.nodes[-length_of_speech:]
     result=[]
     t1=[]
     t2=[]
     t3=[]
     for i in range(len(result_temp)):
         t1.append(F.expand_dims(result_temp[i],axis=0))
         t2.append(F.expand_dims(t1[i],axis=0))
         t3.append(F.linear(t2[i],self.W,self.b))
         result.append(F.sigmoid(t3[i]))
     y = F.hstack(result)
     return y[0]
Example #22
 def check_backward(self, x_data, y_grad):
     gradient_check.check_backward(
         lambda x: functions.min(x, self.axis, self.keepdims),
         x_data, y_grad, dtype='d',
         **self.check_backward_options)
Example #23
    def get_bow_reps(self, ids, xs, xs_embed, position_info, x_spans,
                     shell_spans, x_position_info):

        assert len(x_spans[0]) == len(shell_spans[0])

        x_spans = [[[
            shell_span[0].tolist(), x_span[1].tolist()
        ] for x_span, shell_span in zip(x_spans_in_para, shell_spans_in_para)]
                   for x_spans_in_para, shell_spans_in_para in zip(
                       x_spans, shell_spans)]

        #(all_n_spans, 1) paragraph_type
        eye = self.xp.identity(4, dtype=self.xp.float32)
        para_type = [
            i.tolist() - self.max_n_spans * 2
            for i in self.xp.vstack(x_position_info)[:, 2]
        ]
        para_type = self.xp.vstack([eye[i] for i in para_type])

        #(batchsize, max_n_tokens, word_vec)
        xs_embed = chaFunc.pad_sequence(xs_embed, padding=-1)

        #(batchsize, n_spans, max_n_tokens, word_vec)
        xs_embed = [
            chaFunc.tile(xs_embed[i], (len(spans), 1, 1))
            for i, spans in enumerate(x_spans)
        ]

        #(all_spans_in_batch, max_n_tokens, word_vec)
        xs_embed = chaFunc.vstack(xs_embed)

        #(batchsize, max_n_tokens, word_id)
        xs = chaFunc.pad_sequence(xs, padding=0)

        #(batchsize, n_spans, max_n_tokens, word_id)
        xs = [
            self.xp.tile(xs[i].data, (len(spans), 1))
            for i, spans in enumerate(x_spans)
        ]

        #(all_spans_in_batch, max_n_tokens, word_id)
        xs = self.xp.vstack(xs)

        #(all_spans_in_batch, max_n_tokens, word_vec)
        mask_xs_embed_bool = self.xp.zeros(xs_embed.shape).astype(bool)

        #(all_spans_in_batch, word_vec) the length of each span
        len_spans = self.xp.zeros(
            (xs_embed.shape[0], xs_embed.shape[2])).astype(self.xp.float32)

        #(all_spans_in_batch, (start, end))
        x_spans = np.vstack(x_spans)

        xs_ids = []
        eye = self.xp.identity(len(self.vocab), dtype=self.xp.float32)

        for i, span in enumerate(x_spans):
            mask_xs_embed_bool[i][int(span[0]):int(span[1])] = True

            xs_ids.append(self.xp.sum(eye[xs[i][int(span[0]):int(span[1])]],
                                      0))
            len_spans[i].fill(span[1] - span[0])

        # max pooling
        mask_xs_embed = self.xp.zeros(xs_embed.shape).astype(self.xp.float32)
        mask_xs_embed.fill(-self.xp.inf)
        max_pooling_xs = chaFunc.max(
            chaFunc.where(mask_xs_embed_bool, xs_embed, mask_xs_embed), 1)

        # average pooling
        mask_xs_embed = self.xp.zeros(xs_embed.shape).astype(self.xp.float32)
        avg_pooling_xs = chaFunc.sum(
            chaFunc.where(mask_xs_embed_bool, xs_embed, mask_xs_embed),
            1) / len_spans

        # min pooling
        mask_xs_embed = self.xp.zeros(xs_embed.shape).astype(self.xp.float32)
        mask_xs_embed.fill(self.xp.inf)
        min_pooling_xs = chaFunc.min(
            chaFunc.where(mask_xs_embed_bool, xs_embed, mask_xs_embed), 1)

        #(all_n_spans, max_n_tokens, vocab_size)
        xs_ids = self.xp.vstack(xs_ids)

        #(all_n_spans, feature_vector)
        if self.use_elmo:
            # We found that pooling-based features with ELMo do not significantly contribute to the performance.
            # Therefore, we only used discrete BoW features for span-based models with ELMo.
            bow_reps = chaFunc.concat([xs_ids])
        else:
            bow_reps = chaFunc.concat(
                [max_pooling_xs, min_pooling_xs, avg_pooling_xs, xs_ids])

        assert bow_reps.shape[-1] == self.bow_feature_size

        bow_reps = chaFunc.sigmoid(self.BowFCLayer(bow_reps))
        bow_reps = chaFunc.dropout(bow_reps, self.dropout)

        return bow_reps
Example #24
def calc_distance(est_theta, theta):
    # weak regularization to the distribution of estimated thetas
    dist = F.sum(est_theta ** 2, axis=1) + (theta ** 2).sum(axis=1).T - 2 * F.matmul(est_theta, theta, transb=True)

    return F.mean(F.min(dist, axis=0)) + F.mean(F.min(dist, axis=1))
Example #25
 def test_pos_neg_duplicate_axis(self):
     x_data = numpy.random.uniform(-1, 1, (3, 2, 4)).astype(numpy.float32)
     x = chainer.Variable(x_data)
     with self.assertRaises(ValueError):
         functions.min(x, axis=(1, -2))
Example #26
 def get_mask(gcam, sigma=.5, w=8):
     gcam = (gcam - F.min(gcam).data) / (F.max(gcam) - F.min(gcam)).data
     mask = F.squeeze(F.sigmoid(w * (gcam - sigma)))
     return mask
Example #27
 def test_duplicate_axis(self):
     with self.assertRaises(ValueError):
         functions.min(self.x, (0, 0))
Example #28
 def check_backward(self, x_data, y_grad, axis=None, keepdims=False):
     gradient_check.check_backward(
         lambda x: functions.min(x, axis=axis, keepdims=keepdims),
         x_data, y_grad, dtype='d',
         **self.check_backward_options)
Example #29
 def f(x):
     return functions.min(x, self.axis, self.keepdims)
Example #30
 def f(x):
     x = functions.min(x, self.axis, self.keepdims)
     return x * x
Example #31
 def test_invalid_axis_type_in_tuple(self):
     with self.assertRaises(TypeError):
         functions.min(self.x, (1, 'x'))
Example #32
 def test_invalid_axis_type(self):
     with self.assertRaises(TypeError):
         functions.min(self.x, [0])
Example #33
def batch_pit_n_speaker_loss(ys, ts, n_speakers_list):
    """
    PIT loss over mini-batch.
    Args:
      ys: B-length list of predictions (pre-activations)
      ts: B-length list of labels
      n_speakers_list: list of n_speakers in batch
    Returns:
      loss: (1,)-shape mean cross entropy over mini-batch
      labels: B-length list of permuted labels
    """
    max_n_speakers = ts[0].shape[1]
    xp = chainer.backend.get_array_module(ys[0])
    # (B, T, C)
    ys = F.pad_sequence(ys, padding=-1)

    losses = []
    for shift in range(max_n_speakers):
        # rolled along with speaker-axis
        ts_roll = [xp.roll(t, -shift, axis=1) for t in ts]
        ts_roll = F.pad_sequence(ts_roll, padding=-1)
        # loss: (B, T, C)
        loss = F.sigmoid_cross_entropy(ys, ts_roll, reduce='no')
        # sum over time: (B, C)
        loss = F.sum(loss, axis=1)
        losses.append(loss)
    # losses: (B, C, C)
    losses = F.stack(losses, axis=2)
    # losses[b, i, j] is a loss between
    # `i`-th speaker in y and `(i+j)%C`-th speaker in t

    perms = xp.array(
        list(permutations(range(max_n_speakers))),
        dtype='i',
    )
    # y_ind: [0,1,2,3]
    y_ind = xp.arange(max_n_speakers, dtype='i')
    #  perms  -> relation to t_inds      -> t_inds
    # 0,1,2,3 -> 0+j=0,1+j=1,2+j=2,3+j=3 -> 0,0,0,0
    # 0,1,3,2 -> 0+j=0,1+j=1,2+j=3,3+j=2 -> 0,0,1,3
    t_inds = xp.mod(perms - y_ind, max_n_speakers)

    losses_perm = []
    for t_ind in t_inds:
        losses_perm.append(F.mean(losses[:, y_ind, t_ind], axis=1))
    # losses_perm: (B, Perm)
    losses_perm = F.stack(losses_perm, axis=1)

    # masks: (B, Perms)
    def select_perm_indices(num, max_num):
        perms = list(permutations(range(max_num)))
        sub_perms = list(permutations(range(num)))
        return [[x[:num] for x in perms].index(perm) for perm in sub_perms]

    masks = xp.full_like(losses_perm.array, xp.inf)
    for i, t in enumerate(ts):
        n_speakers = n_speakers_list[i]
        indices = select_perm_indices(n_speakers, max_n_speakers)
        masks[i, indices] = 0
    losses_perm += masks

    min_loss = F.sum(F.min(losses_perm, axis=1))
    n_frames = np.sum([t.shape[0] for t in ts])
    min_loss = min_loss / n_frames

    min_indices = xp.argmin(losses_perm.array, axis=1)
    labels_perm = [t[:, perms[idx]] for t, idx in zip(ts, min_indices)]
    labels_perm = [
        t[:, :n_speakers]
        for t, n_speakers in zip(labels_perm, n_speakers_list)
    ]

    return min_loss, labels_perm
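For reference, the inner helper can be exercised on its own; with a 2-speaker utterance inside a 3-speaker batch only two of the six permutations stay unmasked (the helper is reproduced here verbatim for illustration):

from itertools import permutations

def select_perm_indices(num, max_num):
    perms = list(permutations(range(max_num)))
    sub_perms = list(permutations(range(num)))
    return [[x[:num] for x in perms].index(perm) for perm in sub_perms]

print(select_perm_indices(2, 3))  # [0, 2] -> permutations (0, 1, 2) and (1, 0, 2)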
Example #34
    def _policy_update(self, batch):
        status = {}
        vae_status = self._train_vae(batch)

        status.update(vae_status)

        (s, a, _, _, _) = batch
        _, raw_sampled_actions = self._vae._decode_multiple(
            s, decode_num=self._num_mmd_samples)
        pi_actions, raw_pi_actions = self._pi._sample_multiple(
            s, sample_num=self._num_mmd_samples)

        if self._kernel_type == 'gaussian':
            mmd_loss = self._compute_gaussian_mmd(raw_sampled_actions,
                                                  raw_pi_actions,
                                                  sigma=self._mmd_sigma)
        elif self._kernel_type == 'laplacian':
            mmd_loss = self._compute_laplacian_mmd(raw_sampled_actions,
                                                   raw_pi_actions,
                                                   sigma=self._mmd_sigma)
        else:
            raise ValueError('Unknown kernel: {}'.format(self._kernel_type))
        assert mmd_loss.shape == (self._batch_size, 1)

        s_hat = F.expand_dims(s, axis=0)
        s_hat = F.repeat(s_hat, repeats=self._num_mmd_samples, axis=0)
        s_hat = F.reshape(s_hat,
                          shape=(self._batch_size * self._num_mmd_samples,
                                 s.shape[-1]))
        a_hat = F.transpose(pi_actions, axes=(1, 0, 2))
        a_hat = F.reshape(a_hat,
                          shape=(self._batch_size * self._num_mmd_samples,
                                 a.shape[-1]))

        q_values = F.stack([q(s_hat, a_hat) for q in self._q_ensembles])
        assert q_values.shape == (self._num_q_ensembles,
                                  self._batch_size * self._num_mmd_samples, 1)
        q_values = F.reshape(q_values,
                             shape=(self._num_q_ensembles,
                                    self._num_mmd_samples, self._batch_size,
                                    1))
        q_values = F.mean(q_values, axis=1)
        assert q_values.shape == (self._num_q_ensembles, self._batch_size, 1)
        q_min = F.min(q_values, axis=0)

        if self._use_stddev:
            q_stddev = self._compute_stddev(x=q_values, axis=0, keepdims=False)
            assert q_min.shape == q_stddev.shape
        else:
            q_stddev = 0.0

        assert q_min.shape == (self._batch_size, 1)

        if self._num_iterations > self._warmup_iterations:
            pi_loss = F.mean(-q_min + q_stddev * self._stddev_coeff +
                             self._lagrange_multiplier.exp() * mmd_loss)
        else:
            pi_loss = F.mean(self._lagrange_multiplier.exp() * mmd_loss)

        # Dual gradient descent
        # Update actor
        self._pi_optimizer.target.cleargrads()
        pi_loss.backward()
        self._pi_optimizer.update()

        # Just for maintaining consistency with original code
        if self._use_stddev:
            q_stddev.unchain()

        # Update lagrange multiplier
        lagrange_loss = -F.mean(-q_min + q_stddev * self._stddev_coeff +
                                self._lagrange_multiplier.exp() *
                                (mmd_loss - self._epsilon))
        self._lagrange_optimizer.target.cleargrads()
        lagrange_loss.backward()
        self._lagrange_optimizer.update()

        pi_loss.unchain_backward()
        lagrange_loss.unchain_backward()

        # Clip lagrange multiplier in range
        self._lagrange_multiplier.clip(-5.0, 10.0)

        xp = chainer.backend.get_array_module(pi_loss)
        status['pi_loss'] = xp.array(pi_loss.array)
        status['mmd_loss'] = xp.mean(xp.array(mmd_loss.array))
        status['lagrange_loss'] = xp.array(lagrange_loss.array)
        status['lagrange_multiplier'] = xp.array(
            self._lagrange_multiplier().array)

        return status
Example #35
 def f(x):
     return functions.min(x, self.axis, self.keepdims)
Example #36
    def __call__(self, batch_graph, targets=None):
        """
        This method performs forward calculation.

        Parameters
        ----------
        batch_graph : list consists of Graph
            contains Graphs in minibatch
        targets : targets
            this parameter is only used in regression task

        Returns
        -------
        In classification task : (batchsize, num_classes) matrix
            which means the probability of which class is each graph in.
        In regression task : (batchsize, 1) matrix
            which means the prediction value of each graph treewidth.
        """
        # set the array module based on using device
        xp = self.device.xp

        # concatenate the node_features
        X_concat = chainer.Variable(xp.concatenate([xp.array(graph.node_features) for graph in batch_graph], axis=0))
        X_concat.to_device(self.device)  # if you use GPU, you must transfer X_concat into GPU.

        # make graph pooling matrix and neighbors pooling matrix
        graph_pool = self.__preprocess_graphpool(batch_graph)
        if self.neighbor_pooling_type == "max":
            padded_neighbor_list = self.__preprocess_neighbors_maxpool(batch_graph)
        else:
            Adj_block = self.__preprocess_neighbors_sumavepool(batch_graph)

        hidden_rep = [X_concat]  # list of hidden representation at each layer (including input feature vectors)
        h = X_concat

        # perform Aggregating and Combining node features
        for layer in range(self.num_layers-1):
            # perform max neighbor pooling
            if self.neighbor_pooling_type == "max":
                # padding minimum value vector
                padded_h = F.concat((h, F.min(h, axis=0).reshape(1, h.shape[1])), axis=0)

                # make (F-dim, max_deg * nodes) matrix to perform max aggregation
                pooled_mat = F.sparse_matmul(padded_h.transpose(), padded_neighbor_list).transpose()

                # make 3D tensor
                pooled_tensor = F.reshape(pooled_mat, (padded_neighbor_list.shape[0] - 1,
                                          int(padded_neighbor_list.shape[1] / (padded_neighbor_list.shape[0] - 1)), h.shape[1]))

                # take max
                pooled = F.max(pooled_tensor, axis=1)

            # perform sum or average neighbor pooling
            else:
                pooled = F.sparse_matmul(Adj_block, h)
                if self.neighbor_pooling_type == "average":
                    degree = F.sparse_matmul(Adj_block, xp.ones((Adj_block.shape[0], 1), dtype=xp.float32))
                    pooled = pooled/degree

            # input aggregated vectors into MLP
            pooled_rep = self.mlps[layer](pooled)
            h = self.batch_norms[layer](pooled_rep)
            h = F.relu(h)
            hidden_rep.append(h)

        # perform Readout node features
        score_over_layer = 0
        for layer, h in enumerate(hidden_rep):
            # perform max readout
            if self.graph_pooling_type == "max":
                # padding minimum value
                padded_h = F.concat((h, F.min(h, axis=0).reshape(1, h.shape[1])), axis=0)

                # make (F-dim, max|V| * batchsize) matrix to perform max aggregation
                pooled_mat = F.sparse_matmul(padded_h.transpose(), graph_pool).transpose()

                # make 3D tensor
                pooled_tensor = F.reshape(pooled_mat, (len(batch_graph), int(graph_pool.shape[1] / len(batch_graph)), h.shape[1]))

                # take max
                pooled_h = F.max(pooled_tensor, axis=1)

            # sum or average readout
            else:
                pooled_h = F.sparse_matmul(graph_pool, h)

            score_over_layer += F.dropout(self.linears_prediction[layer](pooled_h), self.final_dropout)

        # final layers in regression task
        if self.task_type == "Regression":
            h = self.final_l2(score_over_layer)
            h = F.relu(h)
            score_over_layer = self.final_l1(h)

            if targets is None:
                return score_over_layer
            else:
                self.loss = F.mean_squared_error(targets.reshape(-1, 1), score_over_layer)  # MSE Loss
                self.abs_loss = F.mean_absolute_error(targets.reshape(-1, 1), score_over_layer)  # MAE Loss
                self.abs_max_loss = F.max(F.absolute_error(targets.reshape(-1, 1), score_over_layer))  # Max Absolute Error
                chainer.reporter.report({'loss': self.loss}, self)
                chainer.reporter.report({'abs_loss': self.abs_loss}, self)
                chainer.reporter.report({'abs_max_loss': self.abs_max_loss}, self)
                # return the MSE loss. If you want to use other loss, please change this sentence.
                return self.loss

        return score_over_layer
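The padding trick used in the max-pooling branches above (appending the column-wise minimum as a dummy row so that "missing" neighbours never win the max) can be seen on a toy matrix; values here are made up:

import numpy as np
import chainer.functions as F

h = np.array([[1.0, 5.0],
              [3.0, 2.0]], dtype=np.float32)   # two node feature rows
padded_h = F.concat((h, F.min(h, axis=0).reshape(1, h.shape[1])), axis=0)
print(padded_h.array)  # last row [1. 2.] never beats a real neighbour in F.max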
Example #37
 def test_invalid_axis_type(self):
     with self.assertRaises(TypeError):
         functions.min(self.x, [0])
Example #38
    def update_core(self):
        gen = self.models['gen']
        dis = self.models['dis']
        enc = self.models['enc']
        gen_optimizer = self.get_optimizer('opt_gen')
        xp = enc.xp

        x, gt, c = self.get_batch(xp)
        if self.input_size is not None:
            _x = []
            for img in x.data.get():
                _x.append(
                    chainercv.transforms.resize(
                        img, (self.input_size, self.input_size)))
            x = Variable(xp.asarray(_x))

        # obtain initial z by encoder
        if enc.n_classes != 0:
            z = enc(x, y=c)
        else:
            z = enc(x)

        # fast updating
        with chainer.using_config('train', False):
            # out_noab : reconstruction results without auxiliary network
            outs, fast_losses, out_noab, zeta, z_prime = gen(batchsize=len(z),
                                                             z=z,
                                                             y=c,
                                                             gt=gt)

        lmd_pixel = 0.05
        fast_losses.append(
            reconstruction_loss(dis, outs[-1], gt) +
            lmd_pixel * pixel_loss(outs[-1], gt))

        loss = 0
        weights = [20, 2.0, 1.0]

        for i in range(0, len(outs)):
            loss += fast_losses[i] * weights[i]

        # reconstruction loss as an autoencoder
        lmd_ae = 100

        # lmd_ae = 0
        ae_loss = F.mean_squared_error(z, z_prime) * z.shape[0]
        loss += lmd_ae * ae_loss

        # sparse regularization
        # lmd_sparse = 0.000
        # sparse_loss = lmd_sparse * F.sum(F.absolute(zeta))
        # loss += sparse_loss

        gen.cleargrads()

        # double backprop
        loss.backward()

        gen_optimizer.update()

        # reporting
        report = dict()
        for i, loss_i in enumerate(fast_losses):
            report["loss{}".format(i + 1)] = loss_i
        report["loss_ae"] = ae_loss

        report["loss_noab"] = reconstruction_loss(
            dis, out_noab, gt) + lmd_pixel * pixel_loss(out_noab, gt)

        report["fast_alpha"] = gen.fast_alpha().data.mean()
        report["fast_benefit"] = report["loss{}".format(
            len(fast_losses))] - report["loss1"]
        report["min_slope"] = F.min(gen.preluW())
        report["max_slope"] = F.max(gen.preluW())
        report["min_slope_middle"] = F.min(gen.preluMiddleW())
        report["max_slope_middle"] = F.max(gen.preluMiddleW())

        chainer.reporter.report(report)

        if not gen.learned_lr:
            gen._fast_alpha = min(
                gen.limit_fast_alpha,
                gen.initial_fast_alpha + gen.step_fast_alpha * self.iteration)
Example #39
 def test_invalid_axis_type_in_tuple(self):
     with self.assertRaises(TypeError):
         functions.min(self.x, (1, 'x'))
Example #40
    def update_core(self):
        # TODO:
        # log parametrisation of radius?

        epsilon = 1e-10
        opt = self.get_optimizer('main')
        r = F.relu(self.coords.W[:, 0])  # radius
        x = self.coords.W[:, 1:(self.args.dim + 1)]  # anchor
        c = self.coords.W[:, (self.args.dim + 1):]  # sphere centre
        a, b = self.converter(self.get_iterator('main').next())  # edge
        loss = 0

        # anchor loss
        if self.args.lambda_anchor > 0:  # DANCAR
            v, = self.converter(self.get_iterator('anchor').next())
            loss_anc = F.average(
                F.relu(
                    F.sqrt(F.sum((c[v] - x[v])**2, axis=1) + epsilon) - r[v] +
                    self.args.margin))
            chainer.report({'loss_anc': loss_anc}, self.coords)
            loss += self.args.lambda_anchor * loss_anc
        else:
            x = c

        # positive sample: a contains b
        if self.args.lambda_pos > 0:
            d = F.sqrt(F.sum((c[a] - x[b])**2, axis=1) + epsilon)
            loss_pos = F.average(
                F.relu(self.args.margin + d + self.args.dag * r[b] - r[a]))
            chainer.report({'loss_pos': loss_pos}, self.coords)
            loss += self.args.lambda_pos * loss_pos

        # negative sample
        if self.args.lambda_neg > 0:
            v, = self.converter(self.get_iterator('vertex').next())
            if self.args.batchsize_negative > 0:
                na, nb = [], []
                for u in v:  # sample non-edges. accurate but slow
                    nnbor = set(nx.non_neighbors(self.graph, u))
                    for q in random.sample(
                            nnbor,
                            min(self.args.batchsize_negative,
                                len(list(nnbor)))):
                        na.append(u)
                        nb.append(q)
                na = np.array(na)
                nb = np.array(nb)
            else:  # random vertex pairs
                na = v
                nb = np.roll(v, 1)
            d = F.sqrt(F.sum((c[na] - x[nb])**2, axis=1) + epsilon)
            loss_neg = F.average(F.relu(self.args.margin - (d - r[na])))
            #            loss_neg = F.average(F.relu(self.args.margin - (d + self.args.dag * r[nb] - r[na])))
            chainer.report({'loss_neg': loss_neg}, self.coords)
            loss += self.args.lambda_neg * loss_neg

        # radius should be similar
        if self.args.lambda_uniform_radius > 0:
            loss_uniform_radius = (F.max(r) - F.min(r))**2
            chainer.report({'loss_rad': loss_uniform_radius}, self.coords)
            loss += self.args.lambda_uniform_radius * loss_uniform_radius

        # update the coordinates
        self.coords.cleargrads()
        loss.backward()
        opt.update(loss=loss)
        self.coords.W.array[:, 0] = self.coords.xp.clip(
            self.coords.W.array[:, 0], a_min=0.01, a_max=None)
Example #41
 def test_duplicate_axis(self):
     with self.assertRaises(ValueError):
         functions.min(self.x, (0, 0))
Example #42
 def f(x):
     x = functions.min(x, self.axis, self.keepdims)
     return x * x
Example #43
 def test_pos_neg_duplicate_axis(self):
     x_data = numpy.random.uniform(-1, 1, (3, 2, 4)).astype(numpy.float32)
     x = chainer.Variable(x_data)
     with self.assertRaises(ValueError):
         functions.min(x, axis=(1, -2))