def original(self, enc_hs, dec_z, att_prev, scaling=2.0):
        '''AttLoc forward

        :param enc_hs: list of encoder hidden-state sequences (each utt: frame x hdim)
        :param dec_z: previous decoder hidden state (batch x dunits), or None
        :param att_prev: previous attention weights (batch x frame), or None
        :param scaling: scaling factor applied to the scores to sharpen the attention
        :return: context vector c (batch x hdim) and attention weights w (batch x frame)
        '''
        batch = len(enc_hs)
        # pre-compute all h outside the decoder loop
        if self.pre_compute_enc_h is None:
            self.enc_h = F.pad_sequence(enc_hs)  # utt x frame x hdim
            self.h_length = self.enc_h.shape[1]
            # utt x frame x att_dim
            self.pre_compute_enc_h = linear_tensor(self.mlp_enc, self.enc_h)

        if dec_z is None:
            dec_z = chainer.Variable(self.xp.zeros(
                (batch, self.dunits), dtype=np.float32))
        else:
            dec_z = F.reshape(dec_z, (batch, self.dunits))

        # initialize attention weight with uniform dist.
        if att_prev is None:
            att_prev = [self.xp.full(
                hh.shape[0], 1.0 / hh.shape[0], dtype=np.float32) for hh in enc_hs]
            att_prev = [chainer.Variable(att) for att in att_prev]
            att_prev = F.pad_sequence(att_prev)

        # TODO(watanabe) use <chainer variable>.reshape() instead of F.reshape()
        # att_prev: utt x frame -> utt x 1 x 1 x frame -> utt x att_conv_chans x 1 x frame
        att_conv = self.loc_conv(
            F.reshape(att_prev, (batch, 1, 1, self.h_length)))
        # att_conv: utt x att_conv_chans x 1 x frame -> utt x frame x att_conv_chans
        att_conv = F.swapaxes(F.squeeze(att_conv, axis=2), 1, 2)
        # att_conv: utt x frame x att_conv_chans -> utt x frame x att_dim
        att_conv = linear_tensor(self.mlp_att, att_conv)

        # dec_z_tiled: utt x frame x att_dim
        dec_z_tiled = F.broadcast_to(
            F.expand_dims(self.mlp_dec(dec_z), 1), self.pre_compute_enc_h.shape)

        # dot with gvec
        # utt x frame x att_dim -> utt x frame
        # TODO(watanabe) use batch_matmul
        e = F.squeeze(linear_tensor(self.gvec, F.tanh(
            att_conv + self.pre_compute_enc_h + dec_z_tiled)), axis=2)
        # Masking the padded area with a large negative value to force its probability to zero
        # simply degraded performance, so that implementation was abandoned.
        # Apply a scaling factor to sharpen the attention.
        w = F.softmax(scaling * e)

        # weighted sum over frames
        # utt x hdim
        c = F.sum(self.enc_h * F.broadcast_to(F.expand_dims(w, 2), self.enc_h.shape), axis=1)

        return c, w
Example #2
 def loss_Critic(self, dis_c, alpha, dis_y1, dis_y2):
     xp = chainer.backend.get_array_module(dis_c.data)
     batchsize = len(dis_c)
     loss = F.sum((F.squeeze(dis_c)-F.squeeze(alpha))**2) / batchsize
     loss_ = F.sum(dis_y1**2) / batchsize
     loss_ += F.sum(dis_y2**2) / batchsize
     loss += loss_/2
     chainer.report({'Critic_loss':loss})
     return loss
Example #3
def cos_sim(x, y):
    # F.squeeze raises an error when batchsize == 1, hence the explicit axes
    if len(x.shape) > 2:
        norm_x = F.normalize(F.squeeze(F.squeeze(x, axis=(2,)), axis=(2,)))
        norm_y = F.normalize(F.squeeze(F.squeeze(y, axis=(2,)), axis=(2,)))
    else:
        norm_x = F.normalize(x)
        norm_y = F.normalize(y)
    return F.batch_matmul(norm_x, norm_y, transa=True)
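A minimal usage sketch for the cos_sim above (not from the original project), assuming 2-D float32 NumPy inputs of shape (batch, n); with 2-D inputs, F.batch_matmul with transa=True returns one cosine value per batch element in shape (batch, 1, 1):

import numpy as np
import chainer.functions as F

x = np.random.randn(3, 4).astype(np.float32)
y = np.random.randn(3, 4).astype(np.float32)
sim = cos_sim(x, y)    # Variable of shape (3, 1, 1), one cosine per pair
sim = F.squeeze(sim)   # -> shape (3,)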
Example #4
    def __call__(self, x_block, y_in_block, y_out_block):

        batch = len(x_block)
        #embed
        ex_block = F.dropout(self.make_input_embedding(self.embed_x, x_block),
                             self.dropout)
        ey_block = F.dropout(
            self.make_input_embedding(self.embed_y, y_in_block), self.dropout)
        eyy_block = F.dropout(
            self.make_input_embedding(self.embed_yy, y_in_block), self.dropout)
        eys = F.transpose(ey_block, (0, 2, 1))
        eyys = F.transpose(eyy_block, (0, 2, 1))
        #gcnn
        h = F.expand_dims(ex_block, axis=1)
        for i in range(self.stack):
            h = self.gcnn[i](h)
        h = F.dropout(F.squeeze(h, axis=1), self.dropout)
        #NStepLSTM
        eys2 = [i for i in eys]
        eyys2 = [i for i in eyys]
        _, _, oss = self.decoder(None, None, eys2)
        _, _, oss2 = self.decoder2(None, None, eyys2)
        ss = F.stack(oss, axis=0)
        ss2 = F.stack(oss2, axis=0)
        #mask_make
        mask = (y_in_block[:, :, None] >= 0) * self.xp.ones(
            (self.batch, 1, self.n_units), dtype=bool)
        ss = F.where(mask, ss, self.xp.full(ss.shape, 0, 'f'))
        #weight_calculate
        batch_A = F.batch_matmul(ss, h) * self.scale_score
        mask = (x_block[:, 0:len(x_block[0]) - self.stack *
                        (self.width - 1)][:, None, :] >=
                0) * (y_in_block[:, :, None] >= 0)
        batch_A = F.where(mask, batch_A,
                          self.xp.full(batch_A.shape, -self.xp.inf, 'f'))
        batch_A = F.softmax(batch_A, axis=2)
        batch_A = F.where(self.xp.isnan(batch_A.data),
                          self.xp.zeros(batch_A.shape, 'f'), batch_A)
        batch_A, h = F.broadcast(batch_A[:, None], h[:, :, None])
        batch_C = F.sum(batch_A * h, axis=3)

        e = F.transpose(batch_C, (0, 2, 1))
        e = F.squeeze(F.concat(F.split_axis(e, self.batch, axis=0), axis=1))
        ss2 = F.squeeze(F.concat(F.split_axis(ss2, self.batch, axis=0),
                                 axis=1))
        t = (self.We(e) + self.Ws(ss2))
        t = F.dropout(t, self.dropout)

        concat_ys_out = F.concat(y_out_block, axis=0)
        loss = F.sum(F.softmax_cross_entropy(t, concat_ys_out,
                                             reduce='no')) / batch

        chainer.report({'loss': loss.data}, self)
        n_words = concat_ys_out.shape[0]
        perp = self.xp.exp(loss.data * batch / n_words)
        chainer.report({'perp': perp}, self)
        return loss
Example #5
 def _sample_state(self, transition, s_shape=(32, 7), z_shape=(32, 4)):
     s_current = self._Uniform.sample(sample_shape=s_shape)
     s_current = F.squeeze(s_current)
     s_next, _ = transition(s_current)
     z = self._Normal.sample(sample_shape=z_shape)
     z = F.squeeze(z)
     assert s_shape == s_current.shape
     assert s_shape == s_next.shape
     assert z_shape == z.shape
     return s_current, s_next, z
Example #6
    def __call__(self, x, enc_out=None, mask=None):
        """
            args
                x: paralleled main features in the model
                   Variable in (batch, hidden_dim, length)
                enc_out: hidden features from the Encoder
                   Variable in (batch, hidden_dim, length)
                mask: padding-mask or future-mask
                   xp-array in (batch, length, length)
                   an element takes 'False' when pad/future, otherwise 'True'
            returns
                weighted_sum: attention output
                   Variable in (batch, hidden_dim, length)
        """
        # ksize-1-convolution results in parallel linear projections
        if self.self_attention:
            qkv = F.squeeze(self.W(F.expand_dims(x, axis=3)), axis=3)
            query, key, value = F.split_axis(qkv, 3, axis=1)
        else:
            query = F.squeeze(self.W_Q(F.expand_dims(x, axis=3)), axis=3)
            kv = F.squeeze(self.W_KV(F.expand_dims(enc_out, axis=3)), axis=3)
            key, value = F.split_axis(kv, 2, axis=1)

        # make q,k,v into (batch*parallel, dim/parallel, length)shape
        query = F.concat(F.split_axis(query, self.parallel_num, axis=1),
                         axis=0)
        key = F.concat(F.split_axis(key, self.parallel_num, axis=1), axis=0)
        value = F.concat(F.split_axis(value, self.parallel_num, axis=1),
                         axis=0)
        mask = self.xp.concatenate([mask] * self.parallel_num, axis=0)

        attention_weight = F.batch_matmul(query, key, transa=True) * self.scale
        attention_weight = F.where(
            mask, attention_weight,
            self.xp.full(attention_weight.shape, -np.inf, dtype=np.float32))
        attention_weight = F.softmax(attention_weight, axis=2)
        attention_weight = F.dropout(attention_weight, self.dropout_rate)
        attention_weight = F.where(
            self.xp.isnan(attention_weight.data),
            self.xp.full(attention_weight.shape, 0, dtype=np.float32),
            attention_weight)
        self.attention_weight = copy.deepcopy(attention_weight.data)

        # attention: (batch, q-length, k-length) -> (batch, 1, q-length, k-length)
        # value: (batch, dim/parallel, k-length) -> (batch, dim/parallel, 1, k-length)
        attention_weight, value = F.broadcast(attention_weight[:, None],
                                              value[:, :, None])
        weighted_sum = F.sum(attention_weight * value, axis=3)
        weighted_sum = F.concat(F.split_axis(weighted_sum,
                                             self.parallel_num,
                                             axis=0),
                                axis=1)

        weighted_sum = F.squeeze(self.linear(
            F.expand_dims(weighted_sum, axis=3)),
                                 axis=3)
        return weighted_sum
Example #7
    def translate(self, xs, max_length=100):
        xs = numpy.insert(xs, 0, 2)
        xs = numpy.append(xs, 0)
        with chainer.no_backprop_mode(), chainer.using_config('train', False):
            exs = self.embed_x(Variable(self.xp.array(xs,
                                                      dtype=self.xp.int32)))

            h = F.expand_dims(exs, axis=0)
            h = F.expand_dims(h, axis=0)
            h = F.transpose(h, (0, 1, 3, 2))
            for i in range(self.stack):
                h = self.gcnn[i](h)
            h = F.squeeze(h, axis=1)
            h = F.squeeze(h, axis=0)
            h = F.transpose(h, (1, 0))

            ys = self.xp.full(1, 2, self.xp.int32)
            result = []
            hx = None
            cx = None
            hx2 = None
            cx2 = None

            for i in range(max_length):
                eys = self.embed_y(ys)
                eyys = self.embed_yy(ys)
                eys2 = [eys]
                eyys2 = [eyys]
                hx, cx, ss = self.decoder(hx, cx, eys2)
                hx2, cx2, ss2 = self.decoder2(hx2, cx2, eyys2)

                batch_A = F.matmul(h, ss[0], transb=True) * self.scale_score
                batch_A = F.softmax(batch_A, axis=0)
                if self.weight:
                    with open("weight/wei.txt", "a", encoding="utf-8") as f:
                        for j in range(len(batch_A)):
                            f.write(str(batch_A[j][0].data) + "\n")
                        f.write("--------------\n")
                s = F.matmul(batch_A, h, transa=True)
                t = (self.We(s) + self.Ws(ss2[0]))
                ys = self.xp.argmax(t.data, axis=1).astype(self.xp.int32)
                if ys[0] == 0:
                    break
                result.append(ys)
        result = cuda.to_cpu(
            self.xp.concatenate([self.xp.expand_dims(x, 0) for x in result]).T)
        # Remove EOS tags
        outs = []
        for y in result:
            inds = numpy.argwhere(y == EOS)
            if len(inds) > 0:
                y = y[:inds[0, 0]]
            outs.append(y)
        return outs
Example #8
 def compute_kl_after_update(loss_func, n=100):
     policy = copy.deepcopy(base_policy)
     optimizer = chainer.optimizers.SGD(1e-4)
     optimizer.setup(policy)
     for _ in range(n):
         distrib = policy(x)
         policy.cleargrads()
         F.squeeze(loss_func(distrib)).backward()
         optimizer.update()
     distrib_after = policy(x)
     return float(another_distrib.kl(distrib_after).array)
Example #9
 def update_Q():
     # Predicted values: Q(s,a)
     y = F.squeeze(Q(obs, action), axis=1)
     # Target values: r + gamma * Q(s,policy(s))
     with chainer.no_backprop_mode():
         next_q = F.squeeze(target_Q(obs_next, target_policy(obs_next)),
                            axis=1)
         target = reward + gamma * (1 - done) * next_q
     loss = F.mean_squared_error(y, target)
     Q.cleargrads()
     loss.backward()
     opt_Q.update()
Example #10
 def update_Q():
     # Predicted values: Q(s,a)
     y = F.squeeze(Q(obs, action), axis=1)
     # Target values: r + gamma * Q(s,policy(s))
     with chainer.no_backprop_mode():
         next_q = F.squeeze(target_Q(obs_next, target_policy(obs_next)),
                            axis=1)
         target = reward + gamma * (1 - done) * next_q
     loss = F.mean_squared_error(y, target)
     Q.cleargrads()
     loss.backward()
     opt_Q.update()
Example #11
def bdot(x, y):
    """ batch direct product
    Not to be confused with Exterior product.

    :param x: batch times n
    :param y: batch times n
    :return: batch dimension
    """
    assert x.shape[0] == y.shape[0]
    assert x.shape[1] == y.shape[1]
    xT = F.expand_dims(x, 1)
    y = F.expand_dims(y, 2)
    res = F.squeeze(F.squeeze(xT @ y, axis=1), axis=1)
    return res
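A minimal usage sketch for bdot (not from the original project), assuming float32 NumPy inputs and that chainer.functions is imported as F, as in the snippet above; the result is one scalar per batch row:

import numpy as np

x = np.arange(6, dtype=np.float32).reshape(2, 3)   # [[0, 1, 2], [3, 4, 5]]
y = np.ones((2, 3), dtype=np.float32)
out = bdot(x, y)   # Variable of shape (2,) holding [3., 12.]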
Example #12
    def update_core(self):
        gen_optimizer = self.get_optimizer('opt_gen')
        dis_optimizer = self.get_optimizer('opt_dis')

        xp = self.gen.xp
        opt = self.opt

        batch = self.get_iterator('main').next()
        batchsize = len(batch)
        x = denoise.add_noise(batch, self.opt)
        x = utils.prepare_data_for_cnn(x, opt.maxlen, opt.filter_shape)
        x_org = utils.prepare_data_for_rnn(batch,
                                           opt.maxlen,
                                           opt.sent_len,
                                           opt.n_words,
                                           is_add_GO=True)
        x = xp.array(x, dtype=np.int32)
        x_org = xp.array(x_org, dtype=np.int32)
        # generator
        syn_sents, prob = self.gen(x, x_org)  # prob: fake data

        # discriminator
        logits_real, H_real = self.dis(x)
        logits_fake, H_fake = self.dis(prob, is_prob=True)

        # one hot vector
        labels_one = xp.ones((batchsize), dtype=xp.int32)  # 1-dim array
        labels_zero = xp.zeros((batchsize), dtype=xp.int32)
        labels_fake = labels_zero  #F.concat([labels_one, labels_zero], axis=1)
        labels_real = labels_one  #F.concat([labels_zero, labels_one], axis=1)
        D_loss = F.softmax_cross_entropy(logits_real, labels_real) + \
            F.softmax_cross_entropy(logits_fake, labels_fake)

        G_loss = compute_MMD_loss(F.squeeze(H_fake), F.squeeze(H_real))

        self.gen.cleargrads()
        G_loss.backward()
        gen_optimizer.update()

        self.dis.cleargrads()
        D_loss.backward()
        dis_optimizer.update()

        H_fake.unchain_backward()
        H_real.unchain_backward()
        prob.unchain_backward()

        chainer.reporter.report({'loss_gen': G_loss})
        chainer.reporter.report({'loss_dis': D_loss})
Example #13
def bquad(x, Q):
    """ calcuate x^T Q x

    :param x: vector batch times n
    :param Q: batch times n times n
    :return: batch dim
    """
    assert x.shape[0] == Q.shape[0], "batch mismatch" + str(x.shape) + ":" + str(Q.shape)
    assert x.shape[1] == Q.shape[1], "mat mul dim mismatch"
    assert Q.shape[2] == Q.shape[1], "Q is not square matrix"
    xT = F.expand_dims(x, 1)
    x_ = F.expand_dims(x, 2)
    res = F.squeeze(F.squeeze(xT @ Q @ x_, axis=1), axis=1)
    assert list(res.shape) == [list(x.shape)[0]]
    return res
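A corresponding usage sketch for bquad (not from the original project), again assuming float32 NumPy inputs:

import numpy as np

x = np.ones((2, 3), dtype=np.float32)
Q = np.stack([np.eye(3), 2 * np.eye(3)]).astype(np.float32)   # (2, 3, 3)
out = bquad(x, Q)   # Variable of shape (2,) holding [3., 6.]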
Example #14
 def bias_correction_policy_gradients(truncation_threshold):
     gs = []
     for sample in mu_samples:
         base_policy.cleargrads()
         loss = acer.compute_policy_gradient_loss(
             action=sample,
             advantage=evaluate_action(sample),
             action_distrib=pi,
             action_distrib_mu=mu,
             action_value=action_value,
             v=0,
             truncation_threshold=truncation_threshold)
         F.squeeze(loss).backward()
         gs.append(extract_gradients_as_single_vector(base_policy))
     return gs
Example #15
    def get_onehot_grad(self, xs, ys=None):
        if ys is None:
            with chainer.using_config('train', False):
                ys = self.predict(xs, argmax=True)
        u, exs_prem = self.encoder.get_grad(xs[0])
        v, exs_hypo = self.encoder.get_grad(xs[1])
        encodings = F.concat((u, v, F.absolute(u - v), u * v), axis=1)
        outputs = self.output(self.mlp(encodings, no_dropout=True))
        loss = F.softmax_cross_entropy(outputs, ys)

        exs = exs_hypo
        lengths = [len(x) for x in xs[1]]

        if isinstance(exs, tuple):
            exs_grad = chainer.grad([loss], exs)
            ex_sections = np.cumsum([ex.shape[0] for ex in exs[:-1]])
            exs = F.concat(exs, axis=0)
            exs_grad = F.concat(exs_grad, axis=0)
            onehot_grad = F.sum(exs_grad * exs, axis=1)
            onehot_grad = F.split_axis(onehot_grad, ex_sections, axis=0)
        else:
            exs_grad = chainer.grad([loss], [exs])[0]
            # (batch_size, n_dim, max_length, 1)
            assert exs_grad.shape == exs.shape
            onehot_grad = F.squeeze(F.sum(exs_grad * exs, 1), 2)
            onehot_grad = [x[:l] for x, l in zip(onehot_grad, lengths)]
        return onehot_grad
Example #16
    def get_onehot_grad(self, xs, ys=None):
        if ys is None:
            with chainer.using_config('train', False):
                ys = self.predict(xs, argmax=True)
                ys = F.expand_dims(ys, axis=1)
                ys = [y for y in ys]
        encodings, exs = self.encoder.get_grad(xs)
        outputs = self.output(encodings)
        concat_truths = F.concat(ys, axis=0)
        loss = F.softmax_cross_entropy(outputs, concat_truths)

        if isinstance(exs, tuple):
            exs_grad = chainer.grad([loss], exs)
            ex_sections = np.cumsum([ex.shape[0] for ex in exs[:-1]])
            exs = F.concat(exs, axis=0)
            exs_grad = F.concat(exs_grad, axis=0)
            onehot_grad = F.sum(exs_grad * exs, axis=1)
            onehot_grad = F.split_axis(onehot_grad, ex_sections, axis=0)
        else:
            exs_grad = chainer.grad([loss], [exs])[0]
            # (batch_size, n_dim, max_length, 1)
            assert exs_grad.shape == exs.shape
            onehot_grad = F.squeeze(F.sum(exs_grad * exs, 1), 2)
            lengths = [len(x) for x in xs]
            onehot_grad = [x[:l] for x, l in zip(onehot_grad, lengths)]
        return onehot_grad
Example #17
    def lstm_first_forward_func(self, xs):
        # xs T,F,in_size
        xp = chainer.cuda.cupy.get_array_module(xs[0].data)
        hx = None
        cx = None
        xs = F.transpose(xs, axes=(1, 0, 2))  # shape = F,T,in_size
        xs = [
            F.squeeze(e)
            for e in F.split_axis(xs, xs.shape[0], axis=0, force_tuple=True)
        ]
        _, _, hs = self.lstm(xs)  # hs is list of T x D variable
        hs = F.stack(hs)
        box_num, frame, _ = hs.shape
        hs = F.reshape(hs, (-1, hs.shape[-1]))
        hs = F.relu(self.fc2(hs))
        hs = F.reshape(hs, shape=(box_num, frame, -1))
        hs = F.transpose(hs, axes=(1, 0, 2))  # shape = T, F, 1024
        for relation_module_str in self.relation_module_name[:len(
                self.relation_module_name) // 2]:
            hs = getattr(self, relation_module_str)(hs, hs,
                                                    hs)  # shape = T,F, 1024
        hs = F.reshape(hs, (-1, hs.shape[-1]))
        hs = F.relu(self.fc3(hs))
        hs = F.reshape(hs, shape=(frame, box_num, -1))
        for relation_module_str in self.relation_module_name[
                len(self.relation_module_name) // 2:]:
            hs = getattr(self, relation_module_str)(hs, hs,
                                                    hs)  # shape = T,F, 1024
        hs = F.reshape(hs, (-1, hs.shape[-1]))
        hs = self.fc4(hs)

        hs = F.reshape(hs, shape=(frame, box_num, self.out_size))
        return hs
Example #18
    def attend(self, encoded_features):
        self.out_lstm.reset_state()
        transformed_encoded_features = F.concat([
            F.expand_dims(self.transform_encoded_features(feature), axis=1)
            for feature in encoded_features
        ],
                                                axis=1)
        concat_encoded_features = F.concat(
            [F.expand_dims(e, axis=1) for e in encoded_features], axis=1)

        lstm_output = self.xp.zeros_like(encoded_features[0])
        outputs = []
        for _ in range(self.num_labels):
            transformed_lstm_output = self.transform_out_lstm_feature(
                lstm_output)
            attended_feats = []
            for transformed_encoded_feature in F.separate(
                    transformed_encoded_features, axis=1):
                attended_feat = transformed_encoded_feature + transformed_lstm_output
                attended_feat = F.tanh(attended_feat)
                attended_feats.append(
                    self.generate_attended_feat(attended_feat))

            attended_feats = F.concat(attended_feats, axis=1)
            alphas = F.softmax(attended_feats, axis=1)

            lstm_input_feature = F.batch_matmul(alphas,
                                                concat_encoded_features,
                                                transa=True)
            lstm_input_feature = F.squeeze(lstm_input_feature, axis=1)
            lstm_output = self.out_lstm(lstm_input_feature)
            outputs.append(lstm_output)
        return outputs
Example #19
def gcams_to_mask(gcams_from_chainer, class_ids, dataset=None, img=None):
    if len(class_ids) == 0:
        return None
    gcams_np = []
    gcam_aggregate = None
    for i in range(len(gcams_from_chainer)):
        # gcam for class i
        gcams_np.append(cp.asnumpy(
            F.squeeze(gcams_from_chainer[i][0], 0).data))
    print(class_ids)

    for i in range(len(gcams_np)):
        # so earlier indices will have brighter heatmaps
        gcam_np = gcams_np[i]
        print("Max gcam magnitude for {}: ".format(class_names[
            int(class_ids[i]) + 1]) + str(np.max(gcams_np[i])))
        print("Min gcam magnitude for {}: ".format(class_names[
            int(class_ids[i]) + 1]) + str(np.min(gcams_np[i])))
        mask = _gcam_to_mask(gcam_np, int(class_ids[i]))
        assert mask is not None
        cv2.imshow("mask", np.uint8(mask))
        cv2.waitKey(0)

        if gcam_aggregate is None:
            gcam_aggregate = mask
        else:
            gcam_aggregate = gcam_aggregate + mask
    return gcam_aggregate
Example #20
    def __call__(self, x, z):
        """

        Args:
            x (~chainer.Variable): Batch of input vectors.
            z (~chainer.Variable): Batch of context vectors.

        Returns:
            ~chainer.Variable: Output of the context layer.

        """

        if self.has_uninitialized_params:
            with cuda.get_device(self._device_id):
                self._initialize_params(x.size // x.shape[0])

        batch_size = x.shape[0]

        # compute adaptive filter
        W = self.predictor(z)

        # reshape linear W to the correct size
        W = F.reshape(W, [batch_size] + self.shape)

        # add constant W if defined
        if self.constantW:
            W += F.tile(self.C, (batch_size, 1, 1))

        # multiply weights with inputs in batch mode
        y = F.squeeze(F.batch_matmul(W, x), 2)

        # add bias
        y += F.tile(self.b, tuple([batch_size, 1]))

        return y
Example #21
    def masked_self_attention(self, input, adj, step):
        adj = np.sum(adj, axis=1)
        # [mb, atoms, ch]
        mb, atoms, ch = input.shape
        attention_layer_index = 0 if self.attention_tying else step
        # [mb, atoms, hidden_dim]
        h = functions.reshape(input, shape=(mb * atoms, ch))
        h = self.linear_transform_layer[attention_layer_index](h)
        h = functions.reshape(h, shape=(mb, atoms, -1))
        # [mb, atoms, atoms, 2 * hidden_dim]
        a_input = functions.concat([functions.tile(h, reps=(1, 1, atoms)).reshape(mb, atoms * atoms, -1),
                                    functions.tile(h, reps=(1, atoms, 1))], axis=-1).reshape(mb, atoms, atoms,
                                                                                             2 * self.hidden_dim)
        a_input = functions.reshape(a_input, shape=(mb * atoms * atoms, 2 * self.hidden_dim))
        # [mb * atoms * atoms, 2 * hidden_dim] => [mb * atoms * atoms, 1] => [mb, atoms * atoms]
        e = functions.leaky_relu(
            functions.reshape(functions.squeeze(self.neural_network_layer[attention_layer_index](a_input), axis=-1),
                              shape=(mb, atoms, atoms)))

        # [mb, atoms, atoms]
        zero_vec = -9e15 * self.xp.ones_like(e, dtype=self.xp.float32)
        # [mb, atoms, atoms]
        attention = functions.where(adj > 0, e, zero_vec)

        # [mb, atoms, atoms]
        attention = functions.softmax(attention, axis=2)
        # [mb, atoms, atoms] * [mb, atoms, hidden_dim] => [mb, atoms, hidden_dim]
        h_prime = functions.matmul(attention, h)
        h_prime = functions.elu(h_prime)
        return h_prime
Example #22
    def forward(self, ws, cs, ls, dep_ts=None):
        ws = map(self.emb_word, ws)
        cs = [F.squeeze(
            F.max_pooling_2d(
                self.conv_char(
                    F.expand_dims(
                        self.emb_char(c), 1)), (int(l[0]), 1)))
                    for c, l in zip(cs, ls)]
        xs_f = [F.dropout(F.concat([w, c]), 0.5) for w, c in zip(ws, cs)]
        xs_b = [x[::-1] for x in xs_f]

        _, _, hs_f = self.lstm_f(None, None, xs_f)
        _, _, hs_b = self.lstm_b(None, None, xs_b)
        hs_b = [x[::-1] for x in hs_b]
        hs = [F.concat([h_f, h_b]) for h_f, h_b in zip(hs_f, hs_b)]

        dep_ys = [self.biaffine_arc(
            F.elu(F.dropout(self.arc_dep(h), 0.32)),
            F.elu(F.dropout(self.arc_head(h), 0.32))) for h in hs]

        if dep_ts is not None:
            heads = dep_ts
        else:
            heads = [F.argmax(y, axis=1) for y in dep_ys]

        cat_ys = [self.biaffine_tag(
                    F.elu(F.dropout(self.rel_dep(h), 0.32)),
                    F.elu(F.dropout(self.rel_head(
                        F.embed_id(t, h, ignore_label=IGNORE)), 0.32)))
                  for h, t in zip(hs, heads)]

        return cat_ys, dep_ys
Example #23
    def encode(self, image, obj, desc, num):
        xp = cuda.cupy
        cuda.get_device(GPU.gpus_to_use[num % GPU.num_gpus]).use()

        obj = np.asarray(obj, dtype=np.float32)
        obj = np.repeat(obj[np.newaxis], image.shape[0], axis=0)
        desc = np.asarray(desc, dtype=np.float32)
        desc = np.repeat(desc[np.newaxis], image.shape[0], axis=0)

        o_in = cuda.to_gpu(obj, GPU.gpus_to_use[num % GPU.num_gpus])
        d_in = cuda.to_gpu(desc, GPU.gpus_to_use[num % GPU.num_gpus])
        x_in = cuda.to_gpu(image, GPU.gpus_to_use[num % GPU.num_gpus])

        att, _, _ = self.enc_models[num % 2](Variable(x_in),
                                             Variable(o_in),
                                             Variable(d_in),
                                             train=False)

        att = F.reshape(att, (-1, 1, self.att_size, self.att_size))
        att = F.resize_images(att, (self.image_size, self.image_size))

        cir_z, _, _, _ = self.att_enc_models[num % 2](Variable(x_in) * att,
                                                      train=False)

        return cir_z, F.squeeze(F.concat((o_in[0], d_in[0]), axis=-1))
Example #24
 def __call__(self, S, h):
     batch_size, src_len, hidden_size = S.data.shape
     S = self.inner_weight(F.reshape(S,
                                     (batch_size * src_len, hidden_size)))
     S = F.reshape(S, (batch_size, src_len, hidden_size))
     a = F.softmax(F.squeeze(F.batch_matmul(S, h), axis=2))
     return a
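A hypothetical shape sketch of the scorer above (not from the original project), assuming inner_weight is an L.Linear(hidden_size, hidden_size); F.squeeze turns the (batch, src_len, 1) scores into (batch, src_len) before the softmax:

import numpy as np
import chainer.links as L
import chainer.functions as F

batch_size, src_len, hidden_size = 2, 5, 8
inner_weight = L.Linear(hidden_size, hidden_size)
S = np.random.randn(batch_size, src_len, hidden_size).astype(np.float32)
h = np.random.randn(batch_size, hidden_size).astype(np.float32)

s = inner_weight(F.reshape(S, (batch_size * src_len, hidden_size)))
s = F.reshape(s, (batch_size, src_len, hidden_size))
a = F.softmax(F.squeeze(F.batch_matmul(s, h), axis=2))   # (batch_size, src_len)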
Example #25
    def forward(self, x):
        batchsize = x.shape[0]
        assert self.is_convdim_compatible(x),\
            'kernel dim %d is not compatible with input spatial dim %d' % (self.gm.CONV_DIM, len(x.shape) - 2)

        Z = F.reshape(x, self.gm.get_dims(tensor_id=0, expanded=True).tolist())
        image_flags = self.gm.indices2flags(self.gm.get_image_indices())
        filter_flags = self.gm.indices2flags(self.gm.get_filter_indices())
        for tensor_id in range(1, self.gm.num_tensors):
            logging.debug('Next processing:')
            logging.debug(tensor_id)

            sum_flags = self.gm.indices2flags(
                self.gm.get_sum_indices(tensor_id))
            Z = expanded_einconv(Z, self.get_param(tensor_id), sum_flags,
                                 image_flags, filter_flags, self.xp)

            if self.gm.is_relu(tensor_id) and tensor_id < (
                    self.gm.num_tensors - 1):
                Z = F.relu(Z)

            if self.batchnorm:
                Z = self.get_bn(tensor_id)(Z)

        Z = F.squeeze(Z)
        for i, d in enumerate(self.gm.dims[image_flags].tolist()):
            if d == 1:
                Z = F.expand_dims(Z, i + 2)

        if batchsize == 1:
            Z = F.expand_dims(Z, 0)
        return Z
Example #26
 def __call__(self, id, x):
     W = self.W_embedding(id)
     b = F.squeeze(self.b_embedding(id))
     # Reshape the vector to be the right dimensions for 2D conv
     W = F.reshape(W,
                   (self.out_channels, self.in_channels, self.kh, self.kw))
     return F.convolution_2d(x, W, b, self.stride, self.pad)
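A hypothetical setup sketch for the dynamic convolution above (not from the original project), assuming the embeddings are L.EmbedID tables that hold one flattened kernel and one bias vector per id:

import numpy as np
import chainer.links as L
import chainer.functions as F

out_channels, in_channels, kh, kw = 4, 3, 3, 3
W_embedding = L.EmbedID(10, out_channels * in_channels * kh * kw)
b_embedding = L.EmbedID(10, out_channels)

id = np.array([1], dtype=np.int32)
x = np.random.randn(1, in_channels, 8, 8).astype(np.float32)

W = F.reshape(W_embedding(id), (out_channels, in_channels, kh, kw))
b = F.squeeze(b_embedding(id))                    # (1, out_channels) -> (out_channels,)
y = F.convolution_2d(x, W, b, stride=1, pad=1)    # (1, out_channels, 8, 8)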
Example #27
 def predict(self, xs):
     """
     batch: list of splitted sentences
     """
     xs = [self.extractor.process(x) for x in xs]
     batchsize = len(xs)
     ws, cs, ls = zip(*xs)
     ws = map(self.emb_word, ws)
     cs = [
         F.squeeze(
             F.max_pooling_2d(
                 self.conv_char(F.expand_dims(self.emb_char(c), 1)),
                 (l, 1))) for c, l in zip(cs, ls)
     ]
     xs_f = [
         F.dropout(F.concat([w, c]), self.dropout_ratio, train=self.train)
         for w, c in zip(ws, cs)
     ]
     xs_b = [x[::-1] for x in xs_f]
     cx_f, hx_f, cx_b, hx_b = self._init_state(batchsize)
     _, _, hs_f = self.lstm_f(hx_f, cx_f, xs_f, train=self.train)
     _, _, hs_b = self.lstm_b(hx_b, cx_b, xs_b, train=self.train)
     hs_b = [x[::-1] for x in hs_b]
     ys = [
         self.linear2(F.relu(self.linear1(F.concat([h_f, h_b]))))
         for h_f, h_b in zip(hs_f, hs_b)
     ]
     return [y.data[1:-1] for y in ys]
Example #28
 def forward(self, equery, vmemory, ememory, mask, iteration=0):
     """Compute an attention over memory given the query."""
     # equery.shape == (..., E)
     # vmemory.shape == (..., Ms, M)
     # ememory.shape == (..., Ms, E)
     # mask.shape == (..., Ms)
     # Setup memory embedding
     eq = F.repeat(equery[..., None, :], vmemory.shape[-2],
                   -2)  # (..., Ms, E)
     # Compute content based attention
     merged = F.concat(
         [eq, ememory, eq * ememory,
          F.squared_difference(eq, ememory)], -1)  # (..., Ms, 4*E)
     inter = self.att_linear(merged, n_batch_axes=len(vmemory.shape) -
                             1)  # (..., Ms, E)
     inter = F.tanh(inter)  # (..., Ms, E)
     inter = F.dropout(inter, DROPOUT)  # (..., Ms, E)
     # Split into sentences
     lengths = np.sum(np.any((vmemory != 0), -1), -1)  # (...,)
     mems = [s[..., :l, :] for s, l in zip(F.separate(inter, 0), lengths)
             ]  # B x [(M1, E), (M2, E), ...]
     _, bimems = self.att_birnn(None,
                                mems)  # B x [(M1, 2*E), (M2, 2*E), ...]
     bimems = F.pad_sequence(bimems)  # (..., Ms, 2*E)
     att = self.att_score(bimems, n_batch_axes=len(vmemory.shape) -
                          1)  # (..., Ms, 1)
     att = F.squeeze(att, -1)  # (..., Ms)
     if mask is not None:
         att += mask * MINUS_INF  # (..., Ms)
     return att
Example #29
File: Models.py Project: ebsrn/CORE
    def __call__(self, x, t, dataset, train=True):

        # Create variables
        x = Variable(x)
        x.to_gpu(self.gpu_id)
        t = Variable(t)
        t.to_gpu(self.gpu_id)

        # Config mode
        if len(t.shape) == 3:
            config_mode = 'segmentation'
        elif len(t.shape) == 2:
            config_mode = 'recognition'
        else:
            raise ValueError('label format is not supported')

        # Forward
        with chainer.using_config('train', train):
            with chainer.using_config('enable_backprop', train):
                # InceptionV3 backbone
                x = self.predictor(x)
                # Classifiers
                classifier_indx = self.args.dataset.split('+').index(dataset)
                y = self.classifiers[classifier_indx](x, train)
                # Loss
                if config_mode == 'segmentation':
                    self.y = F.resize_images(y,
                                             t.shape[-2:])  # Upsampling logits
                    self.loss = F.softmax_cross_entropy(self.y, t)
                elif config_mode == 'recognition':
                    self.y = F.squeeze(F.average_pooling_2d(
                        y, ksize=y.shape[-2:]),
                                       axis=(2, 3))  # Global Average Pooling
                    self.loss = F.sigmoid_cross_entropy(self.y, t)
        # Backward
        if train:
            # Clear grads for uninitialized params
            self.cleargrads()
            # Backwards
            self.loss.backward()

        # Reporter
        if config_mode == 'segmentation':
            self.y = F.argmax(self.y, axis=1)
            self.y.to_cpu()
            t.to_cpu()
            result = eval_semantic_segmentation(list(self.y.data),
                                                list(t.data))
            del result['iou'], result['class_accuracy']
            result.update({'loss': self.loss.data.tolist()})
            self.reporter.update({dataset: result})
        elif config_mode == 'recognition':
            self.reporter.update({
                dataset: {
                    'loss': self.loss.data.tolist(),
                    'prediction': F.sigmoid(self.y).data.tolist(),
                    'groundtruth': t.data.tolist()
                }
            })
Example #30
    def __call__(self, encs, hiddens, batch_size, prev_image, num_masks, color_channels):
        """
            Learn through StatelessCDNA.
            Args:
                encs: An array of computed transformation
                hiddens: An array of hidden layers
                batch_size: Size of mini batches
                prev_image: The image to transform
                num_masks: Number of masks to apply
                color_channels: Output color channels
            Returns:
                transformed: A list of masks to apply on the previous image
        """
        logger = logging.getLogger(__name__)
        
        enc0, enc1, enc2, enc3, enc4, enc5, enc6 = encs
        hidden1, hidden2, hidden3, hidden4, hidden5, hidden6, hidden7 = hiddens

        img_height = prev_image.shape[2]
        img_width = prev_image.shape[3]

        # CDNA specific
        enc7 = self.enc7(enc6)
        enc7 = F.relu(enc7)
        transformed_list = list([F.sigmoid(enc7)])

        # CDNA specific
        # Predict kernels using linear function of last layer
        cdna_input = F.reshape(hidden5, (int(batch_size), -1))
        cdna_kerns = self.cdna_kerns(cdna_input)

        # Reshape and normalize
        # B x C x H x W => B x NUM_MASKS x 1 x H x W
        cdna_kerns = F.reshape(cdna_kerns, (int(batch_size), self.num_masks, 1, DNA_KERN_SIZE, DNA_KERN_SIZE))
        cdna_kerns = F.relu(cdna_kerns - RELU_SHIFT) + RELU_SHIFT
        norm_factor = F.sum(cdna_kerns, (2, 3, 4), keepdims=True)
        cdna_kerns = broadcasted_division(cdna_kerns, norm_factor)

        # Treat the color channel dimension as the batch dimension since the same
        # transformation is applied to each color channel.
        # Treat the batch dimension as the channel dimension so that
        # F.depthwise_convolution_2d can apply a different transformation to each sample.
        cdna_kerns = F.reshape(cdna_kerns, (int(batch_size), self.num_masks, DNA_KERN_SIZE, DNA_KERN_SIZE))
        cdna_kerns = F.transpose(cdna_kerns, (1, 0, 2, 3))
        # Swap the batch and channel dimension.
        prev_image = F.transpose(prev_image, (1, 0, 2, 3))

        # Transform the image.
        transformed = F.depthwise_convolution_2d(prev_image, cdna_kerns, stride=(1, 1), pad=DNA_KERN_SIZE // 2)

        # Transpose the dimensions where they belong.
        transformed = F.reshape(transformed, (color_channels, int(batch_size), self.num_masks, img_height, img_width))
        transformed = F.transpose(transformed, (2, 1, 0, 3, 4))
        transformed = F.split_axis(transformed, indices_or_sections=self.num_masks, axis=0)
        transformed = [F.squeeze(t, axis=0) for t in transformed]

        transformed_list += transformed

        return transformed_list, enc7
Example #31
    def __call__(self, x, lam):
        h = GRL(lam)(x)

        h = F.dropout(F.relu(self.bn1(self.fc1(h))), 0.2)

        h = self.fc2(h)

        return F.squeeze(h)
Example #32
 def query(self, u):
     xp = cuda.get_array_module(u)
     size = self.m.shape[1]
     inds = xp.arange(size - 1, -1, -1, dtype=numpy.int32)
     tm = self.TA(inds)
     tc = self.TC(inds)
     tm = F.broadcast_to(tm, self.m.shape)
     tc = F.broadcast_to(tc, self.c.shape)
     p = F.softmax(F.batch_matmul(self.m + tm, u))
     o = F.batch_matmul(F.swapaxes(self.c + tc, 2, 1), p)
     o = F.squeeze(o, -1)
     u = o + u
     return u
Example #33
    def forward(self, inputs):
        """
        Parameters
        ----------
        inputs: ``torch.autograd.Variable``
            Shape ``(batch_size, timesteps, 50)`` of character ids representing the current batch.

        Returns
        -------
        Dict with keys:

        ``'activations'``: ``List[torch.autograd.Variable]``
            A list of activations at each layer of the network, each of shape
            ``(batch_size, timesteps + 2, embedding_dim)``
        ``'mask'``:  ``torch.autograd.Variable``
            Shape ``(batch_size, timesteps + 2)`` long tensor with sequence mask.

        Note that the output tensors all include additional special begin and end of sequence
        markers.
        """
        token_embedding = self._token_embedder.forward(inputs)
        type_representation = token_embedding['token_embedding']
        mask = token_embedding['mask']

        lstm_outputs = self._elmo_lstm.forward(type_representation, mask)

        # Prepare the output.  The first layer is duplicated.
        output_tensors = [
            F.concat([type_representation, type_representation], axis=-1)
        ]
        for layer_activations in F.split_axis(lstm_outputs, lstm_outputs.shape[0], axis=0):
            output_tensors.append(F.squeeze(layer_activations, 0))

        return {
            'activations': output_tensors,
            'mask': mask,
        }
Example #34
 def check_backward(self, x_data, g_data):
     gradient_check.check_backward(
         lambda x: functions.squeeze(x, self.axis),
         x_data, g_data, **self.check_backward_options)
Example #35
 def forward(self, inputs, device):
     x, = inputs
     return functions.squeeze(x, axis=self.axis),
Example #36
    def forward(self,
                inputs,
                batch_lengths,
                initial_state=None):
        """
        Parameters
        ----------
        inputs : ``torch.FloatTensor``, required.
            A tensor of shape (batch_size, num_timesteps, input_size)
            to apply the LSTM over.
        batch_lengths : ``List[int]``, required.
            A list of length batch_size containing the lengths of the sequences in batch.
        initial_state : ``Tuple[torch.Tensor, torch.Tensor]``, optional, (default = None)
            A tuple (state, memory) representing the initial hidden state and memory
            of the LSTM. The ``state`` has shape (1, batch_size, hidden_size) and the
            ``memory`` has shape (1, batch_size, cell_size).

        Returns
        -------
        output_accumulator : ``torch.FloatTensor``
            The outputs of the LSTM for each timestep. A tensor of shape
            (batch_size, max_timesteps, hidden_size) where for a given batch
            element, all outputs past the sequence length for that batch are
            zero tensors.
        final_state : ``Tuple[``torch.FloatTensor, torch.FloatTensor]``
            A tuple (state, memory) representing the initial hidden state and memory
            of the LSTM. The ``state`` has shape (1, batch_size, hidden_size) and the
            ``memory`` has shape (1, batch_size, cell_size).
        """
        batch_size = inputs.shape[0]
        total_timesteps = inputs.shape[1]

        output_accumulator_list = []
        if initial_state is None:
            full_batch_previous_memory = chainer.Variable(
                self.xp.zeros((batch_size, self.cell_size), 'f'))
            full_batch_previous_state = chainer.Variable(
                self.xp.zeros((batch_size, self.hidden_size), 'f'))
        else:
            # the first dimension is just (num_layers * num_directions), i.e., 1 here.
            full_batch_previous_state = F.squeeze(initial_state[0], axis=0)
            full_batch_previous_memory = F.squeeze(initial_state[1], axis=0)

        current_length_index = batch_size - 1 if self.go_forward else 0
        if self.recurrent_dropout_probability > 0.0 and \
           (self.training or chainer.config.train):
            dropout_mask = get_dropout_mask(self.recurrent_dropout_probability,
                                            full_batch_previous_state)
        else:
            dropout_mask = None

        for timestep in range(total_timesteps):
            # The index depends on which end we start.
            index = timestep if self.go_forward else total_timesteps - timestep - 1

            # What we are doing here is finding the index into the batch dimension
            # which we need to use for this timestep, because the sequences have
            # variable length, so once the index is greater than the length of this
            # particular batch sequence, we no longer need to do the computation for
            # this sequence. The key thing to recognise here is that the batch inputs
            # must be _ordered_ by length from longest (first in batch) to shortest
            # (last) so initially, we are going forwards with every sequence and as we
            # pass the index at which the shortest elements of the batch finish,
            # we stop picking them up for the computation.

            if self.go_forward:
                while batch_lengths[current_length_index] <= index:
                    current_length_index -= 1
            # If we're going backwards, we are _picking up_ more indices.
            else:
                # First conditional: Are we already at the maximum number of elements in the batch?
                # Second conditional: Does the next shortest sequence beyond the current batch
                # index require computation use this timestep?
                while current_length_index < (len(batch_lengths) - 1) and \
                        batch_lengths[current_length_index + 1] > index:
                    current_length_index += 1

            # Actually get the slices of the batch which we
            # need for the computation at this timestep.
            # shape (batch_size, cell_size)
            previous_memory = full_batch_previous_memory[0: current_length_index + 1]
            # Shape (batch_size, hidden_size)
            previous_state = full_batch_previous_state[0: current_length_index + 1]
            # Shape (batch_size, input_size)
            timestep_input = inputs[0: current_length_index + 1, index]

            # Do the projections for all the gates all at once.
            # Both have shape (batch_size, 4 * cell_size)
            projected_input = self.input_linearity(timestep_input)
            projected_state = self.state_linearity(previous_state)

            # Main LSTM equations using relevant chunks of the big linear
            # projections of the hidden state and inputs.
            # TODO: split_axis
            # TODO: cuda kernel
            input_gate = F.sigmoid(projected_input[:, (0 * self.cell_size):(1 * self.cell_size)] +
                                   projected_state[:, (0 * self.cell_size):(1 * self.cell_size)])
            forget_gate = F.sigmoid(projected_input[:, (1 * self.cell_size):(2 * self.cell_size)] +
                                    projected_state[:, (1 * self.cell_size):(2 * self.cell_size)])
            memory_init = F.tanh(projected_input[:, (2 * self.cell_size):(3 * self.cell_size)] +
                                 projected_state[:, (2 * self.cell_size):(3 * self.cell_size)])
            output_gate = F.sigmoid(projected_input[:, (3 * self.cell_size):(4 * self.cell_size)] +
                                    projected_state[:, (3 * self.cell_size):(4 * self.cell_size)])
            memory = input_gate * memory_init + forget_gate * previous_memory

            # Here is the non-standard part of this LSTM cell; first, we clip the
            # memory cell, then we project the output of the timestep to a smaller size
            # and again clip it.

            if self.memory_cell_clip_value:
                memory = F.clip(memory, -self.memory_cell_clip_value,
                                self.memory_cell_clip_value)

            # shape (current_length_index, cell_size)
            pre_projection_timestep_output = output_gate * F.tanh(memory)

            # shape (current_length_index, hidden_size)
            timestep_output = self.state_projection(
                pre_projection_timestep_output)
            if self.state_projection_clip_value:
                timestep_output = F.clip(timestep_output,
                                         -self.state_projection_clip_value,
                                         self.state_projection_clip_value)

            # Only do dropout if the dropout prob is > 0.0 and we are in training mode.
            if dropout_mask is not None:
                timestep_output = timestep_output * \
                    dropout_mask[0: current_length_index + 1]

            # We've been doing computation with less than the full batch, so here we create a new
            # variable for the whole batch at this timestep and insert the result for the
            # relevant elements of the batch into it.
            full_batch_previous_memory = F.concat(
                [memory, full_batch_previous_memory[current_length_index + 1:]], axis=0)
            full_batch_previous_state = F.concat(
                [timestep_output, full_batch_previous_state[current_length_index + 1:]], axis=0)
            output_accumulator_list.append(timestep_output)

        # Mimic the pytorch API by returning state in the following shape:
        # (num_layers * num_directions, batch_size, ...). As this
        # LSTM cell cannot be stacked, the first dimension here is just 1.
        final_state = (F.expand_dims(full_batch_previous_state, 0),
                       F.expand_dims(full_batch_previous_memory, 0))
        if not self.go_forward:
            output_accumulator_list = output_accumulator_list[::-1]
        output_accumulator = F.pad_sequence(output_accumulator_list)
        output_accumulator = output_accumulator.transpose((1, 0, 2))
        # (batch_size, total_timesteps, self.hidden_size)

        return output_accumulator, final_state
Example #37
 def check_forward(self, x_data):
     y = functions.squeeze(x_data, axis=self.axis)
     expected = numpy.squeeze(self.x, axis=self.axis)
     testing.assert_allclose(y.data, expected, **self.check_forward_options)
Example #38
 def check_invalid_type(self, x_data):
     with self.assertRaises(ValueError):
         functions.squeeze(x_data, axis=self.axis)
Example #39
 def test_invalid_axis(self):
     with self.assertRaises(TypeError):
         functions.squeeze(self.x, axis='a')
Example #40
 def check_invalid_type(self, x_data):
     with self.assertRaises(type_check.InvalidType):
         functions.squeeze(x_data, axis=self.axis)
Example #41
File: nn.py Project: musyoku/lstm
	def __call__(self, x):
		return functions.squeeze(x, self.axis)
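For reference, a minimal standalone sketch (not from any of the projects above) of what this thin wrapper computes: functions.squeeze drops size-1 axes, all of them by default, or only the requested one when axis is given.

import numpy as np
from chainer import functions

x = np.zeros((1, 3, 1, 4), dtype=np.float32)
print(functions.squeeze(x).shape)           # (3, 4): every size-1 axis removed
print(functions.squeeze(x, axis=0).shape)   # (3, 1, 4): only axis 0 removed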