Esempio n. 1
0
    def __call__(self, xs, ys):
        """Compute the CRF loss for a minibatch and report tagging counts.

        ``xs`` and ``ys`` are parallel lists of sequences.  The loss, the
        number of correctly predicted labels and the total number of labels
        are reported through ``reporter``; the loss variable is returned.
        """
        # transpose_sequence expects the sequences ordered longest-first, so
        # both lists are reordered with the same permutation.
        order = numpy.argsort([-len(seq) for seq in xs]).astype('i')
        sorted_xs = [xs[k] for k in order]
        sorted_ys = [ys[k] for k in order]

        # After the transpose, element t holds the batch for time step t.
        xs = F.transpose_sequence(sorted_xs)
        ys = F.transpose_sequence(sorted_ys)

        # One feature batch per time step.
        hs = [self.feature(step) for step in xs]
        loss = self.crf(hs, ys)
        reporter.report({'loss': loss.data}, self)

        # argmax returns the predicted labels, one entry per time step.
        _, predict = self.crf.argmax(hs)
        pairs = list(six.moves.zip(ys, predict))
        correct = sum(
            self.xp.sum(gold.data == guess) for gold, guess in pairs)
        total = sum(len(gold.data) for gold, _ in pairs)
        reporter.report({'correct': correct}, self)
        reporter.report({'total': total}, self)

        return loss
Esempio n. 2
0
    def check_forward(self, xs_data):
        """Check ``transpose_sequence`` output against the reference data.

        Verifies the number of transposed sequences, the length of each one
        (``self.trans_lengths``) and every element against ``self.xs``.
        """
        variables = [chainer.Variable(data) for data in xs_data]
        transposed = functions.transpose_sequence(variables)
        expected_lengths = self.trans_lengths

        self.assertEqual(len(transposed), len(expected_lengths))
        for out, length in zip(transposed, expected_lengths):
            self.assertEqual(len(out.data), length)

        # Row j of transposed sequence i must equal row i of input sequence j.
        for i, length in enumerate(expected_lengths):
            out = transposed[i]
            for j in six.moves.range(length):
                gradient_check.assert_allclose(out.data[j], self.xs[j][i])
    def forward(self, *inputs):
        """Compute the averaged label loss over leaves and internal nodes.

        ``inputs`` is a flat tuple made of six equally sized groups, in this
        order: lefts, rights, dests, labels, sequences, leaf_labels.  Reports
        ``loss``, ``total`` and ``correct`` and returns the loss.
        """
        group_size = len(inputs) // 6
        lefts, rights, dests, labels, sequences, leaf_labels = [
            inputs[k * group_size:(k + 1) * group_size] for k in range(6)]

        # Every group is reordered by descending length of the `lefts`
        # entries and then transposed.
        order = numpy.argsort([-len(left) for left in lefts])

        def time_major(group):
            return F.transpose_sequence([group[k] for k in order])

        lefts = time_major(lefts)
        rights = time_major(rights)
        dests = time_major(dests)
        labels = time_major(labels)
        sequences = time_major(sequences)
        leaf_labels = time_major(leaf_labels)

        n_items = len(order)
        max_len = len(sequences)

        loss = 0
        count = 0
        correct = 0

        stack = self.xp.zeros(
            (n_items, max_len * 2, self.n_units), self.xp.float32)

        # Leaves: embed each word, score it, and store it at stack slot
        # `position`.
        for position, (word, label) in enumerate(zip(sequences, leaf_labels)):
            n_active = word.shape[0]
            es = self.leaf(word)
            ds = self.xp.full((n_active,), position, self.xp.int32)
            y = self.label(es)
            step_loss = F.softmax_cross_entropy(y, label, normalize=False)
            loss += step_loss * n_active
            count += n_active
            guesses = self.xp.argmax(y.array, axis=1)
            correct += (guesses == label.array).sum()

            stack = thin_stack.thin_stack_set(stack, ds, es)

        # Internal nodes: fetch both children from the stack, compose them,
        # score the result, and write it back at `dest`.
        for left, right, dest, label in zip(lefts, rights, dests, labels):
            l_vec, stack = thin_stack.thin_stack_get(stack, left)
            r_vec, stack = thin_stack.thin_stack_get(stack, right)
            o = self.node(l_vec, r_vec)
            y = self.label(o)
            n_active = l_vec.shape[0]
            step_loss = F.softmax_cross_entropy(y, label, normalize=False)
            loss += step_loss * n_active
            count += n_active
            guesses = self.xp.argmax(y.array, axis=1)
            correct += (guesses == label.array).sum()

            stack = thin_stack.thin_stack_set(stack, dest, o)

        loss /= count
        reporter.report({'loss': loss}, self)
        reporter.report({'total': count}, self)
        reporter.report({'correct': correct}, self)
        return loss
    def __call__(self, x, t):
        """Return the mean squared error between the output for ``x`` and ``t``.

        ``x`` is transposed with ``F.transpose_sequence`` and fed step by
        step through ``self.xe`` and ``self.eh``; the result of the final
        step goes through ``self.hy``.  The loss is also reported under the
        key ``'loss'``.
        """
        steps = F.transpose_sequence(x)
        self.eh.reset_state()

        for step in steps:
            h = self.eh(self.xe(step))
        y = self.hy(h)

        # Turn the targets into a column so they line up with `y`.
        target = xp.reshape(t, (len(t), 1))
        loss = F.mean_squared_error(y, target)
        chainer.reporter.report({'loss': loss}, self)

        return loss
    def __call__(self, text, label, feature):
        """Return the RMSE between the prediction for ``text`` and ``label``.

        The sequence is run through ``self.xe``/``self.eh`` front to back and
        then back to front; the two final outputs and ``feature`` are
        concatenated and passed to ``self.hy``.  The RMSE is also reported
        under the key ``'loss'``.
        """
        steps = F.transpose_sequence(text)
        label = xp.reshape(label, (len(label), 1))
        feature = xp.reshape(feature, (len(feature), 1))
        self.eh.reset_state()

        # Front-to-back pass.
        for step in steps:
            cel = self.eh(self.xe(step))

        # Back-to-front pass, from the last step down to index 1.
        # NOTE(review): this pass reuses `self.eh` without resetting its state
        # and never visits step 0 -- confirm both are intended.
        for idx in range(len(steps) - 1, 0, -1):
            cel_back = self.eh(self.xe(steps[idx]))

        # Original author's shape notes: cel / cel_back = [10, 200],
        # blstm = [10, 400], blstm_f = [10, 401], predict = [10, 1]
        # (not verifiable from this block alone).
        blstm = F.concat((cel, cel_back))
        blstm_f = F.concat((blstm, feature))
        predict = self.hy(blstm_f)

        rmse = F.sqrt(F.mean_squared_error(predict, label))
        chainer.reporter.report({'loss': rmse}, self)

        return rmse
Esempio n. 6
0
    def __call__(self, x):
        """Run the forward computation for input ``x`` and return the output.

        Each time step is embedded with ``self.xe`` and passed through
        ``self.eh``, with dropout (ratio 0.1) applied after both; the result
        of the final step is given to ``self.hy``.
        """
        steps = F.transpose_sequence(x)
        self.eh.reset_state()
        for step in steps:
            embedded = F.dropout(self.xe(step), ratio=0.1)
            h = F.dropout(self.eh(embedded), ratio=0.1)

        return self.hy(h)
    def __call__(self, x):
        """Run the forward computation for input ``x`` and return the output.

        Each time step is embedded with ``self.xe``, passed through
        ``self.eh`` and ``self.ii``, and the per-step results are collected
        and handed to ``self.hy``.

        The previous version also evaluated ``F.softmax(y)`` into a local
        that was never read, and carried a disabled backward pass inside a
        string literal; both were dead code and have been removed.
        """
        x = F.transpose_sequence(x)

        self.eh.reset_state()
        cel = []
        for step in x:
            h = self.eh(self.xe(step))
            cel.append(self.ii(h))

        # NOTE(review): `cel` is a Python list of per-step outputs, passed to
        # `self.hy` as-is -- confirm `self.hy` accepts a list.
        y = self.hy(cel)

        return y
Esempio n. 8
0
    def predictor2(self, text, feature=None):
        """Return the prediction of ``self.hy`` for ``text``.

        The sequence is run through ``self.xe``/``self.eh`` front to back and
        then back to front; the two final outputs and ``feature`` are
        concatenated and passed to ``self.hy``.

        :param text: input sequences (passed to ``F.transpose_sequence``)
        :param feature: extra features concatenated to the recurrent output.
            The previous version read ``feature`` without receiving it as an
            argument; when omitted, a module-level ``feature`` is used so
            existing one-argument callers keep working.
            NOTE(review): presumably this must already be 2-D so that
            ``F.concat`` can join it -- confirm against the caller.
        :raises NameError: if ``feature`` is neither passed nor defined at
            module level.
        """
        if feature is None:
            try:
                feature = globals()['feature']
            except KeyError:
                raise NameError(
                    "predictor2() needs 'feature': pass it as an argument")

        x = F.transpose_sequence(text)
        self.eh.reset_state()

        # Front-to-back pass.
        for step in x:
            h = self.eh(self.xe(step))
        cel = h

        # Back-to-front pass, from the last step down to index 1.
        # NOTE(review): this pass reuses `self.eh` without resetting its state
        # and never visits step 0 -- confirm both are intended.
        for word in range(1, len(x)):
            hh = self.eh(self.xe(x[len(x) - word]))
        cel_back = hh

        blstm = F.concat((cel, cel_back))
        blstm_f = F.concat((blstm, feature))

        predict = self.hy(blstm_f)

        return predict
Esempio n. 9
0
    def __call__(self, x, Label):
        """Run the forward computation and return the RMSE against ``Label``.

        :param x: input value (passed to ``F.transpose_sequence``)
        :param Label: label values, indexed per batch entry
        """
        steps = F.transpose_sequence(x)

        self.eh.reset_state()
        cel = []
        for step in steps:
            h = self.eh(self.xe(step))
            cel.append(self.ii(h))

        # Join elements 0 and 1, then elements 2 .. len(cel) - 2.
        zz = F.concat((cel[0], cel[1]))
        for piece in cel[2:-1]:
            zz = F.concat((zz, piece))

        # Sigmoid output scaled by 6.
        score = F.sigmoid(self.hy(zz)) * 6

        # Root of the mean squared error over the first BATCH_SIZE entries.
        squared_error = sum(
            (score[k] - Label[k]) ** 2 for k in range(BATCH_SIZE))
        rmse = F.sqrt(squared_error / BATCH_SIZE)
        print("RMSE = ", rmse.data)
        return rmse
 def f(*xs):
     """Apply ``transpose_sequence`` to the positional arguments."""
     transposed = functions.transpose_sequence(xs)
     return transposed
Esempio n. 11
0
 def __call__(self, xs, ys):
     """Reorder and transpose ``xs`` and ``ys``, then call the parent.

     Each list is permuted with the indices that ``argsort_list_descent``
     computes from that same list, and then transposed.
     """
     def time_major(seqs):
         ordered = permutate_list(seqs, argsort_list_descent(seqs), inv=False)
         return F.transpose_sequence(ordered)

     prepared_xs = time_major(xs)
     prepared_ys = time_major(ys)
     return super(CRF, self).__call__(prepared_xs, prepared_ys)
Esempio n. 12
0
 def argmax(self, xs):
     """Return the parent's argmax score and the re-transposed path.

     ``xs`` is permuted with the indices from ``argsort_list_descent`` and
     transposed before being handed to the parent ``argmax``.
     """
     order = argsort_list_descent(xs)
     time_major = F.transpose_sequence(permutate_list(xs, order, inv=False))
     score, best_path = super(CRF, self).argmax(time_major)
     # NOTE(review): the permutation applied above is not undone here, so the
     # returned paths follow the reordered input -- confirm callers expect it.
     return score, F.transpose_sequence(best_path)
Esempio n. 13
0
 def __call__(self, xs, ys):
     """Reorder and transpose ``xs`` and ``ys``, then call the parent.

     Each list is permuted with the indices that ``argsort_list_descent``
     computes from that same list, and then transposed.
     """
     def time_major(seqs):
         ordered = permutate_list(seqs, argsort_list_descent(seqs), inv=False)
         return F.transpose_sequence(ordered)

     prepared_xs = time_major(xs)
     prepared_ys = time_major(ys)
     return super(CRF, self).__call__(prepared_xs, prepared_ys)
Esempio n. 14
0
 def argmax(self, xs):
     """Return the parent's argmax score and the re-transposed path.

     ``xs`` is permuted with the indices from ``argsort_list_descent`` and
     transposed before being handed to the parent ``argmax``.
     """
     order = argsort_list_descent(xs)
     time_major = F.transpose_sequence(permutate_list(xs, order, inv=False))
     score, best_path = super(CRF, self).argmax(time_major)
     # NOTE(review): the permutation applied above is not undone here, so the
     # returned paths follow the reordered input -- confirm callers expect it.
     return score, F.transpose_sequence(best_path)
    def _test_mask_recurrent_state_at(self, gpu):
        """Check masking, extraction and concatenation of recurrent states.

        The output of ``n_step_forward`` over two sequences is the reference.
        Feeding the two time steps one after another must reproduce its
        second-step output, except in the batch rows whose recurrent state
        was masked between the steps.
        """
        in_size, out_size = 2, 4
        rseq = StatelessRecurrentSequential(
            L.Linear(in_size, 3),
            F.elu,
            L.NStepGRU(1, 3, out_size, 0),
            F.softmax,
        )
        if gpu >= 0:
            chainer.cuda.get_device_from_id(gpu).use()
            rseq.to_gpu()
        xp = rseq.xp
        seqs_x = [
            xp.random.uniform(-1, 1, size=(2, in_size)).astype(np.float32)
            for _ in range(2)
        ]
        transposed_x = F.transpose_sequence(seqs_x)
        print('transposed_x[0]', transposed_x[0])

        def two_steps(edit_state=None):
            # Run step 0, optionally rewrite the recurrent state, run step 1.
            _, state = rseq(transposed_x[0], None)
            if edit_state is not None:
                state = edit_state(state)
            return rseq(transposed_x[1], state)

        def assert_close(actual, desired):
            xp.testing.assert_allclose(actual, desired)

        def assert_differs(actual, desired):
            with self.assertRaises(AssertionError):
                xp.testing.assert_allclose(actual, desired)

        # Reference: both steps in a single n_step_forward call.
        flat_out, nstep_rs = rseq.n_step_forward(
            seqs_x, None, output_mode='concat')
        nstep_out = F.reshape(flat_out, (2, 2, out_size))
        reference = nstep_out.array

        # Without masking, stepping twice must match n_step_forward.
        nomask_out, nomask_rs = two_steps()
        assert_close(reference[:, 1], nomask_out.array)
        assert_close(nstep_rs[0].array, nomask_rs[0].array)

        # Mask only the 1st row: only the 2nd row should still match.
        mask0_out, _ = two_steps(
            lambda rs: rseq.mask_recurrent_state_at(rs, 0))
        assert_differs(reference[0, 1], mask0_out.array[0])
        assert_close(reference[1, 1], mask0_out.array[1])

        # Mask only the 2nd row: only the 1st row should still match.
        mask1_out, _ = two_steps(
            lambda rs: rseq.mask_recurrent_state_at(rs, 1))
        assert_close(reference[0, 1], mask1_out.array[0])
        assert_differs(reference[1, 1], mask1_out.array[1])

        # Mask both rows: both should differ.
        mask01_out, _ = two_steps(
            lambda rs: rseq.mask_recurrent_state_at(rs, [0, 1]))
        assert_differs(reference[0, 1], mask01_out.array[0])
        assert_differs(reference[1, 1], mask01_out.array[1])

        # Extract each row's state, concatenate them again, and resume.
        def split_and_rejoin(rs):
            rs0 = rseq.get_recurrent_state_at(rs, 0, unwrap_variable=True)
            rs1 = rseq.get_recurrent_state_at(rs, 1, unwrap_variable=True)
            return rseq.concatenate_recurrent_states([rs0, rs1])

        getcon_out, getcon_rs = two_steps(split_and_rejoin)
        assert_close(getcon_rs[0].array, nomask_rs[0].array)
        assert_close(reference[0, 1], getcon_out.array[0])
        assert_close(reference[1, 1], getcon_out.array[1])
Esempio n. 16
0
 def __call__(self, x):
     """Feed ``x`` step by step through the LSTM and return ``self.out``.

     Every time step is embedded, passed through dropout and given to
     ``self.lstm``; ``self.lstm.h`` after the last step goes through dropout
     and ``self.out``.
     """
     # NOTE(review): the LSTM state is not reset in this method -- presumably
     # the caller does it; confirm.
     for step in F.transpose_sequence(x):
         embedded = self.embed(step)
         self.lstm(F.dropout(embedded, train=self.train))
     final_state = F.dropout(self.lstm.h, train=self.train)
     return self.out(final_state)
Esempio n. 17
0
def main():
    """Load a trained tagger and print predicted/target tags for a dataset.

    Command-line options select the model type, the model file, the number
    of LSTM units, an optional GloVe file and whether to use the validation
    data instead of the test data.  Output is one ``prediction<TAB>target``
    line per token, with a blank line after each sequence.

    ``--model-type`` is validated by argparse.  Previously an unknown value
    left ``model`` unbound and surfaced as a NameError after the data had
    been loaded.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--unit',
                        '-u',
                        type=int,
                        default=100,
                        help='Number of LSTM units in each layer')
    parser.add_argument('--glove',
                        type=str,
                        default="",
                        help='path to glove vector')
    parser.add_argument('--model-type',
                        dest='model_type',
                        type=str,
                        required=True,
                        choices=('bilstm', 'lstm', 'charlstm'),
                        help='bilstm / lstm / charlstm')
    parser.add_argument('--model',
                        type=str,
                        required=True,
                        help='path to model file')
    parser.add_argument('--dev',
                        action='store_true',
                        help='If true, use validation data')
    args = parser.parse_args()

    data = DataProcessor(data_path="../work/", use_gpu=-1, test=False)
    data.prepare()

    test = data.dev_data if args.dev else data.test_data

    # Constructor arguments shared by all three tagger variants.
    common = dict(n_vocab=len(data.vocab),
                  embed_dim=100,
                  hidden_dim=args.unit,
                  n_tag=len(data.tag),
                  dropout=None)
    if args.model_type == "lstm":
        model = CRFNERTagger(**common)
    elif args.model_type == 'bilstm':
        model = CRFBiNERTagger(**common)
    else:  # 'charlstm' -- argparse `choices` rules out anything else
        model = CRFBiCharNERTagger(n_char=len(data.char), **common)

    # load glove vector
    if args.glove:
        sys.stderr.write("loading GloVe...")
        model.load_glove(args.glove, data.vocab)
        sys.stderr.write("done.\n")

    # NOTE(review): the optimizer is only set up, never stepped, in this
    # function; kept as in the original -- confirm whether it is needed.
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    serializers.load_npz(args.model, model)

    test_iter = chainer.iterators.SerialIterator(test,
                                                 repeat=False,
                                                 shuffle=False,
                                                 batch_size=10)

    id2tag = data.id2tag

    for ys, ts in tqdm(predict(test_iter, args.model_type, model, args.unit)):
        # One minibatch: transpose, then map tag ids to tag names.
        ys = [[id2tag[i] for i in y.data] for y in F.transpose_sequence(ys)]
        ts = [[id2tag[i] for i in t.data] for t in F.transpose_sequence(ts)]
        # One sequence at a time.
        for predict_seq, target_seq in zip(ys, ts):
            for p, t in zip(predict_seq, target_seq):
                print("{}\t{}".format(p, t))
            print()
Esempio n. 18
0
    def __call__(self, x, Label, feature):
        """Run the forward computation and return ``(rmse, y)``.

        The sequence is run front to back through ``self.eh`` and back to
        front through ``self.eh2``; the per-step outputs of ``self.ii`` are
        concatenated together with ``feature`` and given to ``self.hy``.

        The previous version contained leftover debug code that indexed the
        hard-coded step ``x[700]`` and pushed it through ``self.eh`` right
        after ``reset_state()``.  That raised IndexError for inputs with
        fewer than 701 steps, and otherwise added an extra call on
        ``self.eh`` before the real sequence.  It has been removed.

        :param x: input value (passed to ``F.transpose_sequence``)
        :param Label: label values, indexed per batch entry
        :param feature: extra features concatenated along axis 1
        :return: tuple ``(rmse, y)``
        """
        x = F.transpose_sequence(x)
        self.eh.reset_state()

        # Front-to-back pass.
        cel = []
        for step in x:
            h = self.eh(self.xe(step))
            cel.append(self.ii(h))

        # Back-to-front pass, from the last step down to index 1.
        # NOTE(review): step 0 is never visited in this direction -- confirm
        # this is intended.
        cel_back = []
        self.eh2.reset_state()
        for idx in range(len(x) - 1, 0, -1):
            hh = self.eh2(self.xe(x[idx]))
            cel_back.append(self.ii(hh))

        # Interleave matching entries of both passes for indices
        # 0 .. len(cel) - 2.
        zz = F.concat((cel[0], cel_back[0]))
        for con in range(1, len(cel) - 1):
            kkk = F.concat((cel[con], cel_back[con]))
            zz = F.concat((zz, kkk))
        zzz = F.concat((zz, feature), axis=1)

        y = self.hy(zzz)

        # Root of the mean squared error over the first BATCH_SIZE entries.
        rmse = 0
        for i in range(BATCH_SIZE):
            rmse += ((y[i] - Label[i])**2)
        rmse = F.sqrt(rmse / BATCH_SIZE)
        print("RMSE = ", rmse.data)
        return rmse, y
    def translate(self, xs, max_length=100):
        """Greedily decode ``xs`` and return one id array per input.

        Words and characters of each input are mapped to ids (unknown ones
        to ``UNK``), embedded, and encoded in both directions.  The decoder
        is then stepped ``max_length`` times, starting from ``EOS`` and
        feeding back the argmax of each step.  Each returned array is cut
        at its first ``EOS``.

        Compared with the previous version, the unused locals ``wx_len``,
        ``wx_section`` and ``valid_wx_section`` are gone and the
        ``list(map(lambda ...))`` calls are comprehensions.

        :param xs: input sentences, each an iterable of word strings
        :param max_length: number of decoding steps
        :return: list of 1-D id arrays, one per input
        """
        print("Now translating")
        batch = len(xs)
        print("batch", batch)
        with chainer.no_backprop_mode(), chainer.using_config('train', False):
            wxs = [
                np.array([source_word_ids.get(w, UNK) for w in x],
                         dtype=np.int32) for x in xs
            ]
            # Character ids of all words of a sentence joined together.
            cxs = [
                np.array(
                    [source_char_ids.get(c, UNK) for c in "".join(x)],
                    dtype=np.int32) for x in xs
            ]

            wexs = sequence_embed(self.embed_xw, wxs)
            cexs = sequence_embed(self.embed_xc, cxs)

            # The "b" encoders receive the reversed sequences.
            wexs_b = [wex[::-1] for wex in wexs]
            cexs_b = [cex[::-1] for cex in cexs]

            _, hfw = self.encoder_fw(None, wexs)
            h1, hbw = self.encoder_bw(None, wexs_b)
            _, hfc = self.encoder_fc(None, cexs)
            h2, hbc = self.encoder_bc(None, cexs_b)

            # Flip the reversed-direction outputs back before pairing them
            # with the forward ones.
            hbw = [F.get_item(hb, range(len(hb))[::-1]) for hb in hbw]
            hbc = [F.get_item(hb, range(len(hb))[::-1]) for hb in hbc]
            htw = [F.concat([f, b], axis=1) for f, b in zip(hfw, hbw)]
            htc = [F.concat([f, b], axis=1) for f, b in zip(hfc, hbc)]
            # Word-level and character-level encodings stacked along axis 0.
            ht = [F.concat([w, c], axis=0) for w, c in zip(htw, htc)]

            ys = self.xp.full(batch, EOS, 'i')
            result = []
            h = F.concat([h1, h2], axis=2)
            for _ in range(max_length):
                eys = self.embed_y(ys)
                eys = chainer.functions.split_axis(eys, batch, 0)
                h_list, h_bar_list, c_s_list, z_s_list = self.decoder(
                    h, ht, eys)
                cys = chainer.functions.concat(h_list, axis=0)
                wy = self.W(cys)
                ys = self.xp.argmax(wy.data, axis=1).astype('i')
                result.append(ys)
                # Last element of the transposed decoder output becomes the
                # next initial state.
                h = F.transpose_sequence(h_list)[-1]
                h = F.reshape(h, (self.n_layers, h.shape[0], h.shape[1]))

        # One row per input after the transpose.
        result = cuda.to_cpu(self.xp.stack(result).T)

        # Cut each row at its first EOS.
        outs = []
        for y in result:
            inds = np.argwhere(y == EOS)
            if len(inds) > 0:
                y = y[:inds[0, 0]]
            outs.append(y)
        return outs
Esempio n. 20
0
 def __call__(self, *xs):
     """Apply ``F.transpose_sequence`` to the positional arguments."""
     transposed = F.transpose_sequence(xs)
     return transposed
Esempio n. 21
0
    def translate(self, xs, max_length=100):
        """Greedily decode ``xs``, re-spelling predicted UNK words by character.

        Words are mapped to ids (unknown ones to ``UNK``), embedded and
        encoded in both directions.  The word-level decoder is stepped
        ``max_length`` times; for each position predicted as ``UNK`` a
        character-level decoder is run for 10 steps and the resulting string
        is looked up in ``target_word_ids``.  Each returned array is cut at
        its first ``EOS``.

        NOTE(review): several names used in the UNK branch look inconsistent
        (see the inline notes); this block is documented as-is, not fixed.

        :param xs: input sentences, each an iterable of word strings
        :param max_length: number of word-level decoding steps
        :return: list of 1-D id arrays, one per input
        """
        print("Now translating")
        batch = len(xs)
        print("batch", batch)
        #loss_w = 0
        #loss_c1 = 0
        #loss_c2 = 0
        with chainer.no_backprop_mode(), chainer.using_config('train', False):
            # Filled by get_unk_hidden_vector (passed in below) and indexed
            # later with an absolute word position.
            char_hidden = []

            wxs = [
                np.array([source_word_ids.get(w, UNK) for w in x],
                         dtype=np.int32) for x in xs
            ]
            # The source words that were mapped to UNK, per sentence.
            unk_words = list(map(lambda x, y: np.array(y)[x == UNK], wxs, xs))
            # Character ids of each of those unknown words.
            unk_xs = list(
                map(
                    lambda x: np.array([
                        np.array(
                            [source_char_ids.get(c, UNK) for c in list(w)],
                            dtype=np.int32) for w in x
                    ]), unk_words))
            # Positions of the unknown words inside each sentence.
            unk_pos = [np.where(x == UNK)[0] for x in wxs]
            wx_len = [len(wx) for wx in wxs]
            wx_section = np.cumsum(wx_len[:-1])
            # Start offset of every sentence in the concatenated word array.
            valid_wx_section = np.insert(wx_section, 0, 0)
            concat_wxs = np.concatenate(wxs)

            #wys = [np.array([target_word_ids.get(w, UNK) for w in y], dtype=np.int32) for y in ys]
            #eos = self.xp.array([EOS], 'i')
            #ys_out = [F.concat([y, eos], axis=0) for y in wys]
            #concat_ys_out = F.concat(ys_out, axis=0)
            #n_words = len(concat_ys_out)

            exs = sequence_embed(self.embed_x, wxs)
            exs = list(
                map(
                    lambda s, t, u: get_unk_hidden_vector(
                        s, t, u, self.embed_xc, self.char_encoder, char_hidden
                    ), exs, unk_pos, unk_xs))

            exs_f = exs
            # The backward encoder receives the reversed sequences.
            exs_b = [ex[::-1] for ex in exs]
            _, hf = self.encoder_f(None, exs_f)
            _, hb = self.encoder_b(None, exs_b)
            # NOTE(review): `hb` is concatenated without being flipped back
            # to forward order -- confirm this is intended.
            ht = list(map(lambda x, y: F.concat([x, y], axis=1), hf, hb))
            ys = self.xp.full(batch, EOS, 'i')
            result = []
            h_list = None
            for a in range(max_length):
                # NOTE(review): `ys` is never reassigned inside this loop, so
                # every step embeds the initial EOS ids -- confirm.
                eys = self.embed_y(ys)
                eys = F.split_axis(eys, batch, 0)
                # First step: no previous decoder output, so h0 stays None.
                # NOTE(review): `is None` would be the idiomatic comparison.
                if h_list == None:
                    h0 = h_list
                else:
                    h0 = F.transpose_sequence(h_list)[-1]
                    h0 = F.reshape(h0,
                                   (self.n_layers, h0.shape[0], h0.shape[1]))
                #h0 : {type:variable, shape:(n_layers*batch*dimentionality)} or None
                h_list, h_bar_list, c_s_list, z_s_list = self.decoder(
                    h0, ht, eys)

                os = h_list
                concat_os = F.concat(os, axis=0)
                concat_os_out = self.W(concat_os)
                concat_pred_w = self.xp.argmax(concat_os_out.data,
                                               axis=1).astype('i')
                is_unk = concat_pred_w == UNK

                if UNK in concat_pred_w:
                    N = np.sum(is_unk)

                    # NOTE(review): `concat_ys_out` is not assigned anywhere
                    # in this method (its only assignment is in the
                    # commented-out lines above); unless a module-level name
                    # exists this raises NameError -- confirm.
                    true_wys = concat_ys_out[is_unk]

                    concat_c_s = F.concat(c_s_list, axis=0)
                    concat_h_bar = F.concat(h_bar_list, axis=0)

                    c_ss = concat_c_s[is_unk]
                    h_bars = concat_h_bar[is_unk]
                    c = F.concat([c_ss, h_bars], axis=1)
                    ds_hats = F.relu(self.W_hat(c))

                    # Shift each sentence's z_s by that sentence's start
                    # offset so it indexes the concatenated word array.
                    abs_z_s_list = [
                        z_s_list[i] + valid_wx_section[i]
                        for i in range(len(z_s_list))
                    ]
                    concat_z_s = F.concat(abs_z_s_list, axis=0)
                    z_ss = concat_z_s[is_unk]

                    # For each UNK word
                    results_c = []
                    bow = self.xp.array([BOW], 'i')
                    for i in range(N):
                        wy = true_wys[i]
                        if wy != UNK and wy != EOS:
                            cys = np.array([[
                                target_char_ids[c]
                                for c in list(target_words[wy])
                            ]], np.int32)
                        elif wy == UNK:
                            # Should not happen in principle
                            cys = np.array([[target_char_ids['UNK']]],
                                           np.int32)
                        elif wy == EOS:
                            cys = np.array([[target_char_ids['BOW']]],
                                           np.int32)
                        # NOTE(review): `concat_cys_out` is only referenced
                        # by the commented-out loss code below.
                        cys_out = [F.concat([y, bow], axis=0) for y in cys]
                        concat_cys_out = F.concat(cys_out, axis=0)

                        result_c = []
                        cy = self.xp.full(1, BOW, 'i')
                        cy = F.split_axis(cy, 1, 0)
                        cey = sequence_embed(self.embed_yc, cy)
                        z_s = int(z_ss[i].data)
                        ds_hat = F.reshape(ds_hats[i],
                                           (1, 1, ds_hats[i].shape[0]))

                        cos_out_list = []
                        # Branch on whether the source word at position z_s
                        # was itself unknown.
                        if concat_wxs[z_s] != UNK:
                            for b in range(10):
                                # Character-level decoder without attention
                                ds_hat, cos = self.char_decoder(ds_hat, cey)
                                cos_out = self.W_char(cos[0])
                                cos_out_list.append(cos_out)
                                pred_cos = self.xp.argmax(cos_out.data,
                                                          axis=1).astype('i')
                                cey = self.embed_yc(pred_cos)
                                print(pred_cos)
                                print(target_chars[pred_cos])
                                result_c.append(pred_cos)
                            #concat_cos_out = F.concat(cos_out_list, axis=0)
                            #loss_c1= loss_c1 + F.sum(F.softmax_cross_entropy(
                            #    concat_cos_out, concat_cys_out, reduce='no'))
                        else:
                            c_ht = char_hidden[z_s]
                            for b in range(10):
                                # Character-level decoder with attention
                                if b == 0:
                                    c_h0 = ds_hat
                                else:
                                    # NOTE(review): this reads the word-level
                                    # `h_list`, not the `c_h_list` returned
                                    # by char_att_decoder -- looks
                                    # unintended; confirm.
                                    c_h0 = F.transpose_sequence(h_list)[-1]
                                    c_h0 = F.reshape(
                                        c_h0, (self.n_layers, c_h0.shape[0],
                                               c_h0.shape[1]))
                                c_h_list, c_h_bar_list, c_c_s_list, c_z_s_list = self.char_att_decoder(
                                    c_h0, c_ht, cey)
                                # NOTE(review): again `h_list`, while
                                # `c_h_list` goes unused -- confirm.
                                cos_out = self.W_char(h_list[-1])
                                cos_out_list.append(cos_out)
                                pred_cos = self.xp.argmax(cos_out.data,
                                                          axis=1).astype('i')
                                cey = self.embed_yc(pred_cos)
                                print(pred_cos)
                                print(target_chars[pred_cos])
                                result_c.append(pred_cos)
                            #concat_cos_out = F.concat(cos_out_list, axis=0)
                            #loss_c2 = loss_c2 + F.sum(F.softmax_cross_entropy(
                            #    concat_cos_out, concat_cys_out, reduce='no'))
                        # Join the predicted characters up to the first BOW
                        # and look the string up as a target word.
                        r = ""
                        for c in result_c:
                            if c == BOW:
                                break
                            r += target_chars.get(c, UNK)
                        print(r)
                        pred_w = target_word_ids.get(r, UNK)
                        results_c.append(pred_w)
                    # Replace the UNK predictions with the re-spelled words.
                    concat_pred_w[is_unk] = results_c
                #loss_w = loss_w + F.sum(F.softmax_cross_entropy(
                #    concat_os_out[is_unk!=1], concat_ys_out[is_unk!=1], reduce='no'))
                result.append(concat_pred_w)
            #loss = F.sum(loss_w + Alpha * loss_c1 + Beta * loss_c2) / n_words
        # One row per input after the transpose.
        result = cuda.to_cpu(self.xp.stack(result).T)

        # Remove EOS tags
        outs = []
        for y in result:
            inds = np.argwhere(y == EOS)
            if len(inds) > 0:
                y = y[:inds[0, 0]]
            outs.append(y)
        return outs
Esempio n. 22
0
    def __call__(self, source, bio, tag, compute_loss=True):
        """
        Conduct forward propagation for the BIO and tag heads, optionally
        compute the weighted loss, and decode the predictions.

        The batch is sorted by descending sequence length before the forward
        pass and the predictions are put back into the caller's order before
        returning. The permutation and the sorted lengths are remembered on
        ``self.inds`` and ``self.xs_src_len``.

        :param source: batch of input sequences (each must support ``len``)
        :param bio: gold BIO labels aligned with ``source``; only read when
            ``compute_loss`` is true
        :param tag: gold tag labels aligned with ``source``; only read when
            ``compute_loss`` is true
        :param compute_loss: whether to compute and return the loss
        :return: ``(loss, predicts_bio, predicts_tag)`` when ``compute_loss``
            is true, otherwise ``(predicts_bio, predicts_tag)``. ``loss`` is
            ``self.weight_bio * loss_bio + self.weight_tag * loss_tag``; the
            predictions are Python lists in the original batch order.
        :raises ValueError: if ``self.lossfun`` is neither ``'crf'`` nor
            ``'softmax'``
        """
        # Order by a sequence length
        self.inds = np.argsort([-len(x) for x in source]).astype('i')  # Remember the original order
        xs_src = [source[i] for i in self.inds]
        self.xs_src_len = [len(x) for x in xs_src]  # Remember the batch length

        # Forward propagation
        pred_list_bio, pred_list_tag = self.forward(
            source=xs_src,
        )  # batch_size x (sequence_length, 2 * n_units)

        # Loss and predictions for each head; bio is always handled before tag
        if self.lossfun == 'crf':
            loss_bio, predicts_bio = self._crf_loss_and_predict(
                self.crf_bio, pred_list_bio, bio, compute_loss)
            loss_tag, predicts_tag = self._crf_loss_and_predict(
                self.crf_tag, pred_list_tag, tag, compute_loss)
        elif self.lossfun == 'softmax':
            loss_bio, predicts_bio = self._softmax_loss_and_predict(
                pred_list_bio, bio, compute_loss)
            loss_tag, predicts_tag = self._softmax_loss_and_predict(
                pred_list_tag, tag, compute_loss)
        else:
            # Without this guard the prediction lists stay empty and the
            # re-ordering below fails with an unrelated IndexError.
            raise ValueError(
                "Unsupported lossfun: {!r} (expected 'crf' or 'softmax')".format(
                    self.lossfun))

        # Transform variable from GPU to CPU
        cpu_predicts_bio = []
        cpu_predicts_tag = []
        for pred_bio, pred_tag in zip(predicts_bio, predicts_tag):
            cpu_predicts_bio.append(chainer.cuda.to_cpu(pred_bio.data).tolist())
            cpu_predicts_tag.append(chainer.cuda.to_cpu(pred_tag.data).tolist())

        # Re-order: argsort of a permutation is its inverse, so restore[k] is
        # the sorted position that holds the k-th original sequence.
        restore = np.argsort(self.inds)
        cpu_predicts_bio = [cpu_predicts_bio[i] for i in restore]
        cpu_predicts_tag = [cpu_predicts_tag[i] for i in restore]

        if compute_loss:
            loss = self.weight_bio * loss_bio + self.weight_tag * loss_tag
            return loss, cpu_predicts_bio, cpu_predicts_tag
        else:
            return cpu_predicts_bio, cpu_predicts_tag

    def _crf_loss_and_predict(self, crf, pred_list, labels, compute_loss):
        """
        Compute the CRF loss (optional) and the decoded path for one head.

        :param crf: CRF link to call for the loss and for ``argmax``
        :param pred_list: per-sequence outputs, already sorted by ``self.inds``
        :param labels: gold labels in the caller's order; only read when
            ``compute_loss`` is true
        :param compute_loss: whether to compute the loss
        :return: ``(loss, predicts)``; ``loss`` is a zero variable when
            ``compute_loss`` is false, ``predicts`` is the decoded path
            transposed back to one entry per sequence
        """
        hs = F.transpose_sequence(pred_list)  # sequence_length x (batch_size)
        loss = chainer.Variable(self.xp.array(0, dtype='f'))
        if compute_loss:
            ys = [labels[i] for i in self.inds]
            ts = F.transpose_sequence(ys)  # sequence_length x (batch_size)
            loss = crf(hs, ts)
        _, predicts_trans = crf.argmax(hs)
        return loss, F.transpose_sequence(predicts_trans)

    def _softmax_loss_and_predict(self, pred_list, labels, compute_loss):
        """
        Compute the softmax cross-entropy loss (optional) and the argmax
        predictions for one head.

        :param pred_list: per-sequence outputs, already sorted by ``self.inds``
        :param labels: gold labels in the caller's order; only read when
            ``compute_loss`` is true
        :param compute_loss: whether to compute the loss
        :return: ``(loss, predicts)``; ``loss`` is the per-sequence loss summed
            and divided by ``len(labels)`` (a zero variable when
            ``compute_loss`` is false), ``predicts`` holds one ``F.argmax``
            result per sequence
        """
        loss = chainer.Variable(self.xp.array(0, dtype='f'))
        if compute_loss:
            ys = [labels[i] for i in self.inds]
            for p_lst, y_lst in zip(pred_list, ys):
                loss += F.softmax_cross_entropy(p_lst, y_lst)
            loss /= len(labels)
        predicts = [F.argmax(p_lst, axis=1) for p_lst in pred_list]
        return loss, predicts
 def _wrapper(self, batch):
     """Pad ``batch``, convert it to an ``xp`` array of ``self.dtype`` and
     return its ``F.transpose_sequence`` result."""
     backend = self.xp
     padded = backend.asarray(self.pad(batch), dtype=self.dtype)
     return F.transpose_sequence(padded)
Esempio n. 24
0
 def argmax(self, x):
     """Decode the CRF path for ``x`` and return it as a flat int32 array.

     ``x`` is wrapped in a ``Variable`` and passed to ``self.blstm.GetFeat``
     as a one-element list; the features are transposed and decoded with
     ``self.crf.argmax``.
     """
     features = self.blstm.GetFeat([Variable(x)])
     transposed = F.transpose_sequence(features)
     _score, best_path = self.crf.argmax(transposed)
     flat_path = utils.force_numpy(best_path).flatten()
     return flat_path.astype(np.int32)
Esempio n. 25
0
 def f(*xs):
     """Pass the positional inputs, packed as a tuple, to
     ``functions.transpose_sequence`` and return its result."""
     transposed = functions.transpose_sequence(xs)
     return transposed
    def _test_mask_recurrent_state_at(self, gpu):
        """Exercise masking, extraction and concatenation of recurrent states.

        Builds a ``StatelessRecurrentBranched`` link with a GRU branch and an
        LSTM branch, runs two length-2 sequences through ``n_step_forward``,
        and compares rows 1 and 3 of each branch's concatenated output with
        the output of calling the link twice (once per transposed step) while
        the intermediate recurrent state is kept, masked, or rebuilt.

        :param gpu: device id; when non-negative the link is moved to that
            GPU before the checks run
        """
        in_size = 2
        out0_size = 2
        out1_size = 3
        # Branch 0 is a bare GRU, branch 1 is an LSTM wrapped in a sequential.
        par = StatelessRecurrentBranched(
            L.NStepGRU(1, in_size, out0_size, 0),
            StatelessRecurrentSequential(L.NStepLSTM(1, in_size, out1_size,
                                                     0), ),
        )
        if gpu >= 0:
            chainer.cuda.get_device_from_id(gpu).use()
            par.to_gpu()
        xp = par.xp
        # Two random float32 sequences, each of shape (2, in_size).
        seqs_x = [
            xp.random.uniform(-1, 1, size=(2, in_size)).astype(np.float32),
            xp.random.uniform(-1, 1, size=(2, in_size)).astype(np.float32),
        ]
        # transposed_x[t] batches step t of both sequences.
        transposed_x = F.transpose_sequence(seqs_x)

        # Reference result: both sequences processed in one call.
        # NOTE(review): rows 1 and 3 of the 'concat' output are presumably
        # the last step of sequence 0 and sequence 1 -- confirm against
        # n_step_forward.
        nstep_out, nstep_rs = par.n_step_forward(seqs_x,
                                                 None,
                                                 output_mode='concat')

        # Check if n_step_forward and forward twice results are same
        def no_mask_forward_twice():
            _, rs = par(transposed_x[0], None)
            return par(transposed_x[1], rs)

        nomask_out, nomask_rs = no_mask_forward_twice()
        # GRU
        xp.testing.assert_allclose(
            nstep_out[0].array[[1, 3]],
            nomask_out[0].array,
        )
        # LSTM
        xp.testing.assert_allclose(
            nstep_out[1].array[[1, 3]],
            nomask_out[1].array,
        )
        # The final recurrent states must match too: the GRU state is compared
        # directly, the LSTM state is a 1-tuple holding a pair of arrays.
        xp.testing.assert_allclose(nstep_rs[0].array, nomask_rs[0].array)
        self.assertIsInstance(nomask_rs[1], tuple)
        self.assertEqual(len(nomask_rs[1]), 1)
        self.assertEqual(len(nomask_rs[1][0]), 2)
        xp.testing.assert_allclose(nstep_rs[1][0][0].array,
                                   nomask_rs[1][0][0].array)
        xp.testing.assert_allclose(nstep_rs[1][0][1].array,
                                   nomask_rs[1][0][1].array)

        # 1st-only mask forward twice: only 2nd should be the same
        def mask0_forward_twice():
            _, rs = par(transposed_x[0], None)
            rs = par.mask_recurrent_state_at(rs, 0)
            return par(transposed_x[1], rs)

        mask0_out, mask0_rs = mask0_forward_twice()
        # GRU
        # assertRaises(AssertionError) expresses "these outputs must differ".
        with self.assertRaises(AssertionError):
            xp.testing.assert_allclose(
                nstep_out[0].array[1],
                mask0_out[0].array[0],
            )
        xp.testing.assert_allclose(
            nstep_out[0].array[3],
            mask0_out[0].array[1],
        )
        # LSTM
        with self.assertRaises(AssertionError):
            xp.testing.assert_allclose(
                nstep_out[1].array[1],
                mask0_out[1].array[0],
            )
        xp.testing.assert_allclose(
            nstep_out[1].array[3],
            mask0_out[1].array[1],
        )

        # 2nd-only mask forward twice: only 1st should be the same
        def mask1_forward_twice():
            _, rs = par(transposed_x[0], None)
            rs = par.mask_recurrent_state_at(rs, 1)
            return par(transposed_x[1], rs)

        mask1_out, mask1_rs = mask1_forward_twice()
        # GRU
        xp.testing.assert_allclose(
            nstep_out[0].array[1],
            mask1_out[0].array[0],
        )
        with self.assertRaises(AssertionError):
            xp.testing.assert_allclose(
                nstep_out[0].array[3],
                mask1_out[0].array[1],
            )
        # LSTM
        xp.testing.assert_allclose(
            nstep_out[1].array[1],
            mask1_out[1].array[0],
        )
        with self.assertRaises(AssertionError):
            xp.testing.assert_allclose(
                nstep_out[1].array[3],
                mask1_out[1].array[1],
            )

        # both 1st and 2nd mask forward twice: both should be different
        def mask01_forward_twice():
            _, rs = par(transposed_x[0], None)
            rs = par.mask_recurrent_state_at(rs, [0, 1])
            return par(transposed_x[1], rs)

        mask01_out, mask01_rs = mask01_forward_twice()
        # GRU
        with self.assertRaises(AssertionError):
            xp.testing.assert_allclose(
                nstep_out[0].array[1],
                mask01_out[0].array[0],
            )
        with self.assertRaises(AssertionError):
            xp.testing.assert_allclose(
                nstep_out[0].array[3],
                mask01_out[0].array[1],
            )
        # LSTM
        with self.assertRaises(AssertionError):
            xp.testing.assert_allclose(
                nstep_out[1].array[1],
                mask01_out[1].array[0],
            )
        with self.assertRaises(AssertionError):
            xp.testing.assert_allclose(
                nstep_out[1].array[3],
                mask01_out[1].array[1],
            )

        # get and concat recurrent states and resume forward
        # Splitting the state per batch entry and concatenating it again must
        # be a no-op, so both rows are expected to match the reference.
        def get_and_concat_rs_forward():
            _, rs = par(transposed_x[0], None)
            rs0 = par.get_recurrent_state_at(rs, 0, unwrap_variable=True)
            rs1 = par.get_recurrent_state_at(rs, 1, unwrap_variable=True)
            concat_rs = par.concatenate_recurrent_states([rs0, rs1])
            return par(transposed_x[1], concat_rs)

        getcon_out, getcon_rs = get_and_concat_rs_forward()
        # GRU
        xp.testing.assert_allclose(
            nstep_out[0].array[1],
            getcon_out[0].array[0],
        )
        xp.testing.assert_allclose(
            nstep_out[0].array[3],
            getcon_out[0].array[1],
        )
        # LSTM
        xp.testing.assert_allclose(
            nstep_out[1].array[1],
            getcon_out[1].array[0],
        )
        xp.testing.assert_allclose(
            nstep_out[1].array[3],
            getcon_out[1].array[1],
        )