Example #1
0
        def _represent_mentions(mention_ids, token_rep):
            """Build one representation for a mention from its token rows.

            :param mention_ids: indices into ``token_rep``; may be empty.
            :param token_rep: table handed to ``F.embed_id`` as the lookup matrix.
            :return: the rows selected by ``mention_ids``, averaged over axis 0
                when more than one id is given. Row 0 of ``token_rep`` is
                returned instead when ``mention_ids`` is empty or when the
                lookup raises an ordinary exception.
            """
            def _rows(ids):
                # Single place that turns ids into an int array and looks them up.
                return F.embed_id(self.xp.asarray(ids).astype('i'), token_rep)

            try:
                if len(mention_ids) == 0:
                    return _rows([0])
                final_mention_representation = _rows(mention_ids)
                if len(mention_ids) > 1:
                    final_mention_representation = F.average(
                        final_mention_representation, axis=0)
                return final_mention_representation
            except Exception:
                # Catch ordinary errors only, so KeyboardInterrupt / SystemExit
                # still propagate instead of being turned into a row-0 fallback.
                #
                # TODO: when word is a substring of a word or when word is the
                # NONE, can be split:
                #   - If the reason is that the word is a substring, use the index
                #     of the enclosing word. In this case, position is the index.
                #   - If the reason is that the word is a NONE word, use the index
                #     of the NONE token. In this case, position is the last index.
                #
                # Earlier sketch kept for reference:
                # xs_embed = self.sequence_embed([self.xp.asarray([3]).astype('i')], self.embed_wordtype, TRAIN=False) #index 3 is the NONE word
                # h, c, word_embedding = self.bilstm(None, None, xs_embed)
                # none_position_embedding = self.embed_positiontype(self.xp.asarray([self.max_pos - 1]))  # TODO: current is strict, better way is to get the position where it is a subset
                # final_mention_representation = F.concat((word_embedding[0], none_position_embedding), axis=1)
                return _rows([0])
    def gnn(self, vertex, edge, adjacency, vertex_):
        """Run ``layer_gnn`` rounds of vertex/edge updates and pool the vertices.

        Vertex and edge ids are embedded with ``self.embed_vertex`` and
        ``self.embed_edge``. Each round gathers neighbour rows through
        ``adjacency`` and ``vertex_`` (``-1`` entries are ignored by the lookup),
        then refreshes both the vertex and the edge states.

        Returns the vertex states summed over axis 0, with a leading axis of
        size 1 added.
        """
        node_state = self.embed_vertex(vertex)
        edge_state = self.embed_edge(edge)
        n_rows, n_cols = edge.shape

        for _ in range(layer_gnn):
            flat_shape = (n_rows * n_cols, dim)

            neighbours = F.embed_id(adjacency, node_state, ignore_label=-1)

            # Message for the vertex update: summed neighbours plus summed edges.
            node_msg = F.relu(
                self.W_vertex(F.sum(neighbours, 1)) +
                self.W_edge(F.sum(edge_state, 1)))

            # Message for the edge update, computed on the flattened layout.
            paired = F.reshape(
                F.embed_id(vertex_, node_state, ignore_label=-1), flat_shape)
            neighbours_flat = F.reshape(neighbours, flat_shape)
            edge_msg = F.relu(self.W_vertex(paired + neighbours_flat))

            # Update the vertex states.
            node_state = F.sigmoid(F.relu(self.W_vertex(node_state)) + node_msg)

            # Update the edge states, then restore the 3-D layout.
            edge_flat = F.reshape(edge_state, flat_shape)
            edge_flat = F.sigmoid(F.relu(self.W_edge(edge_flat)) + edge_msg)
            edge_state = F.reshape(edge_flat, (n_rows, n_cols, dim))

        return F.expand_dims(F.sum(node_state, 0), 0)
Example #3
0
 def forward(self, hs_flatten, pairs, ckeys, lengths):
     """Build pair features as differences of rows of ``hs_flatten``.

     For every (p1, p2) row of ``pairs`` and the matching entry of ``ckeys``
     the result concatenates, along axis 1, ``h[p1] - h[ckey + 1]`` and
     ``h[p2] - h[ckey - 1]``. ``lengths`` is accepted but not read here.
     """
     backend = chainer.cuda.get_array_module(hs_flatten)
     left_ids, right_ids = backend.asarray(pairs.T)
     key_ids = backend.asarray(ckeys)

     def rows(ids):
         return F.embed_id(ids, hs_flatten)

     left_part = rows(left_ids) - rows(key_ids + 1)
     right_part = rows(right_ids) - rows(key_ids - 1)
     return F.concat((left_part, right_part), axis=1)
Example #4
0
    def train_forward(self,
                      input_ids,
                      output_ids,
                      input_masks=None,
                      output_masks=None):
        """Encode ``input_ids`` and decode against the given ``output_ids``.

        Both id arrays are looked up in their vocabulary's embedding table
        (``self.source_vocab`` / ``self.target_vocab``). The masks are passed
        through to ``self.encode`` and ``self.decode`` unchanged.

        Returns whatever ``self.decode`` returns.
        """
        source_embedded = F.embed_id(input_ids, self.source_vocab.embeddings)
        target_embedded = F.embed_id(output_ids, self.target_vocab.embeddings)

        memory = self.encode(source_embedded, input_masks=input_masks)
        return self.decode(memory,
                           target_embedded,
                           input_masks=input_masks,
                           output_masks=output_masks)
Example #5
0
    def forward(self, input_ids, input_masks=None, length=None):
        """Greedily decode from ``input_ids`` and return the per-step outputs.

        Decoding starts from the embedding of ``self.target_vocab.start_id``
        and, at every step, appends the embedding of the arg-max token of the
        last decoder position. With ``length=None`` the loop stops once every
        row has emitted ``self.target_vocab.end_id`` at some step; otherwise
        exactly ``length`` steps are run.

        Returns the last-position outputs of ``self.decode`` for every step,
        concatenated along axis 1, or ``None`` if no step was executed.
        """
        batch_size, input_length = input_ids.shape[0], input_ids.shape[1]
        source_embedded = F.embed_id(input_ids, self.source_vocab.embeddings)
        memory = self.encode(source_embedded, input_masks=input_masks)

        # Seed the decoder input with one start-token embedding per batch row.
        start_embedding = self.target_vocab.embed(
            [self.target_vocab.start_id])
        decoded_so_far = F.tile(F.expand_dims(start_embedding, 0),
                                (batch_size, 1, 1))

        # One column per step; the first column is all False.
        finished_history = F.tile(F.reshape(xp.array([False]), (1, 1)),
                                  (batch_size, 1))

        collected_probs = None
        everyone_finished = False
        n_steps = 0
        while (not everyone_finished) if length is None else (n_steps < length):
            step_probs = self.decode(memory,
                                     decoded_so_far,
                                     input_masks=input_masks)[:, -1, :]
            chosen_ids = F.argmax(step_probs, axis=-1)

            # Feed the chosen tokens back in as the next decoder input.
            chosen_embedded = F.expand_dims(
                F.embed_id(chosen_ids, self.target_vocab.embeddings), axis=1)
            decoded_so_far = F.concat([decoded_so_far, chosen_embedded],
                                      axis=1)

            step_probs = F.expand_dims(step_probs, axis=1)
            if collected_probs is None:
                collected_probs = step_probs
            else:
                collected_probs = F.concat([collected_probs, step_probs],
                                           axis=1)

            # A row counts as finished once it has ever produced the end id.
            hit_end_now = (chosen_ids.array == self.target_vocab.end_id)
            finished_now = F.expand_dims(
                finished_history[:, -1].array | hit_end_now, -1)
            finished_history = F.concat([finished_history, finished_now],
                                        axis=-1)
            everyone_finished = xp.all(finished_now.array)
            n_steps += 1

        return collected_probs
Example #6
0
 def fit_partial(self, rdoc_ids, rword_indices, window=5):
     """Back-propagate the sampler loss for every offset in the window.

     The pivot words are ``rword_indices[window:-window]``. The context is
     the sum of the dropped-out document vector (``self.mixture``) and the
     dropped-out pivot embedding (rows of ``self.sampler.W``). For each
     non-zero offset in ``[-window, window]`` the shifted words are used as
     targets; a target is replaced by ``-1`` when it belongs to a different
     document than its pivot or when it is randomly dropped according to
     ``self.word_dropout_ratio``.

     ``loss.backward()`` is called once per offset. Returns ``.data`` of the
     loss from the last offset only.
     """
     # Kept from the original: both full arrays are passed through ``move``
     # even though the results are not used afterwards.
     doc_ids, word_indices = move(self.xp, rdoc_ids, rword_indices)

     centre = slice(window, -window)
     pivot_ids = next(move(self.xp, rword_indices[centre]))
     pivot_vec = F.embed_id(pivot_ids, self.sampler.W)
     pivot_docs = rdoc_ids[centre]
     doc_vec = self.mixture(next(move(self.xp, pivot_docs)))
     loss = 0.0
     first, last = window, rword_indices.shape[0] - window
     ctx = (F.dropout(doc_vec, self.dropout_ratio) +
            F.dropout(pivot_vec, self.dropout_ratio))

     # Offset 0 is the pivot itself and is never predicted.
     shifts = [s for s in range(-window, window + 1) if s != 0]
     for shift in shifts:
         span = slice(first + shift, last + shift)
         shifted_words = rword_indices[span]
         same_doc = rdoc_ids[span] == pivot_docs
         draw = np.random.uniform(0, 1, same_doc.shape[0])
         kept = (draw > self.word_dropout_ratio).astype('bool')
         keep = np.logical_and(same_doc, kept).astype('int32')
         # keep == 1 -> original word id, keep == 0 -> -1
         shifted_words = shifted_words * keep - (1 - keep)
         (target,) = move(self.xp, shifted_words)
         loss = self.sampler(ctx, target)
         loss.backward()
     return loss.data
Example #7
0
 def fit_partial(self, rdoc_ids, rword_indices, window=5):
     """Train on one chunk: one sampler loss and backward pass per offset.

     Pivots are the words at ``rword_indices[window:-window]``; their
     context is ``dropout(self.mixture(doc)) + dropout(pivot embedding)``.
     Targets are the words at each non-zero offset within ``window``.
     Targets from another document than the pivot, or hit by word dropout
     (``self.word_dropout_ratio``), are set to ``-1``.

     Returns ``.data`` of the loss computed for the final offset.
     """
     # The full-array transfer below is retained although its results are
     # not read again in this method.
     doc_ids, word_indices = move(self.xp, rdoc_ids, rword_indices)

     n_words = rword_indices.shape[0]
     pivot_words = rword_indices[window: -window]
     pivot = F.embed_id(next(move(self.xp, pivot_words)), self.sampler.W)
     doc_at_pivot = rdoc_ids[window: -window]
     doc = self.mixture(next(move(self.xp, doc_at_pivot)))
     loss = 0.0
     lo, hi = window, n_words - window
     dropped_doc = F.dropout(doc, self.dropout_ratio)
     dropped_pivot = F.dropout(pivot, self.dropout_ratio)
     context = dropped_doc + dropped_pivot

     # Negative offsets first, then positive ones; the pivot (0) is skipped.
     for frame in list(range(-window, 0)) + list(range(1, window + 1)):
         begin, stop = lo + frame, hi + frame
         candidates = rword_indices[begin:stop]
         in_same_doc = rdoc_ids[begin:stop] == doc_at_pivot
         noise = np.random.uniform(0, 1, in_same_doc.shape[0])
         survives = (noise > self.word_dropout_ratio).astype('bool')
         weight = np.logical_and(in_same_doc, survives).astype('int32')
         # weight 1 keeps the word id, weight 0 turns it into -1
         masked = candidates * weight + (weight - 1)
         [target] = move(self.xp, masked)
         loss = self.sampler(context, target)
         loss.backward()
     return loss.data
Example #8
0
    def fit_partial(self, rsty_ids, raut_ids, rwrd_ids, window=5):
        """Back-propagate the sampler loss per offset, using the pivot only.

        In this variant the context is just the dropped-out pivot embedding
        (rows of ``self.sampler.W``). The style mixture is still evaluated and
        the word-dropout random draw is still made, but neither result feeds
        into the context or the targets: targets are the raw shifted word ids.

        Returns ``.data`` of the loss from the last offset.
        """
        # Full-array transfer retained from the original; results unused.
        sty_ids, aut_ids, wrd_ids = move(self.xp, rsty_ids, raut_ids, rwrd_ids)

        centre = slice(window, -window)
        pivot_ids = next(move(self.xp, rwrd_ids[centre]))
        pivot_vec = F.embed_id(pivot_ids, self.sampler.W)
        sty_at_pivot = rsty_ids[centre]
        aut_at_pivot = raut_ids[centre]
        # Evaluated but not added to the context in this variant.
        sty = self.mixture_sty(next(move(self.xp, sty_at_pivot)))
        loss = 0.0
        first, last = window, rwrd_ids.shape[0] - window
        context = F.dropout(pivot_vec, self.dropout_ratio)

        # Offset 0 is the pivot itself and is never predicted.
        shifts = [s for s in range(-window, window + 1) if s != 0]
        for shift in shifts:
            span = slice(first + shift, last + shift)
            shifted_words = rwrd_ids[span]
            same_style = rsty_ids[span] == sty_at_pivot
            # The draw and mask are kept so the global NumPy RNG advances
            # exactly as before, even though the mask is not applied.
            draw = np.random.uniform(0, 1, same_style.shape[0])
            mask = (draw > self.word_dropout_ratio).astype('bool')
            (target,) = move(self.xp, shifted_words)
            loss = self.sampler(context, target)
            loss.backward()
        return loss.data
Example #9
0
 def fit_partial(self, rsty_ids, raut_ids, rwrd_ids, window=5):
     """Back-propagate the sampler loss per offset with style+author context.

     The context is ``self.mixture_sty(style) + self.mixture_aut(author)``
     plus the dropped-out pivot embedding (rows of ``self.sampler.W``).
     A target word is replaced by ``-1`` unless it shares both style id and
     author id with its pivot and survives word dropout
     (``self.word_dropout_ratio``).

     Returns ``.data`` of the loss from the last offset.
     """
     # Full-array transfer retained from the original; results unused.
     sty_ids, aut_ids, wrd_ids = move(self.xp, rsty_ids, raut_ids, rwrd_ids)

     centre = slice(window, -window)
     pivot_ids = next(move(self.xp, rwrd_ids[centre]))
     pivot_vec = F.embed_id(pivot_ids, self.sampler.W)
     pivot_styles = rsty_ids[centre]
     pivot_authors = raut_ids[centre]
     style_vec = self.mixture_sty(next(move(self.xp, pivot_styles)))
     author_vec = self.mixture_aut(next(move(self.xp, pivot_authors)))
     loss = 0.0
     first, last = window, rwrd_ids.shape[0] - window
     ctx = style_vec + author_vec + F.dropout(pivot_vec, self.dropout_ratio)

     # Offset 0 is the pivot itself and is never predicted.
     shifts = [s for s in range(-window, window + 1) if s != 0]
     for shift in shifts:
         span = slice(first + shift, last + shift)
         shifted_words = rwrd_ids[span]
         same_style = rsty_ids[span] == pivot_styles
         same_author = raut_ids[span] == pivot_authors
         # Randomly drop words (controlled by self.word_dropout_ratio).
         draw = np.random.uniform(0, 1, same_style.shape[0])
         kept = (draw > self.word_dropout_ratio).astype('bool')
         same_both = np.logical_and(same_style, same_author)
         keep = np.logical_and(same_both, kept).astype('int32')
         # keep == 1 -> original word id, keep == 0 -> -1
         shifted_words = shifted_words * keep - (1 - keep)
         (target,) = move(self.xp, shifted_words)
         loss = self.sampler(ctx, target)
         loss.backward()
     return loss.data
Example #10
0
 def loop_function(self, prev, h, output_ptojection=False):
     """Turn the previous output into the next input by arg-max lookup.

     When ``output_ptojection`` is true, ``prev`` is first transformed as
     ``prev * self.W + self.b``. The arg-max over axis 1 is then looked up in
     ``normalizing(self.embed.W, 1)`` and concatenated with ``h`` on axis 1.
     """
     scores = prev * self.W + self.b if output_ptojection else prev
     best_ids = F.argmax(scores, 1)
     table = normalizing(self.embed.W, 1)
     return F.concat([F.embed_id(best_ids, table), h], 1)
Example #11
0
    def __call__(self, x_list):
        """Encode ``x_list`` with seven parallel conv branches and two highways.

        ``x_list`` is converted to int32 ids and looked up in ``self.identity``
        (``-1`` entries are ignored), then reshaped to
        ``(self.batch_size, 1, self.max_len, self.vocab_size)``. Each branch
        ``self.conv_1`` .. ``self.conv_7`` sees that tensor with a different
        constant padding on axis 2, followed by tanh, 3x3 max pooling with
        stride 3, and a sum over axis 2. The branch outputs are concatenated on
        axis 1, reshaped to ``(self.batch_size, 1700)`` and passed through
        ``self.highway_1``, ``self.highway_2`` and ``self.linear``.

        Reshapes use ``F.reshape`` so that they operate on chainer Variables
        directly instead of going through the array module's ``reshape``.
        """
        xs_f = F.embed_id(xp.array(x_list, dtype=xp.int32),
                          self.identity,
                          ignore_label=-1)
        xs_f = F.reshape(xs_f,
                         (self.batch_size, 1, self.max_len, self.vocab_size))

        # (conv link, (pad before, pad after)) on axis 2 for each branch.
        branches = (
            (self.conv_1, (0, 0)),
            (self.conv_2, (1, 0)),
            (self.conv_3, (1, 1)),
            (self.conv_4, (2, 1)),
            (self.conv_5, (2, 2)),
            (self.conv_6, (3, 2)),
            (self.conv_7, (3, 3)),
        )

        pooled = []
        for conv, (before, after) in branches:
            if before or after:
                conv_in = F.pad(
                    xs_f, [(0, 0), (0, 0), (before, after), (0, 0)],
                    'constant')
            else:
                # The first branch takes the unpadded input.
                conv_in = xs_f
            pooled.append(
                F.sum(F.max_pooling_2d(F.tanh(conv(conv_in)), 3, 3), axis=2))

        e = F.concat(pooled, axis=1)
        return self.linear(
            self.highway_2(
                self.highway_1(F.reshape(e, (self.batch_size, 1700)))))
 def position2onehot(self, inds, dim):
     """Return one-hot rows of width ``dim`` for ``inds``.

     ``inds`` is flattened, reduced modulo ``self.max_n_spans`` (the modulo is
     taken on a float32 copy and cast back to int32) and then used to pick
     rows of a ``dim`` x ``dim`` float32 identity matrix.
     """
     flat = chaFunc.flatten(inds)
     wrapped = (flat.data.astype('float32') % self.max_n_spans).astype('int32')
     table = self.xp.identity(dim).astype(self.xp.float32)
     return chaFunc.embed_id(wrapped, table)
Example #13
0
 def _populate_features(self, features, batch_index):
     """Look up the first four feature columns in ``self.hs[batch_index]``.

     The first four columns of ``features`` are flattened and used as row
     ids; ``-1`` ids are ignored by the lookup. ``self.pads`` is tiled
     ``len(features)`` times and added at exactly the ``-1`` positions.
     """
     ids = self.xp.array(features[:, :4].flatten())
     is_missing = ids == -1
     fs = F.embed_id(ids, self.hs[batch_index], ignore_label=-1)
     pad_rows = F.tile(self.pads, (len(features), 1))
     fs += pad_rows * self.xp.expand_dims(is_missing, axis=1)
     return fs
Example #14
0
    def forward(self, inputs):
        """
        Compute context insensitive token embeddings.

        Parameters
        ----------
        inputs:
            Token ids for the current batch; entries ``> 0`` are treated as
            real tokens. (Presumably shaped ``(batch_size, sequence_length)``
            - confirm against the caller.)

        Returns
        -------
        Dict with keys:
        ``'mask'``:
            The mask returned by ``add_sentence_boundary_token_ids`` for the
            boundary-augmented ids.
        ``'token_embedding'``:
            Rows of ``self._token_embedding_weights`` selected by the
            boundary-augmented ids.
        """
        # Every strictly positive id counts as a real token.
        is_token = (inputs > 0)

        # Add BOS/EOS
        ids_with_bounds, mask_with_bounds = add_sentence_boundary_token_ids(
            inputs, is_token, self._beginning_of_sentence_token,
            self._end_of_sentence_token)

        embedded = F.embed_id(ids_with_bounds, self._token_embedding_weights)

        return {'mask': mask_with_bounds, 'token_embedding': embedded}
Example #15
0
    def forward(self, ws, cs, ls, dep_ts=None):
        """Score arcs and labels for a batch of sentences.

        Word embeddings (``self.emb_word``) are concatenated with max-pooled
        character convolutions, dropped out, and run through a forward and a
        backward LSTM. Arc scores come from ``self.biaffine_arc``. Label scores
        come from ``self.biaffine_tag`` using the head rows selected by
        ``dep_ts`` when given, otherwise by the arg-max of the arc scores.

        Returns ``(cat_ys, dep_ys)``, one entry per sentence in each list.
        """
        def char_feature(chars, lens):
            # Convolve the character embeddings and pool over int(lens[0]) rows.
            conv_out = self.conv_char(F.expand_dims(self.emb_char(chars), 1))
            return F.squeeze(F.max_pooling_2d(conv_out, (int(lens[0]), 1)))

        def mlp(layer, x):
            return F.elu(F.dropout(layer(x), 0.32))

        ws = map(self.emb_word, ws)
        cs = [char_feature(c, l) for c, l in zip(cs, ls)]

        xs_f = []
        for word_vecs, char_vecs in zip(ws, cs):
            xs_f.append(F.dropout(F.concat([word_vecs, char_vecs]), 0.5))
        xs_b = [seq[::-1] for seq in xs_f]

        _, _, hs_f = self.lstm_f(None, None, xs_f)
        _, _, hs_b = self.lstm_b(None, None, xs_b)
        # Undo the reversal so both directions are aligned per token.
        hs_b = [seq[::-1] for seq in hs_b]
        hs = [F.concat([fwd, bwd]) for fwd, bwd in zip(hs_f, hs_b)]

        dep_ys = []
        for h in hs:
            dep_side = mlp(self.arc_dep, h)
            head_side = mlp(self.arc_head, h)
            dep_ys.append(self.biaffine_arc(dep_side, head_side))

        heads = dep_ts if dep_ts is not None else [
            F.argmax(y, axis=1) for y in dep_ys]

        cat_ys = []
        for h, t in zip(hs, heads):
            dep_side = mlp(self.rel_dep, h)
            head_rows = F.embed_id(t, h, ignore_label=IGNORE)
            head_side = mlp(self.rel_head, head_rows)
            cat_ys.append(self.biaffine_tag(dep_side, head_side))

        return cat_ys, dep_ys
Example #16
0
 def forward(self, indexs):
     """Look up rows of ``self.edge2vec`` and zero out randomly chosen rows.

     One uniform random number is drawn per index; a row is kept when its
     draw is ``>= self.dropout_ratio`` and multiplied by 0 otherwise. Kept
     rows are not rescaled.
     """
     keep = (np.random.rand(len(indexs)) >= self.dropout_ratio) * 1
     rows = F.embed_id(indexs, self.edge2vec).reshape(-1, self.vecDims)
     # Transpose so the per-row mask broadcasts, then transpose back.
     return (rows.T * keep).T
Example #17
0
 def forward(self, indexs):
     """Look up rows of ``self.nodeVecs`` and zero out randomly chosen rows.

     One uniform random number is drawn per index; a row is kept when its
     draw is ``>= self.dropout_ratio`` and multiplied by 0 otherwise. Kept
     rows are not rescaled.
     """
     keep = (np.random.rand(len(indexs)) >= self.dropout_ratio) * 1
     rows = F.embed_id(indexs, self.nodeVecs).reshape(-1, self.vecDims)
     # Transpose so the per-row mask broadcasts, then transpose back.
     return (rows.T * keep).T
Example #18
0
 def forward(self, ckeys, hs_flatten, lengths):
     """Score the rows of ``hs_flatten`` addressed by per-sequence ``ckeys``.

     Each entry of ``ckeys`` holds indices local to one sequence; they are
     shifted by the cumulative ``lengths`` of the preceding sequences so that
     they address rows of ``hs_flatten``, then scored with ``self.linear``.

     Returns ``(scores, boundaries)`` where ``boundaries`` is the int32
     cumulative count of keys per sequence.
     """
     counts = np.array([len(keys) for keys in ckeys], np.int32)

     # Start offset of every sequence inside the flattened representation.
     starts = np.insert(lengths, 0, 0)[:-1].cumsum()
     shifted = []
     for keys, start in zip(ckeys, starts):
         shifted.append(keys + start)
     flat_ids = np.concatenate(shifted).astype(np.int32)

     picked = F.embed_id(self.xp.asarray(flat_ids), hs_flatten)
     return self.linear(picked), counts.cumsum().astype(np.int32)
Example #19
0
 def __init__(self, w):
     """Set up a lookup into the fixed table ``w`` and a one-layer LSTM.

     ``self.embed`` is a plain callable around ``F.embed_id`` that closes
     over ``w``. The LSTM takes 300-dimensional inputs, produces
     ``self.out_units`` outputs and uses dropout 0.5.
     """
     super(Encoder, self).__init__()
     self.out_units = 300

     def lookup(x):
         return F.embed_id(x, w)

     with self.init_scope():
         self.embed = lookup
         self.encoder = L.NStepLSTM(
             n_layers=1,
             in_size=300,
             out_size=self.out_units,
             dropout=0.5,
         )
Example #20
0
 def embed_predict(self, examples):
     """Embed the examples, append a one-hot task id and run ``self.predict``.

     The first entry on the last axis of ``examples`` is a 1-based task id;
     the remaining entries are embedded with ``self.embed``.
     """
     # examples (..., 1+L)
     tokens = examples[..., 1:]
     task_numbers = examples[..., 0] - 1  # shift to 0-based rows
     embedded = self.embed(tokens)  # (..., L, E)
     one_hot_task = F.embed_id(task_numbers,
                               np.eye(TASKS, dtype=np.float32))  # (..., T)
     # Merge the last two axes of the embedding into one.
     flattened = F.reshape(embedded, embedded.shape[:-2] + (-1, ))  # (..., L*E)
     features = F.concat((flattened, one_hot_task), axis=-1)  # (..., L*E+T)
     return self.predict(features)  # (..., V)
Example #21
0
 def embed_predict(self, examples):
     """Embed grid examples, append a one-hot task id per cell and predict.

     The first entry on the last axis of ``examples`` is a 1-based task id;
     the remaining entries are reshaped to ``GRID`` and embedded with
     ``self.embed``. The one-hot task vector is repeated for every grid cell
     before being concatenated on the last axis.
     """
     # examples (..., 1+W*H)
     grid_shape = examples.shape[:-1] + tuple(GRID)
     cells = F.reshape(examples[..., 1:], grid_shape)  # (..., W, H)
     embedded = self.embed(cells)  # (..., W, H, E)
     one_hot_task = F.embed_id(examples[..., 0] - 1,
                               np.eye(TASKS, dtype=np.float32))  # (..., T)
     # Insert two singleton axes and repeat over the grid axes.
     repeats = embedded.shape[-3:-1] + (1, )
     task_per_cell = F.tile(one_hot_task[..., None, None, :],
                            repeats)  # (..., W, H, T)
     features = F.concat((embedded, task_per_cell), axis=-1)  # (..., W, H, E+T)
     return self.predict(features)  # (..., V)
Example #22
0
    def predict_embed(self,
                      xs,
                      embedW,
                      labels=None,
                      dropout=0.,
                      mode='sampling',
                      temp=1.,
                      word_lower_bound=0.,
                      gold_lower_bound=0.,
                      gumbel=True,
                      residual=0.,
                      wordwise=True,
                      add_original=0.,
                      augment_ratio=0.25):
        """Return embeddings of ``xs`` with some rows swapped for sampled words.

        Predictions are computed without backprop and in test mode: the scores
        from ``self.output.output(self.encode(xs, labels=labels))`` are divided
        by ``temp``, perturbed with Gumbel noise, and the arg-max word of each
        row is looked up in ``embedW``. ``embedW[0]`` is inserted before and
        after each sequence's predictions, so every sequence ``x`` contributes
        ``len(x)`` rows, matching its raw embedding.
        NOTE(review): this assumes the encoder yields ``len(x) - 2`` rows per
        sequence - confirm against ``self.encode``.

        Each row is then replaced by its predicted embedding with probability
        ``augment_ratio``; otherwise the raw embedding of ``xs`` (looked up in
        ``embedW`` via ``sequence_embed``) is kept. The result is dropped out
        with ratio ``dropout`` and split back into one array per sequence.

        ``mode``, ``word_lower_bound``, ``gold_lower_bound``, ``gumbel``,
        ``residual``, ``wordwise`` and ``add_original`` are accepted but not
        read here; in particular the Gumbel noise is always added.

        :return: tuple of per-sequence embedding Variables.
        """
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            t_out_concat = self.encode(xs, labels=labels)
            prob_concat = self.output.output(t_out_concat).data
            prob_concat /= temp
            prob_concat += self.xp.random.gumbel(
                size=prob_concat.shape).astype('f')
            prob_concat = F.softmax(prob_concat).data

        out_concat = F.embed_id(
            self.xp.argmax(prob_concat, axis=1).astype(np.int32), embedW)

        # insert eos
        eos = embedW[0][None]
        new_out = []
        count = 0
        for x in xs:
            # Use this sequence's own length (not the batch size) so the
            # rebuilt block lines up row-for-row with the raw embeddings.
            n_inner = len(x) - 2
            new_out.append(eos)
            new_out.append(out_concat[count:count + n_inner])
            new_out.append(eos)
            count += n_inner
        out_concat = F.concat(new_out, axis=0)

        def embed_func(x):
            return F.embed_id(x, embedW, ignore_label=-1)

        raw_concat = F.concat(sequence_embed(embed_func, xs, self.dropout),
                              axis=0)
        n_rows = raw_concat.shape[0]

        # One random decision per row, broadcast across the embedding axis.
        mask = self.xp.broadcast_to(
            (self.xp.random.rand(n_rows, 1) < augment_ratio), raw_concat.shape)
        out_concat = F.where(mask, out_concat, raw_concat)

        x_len = [len(x) for x in xs]
        x_section = np.cumsum(x_len[:-1])
        out_concat = F.dropout(out_concat, dropout)
        exs = F.split_axis(out_concat, x_section, 0)
        return exs
Example #23
0
 def _extract(start, end):
     """Average over each (start, end) span, processed in bounded blocks.

     The spans are de-duplicated with ``_uniq``. Consecutive spans are grouped
     while the covered range stays within ``block_size`` and each group is
     summed with ``_sum``. The stacked sums are divided by the span lengths
     and finally gathered back through the ``offset`` returned by ``_uniq``.
     """
     start, end, offset = _uniq(start, end)
     chunks = []
     group_begin, low, high = 0, 0, 0
     for k, (s, e) in enumerate(zip(start, end)):
         low = min(low, s)
         high = max(high, e)
         too_wide = high - low > block_size
         if too_wide and k > 0:
             # Close the current group and start a new one at span k.
             chunks.append(_sum(start[group_begin:k], end[group_begin:k]))
             group_begin, low, high = k, s, e
     chunks.append(_sum(start[group_begin:], end[group_begin:]))
     span_len = xp.asarray(end - start)[:, None]
     averaged = F.vstack(chunks) / span_len
     return F.embed_id(xp.asarray(offset), averaged)
Example #24
0
    def _feature_repl(hs_flatten, pairs, ckeys, lengths):
        """Compute two absolute-product features around span ends and keys.

        With ``b``/``e`` the begin/end columns of ``pairs`` and ``k`` the
        entries of ``ckeys``, all indexing rows of ``hs_flatten``::

            repl1 = |h[b - 1] * (h[b] - h[k + 1])|
            repl2 = |h[e + 1] * (h[e] - h[k - 1])|

        ``h[b - 1]`` and ``h[e + 1]`` are replaced by zeros where the
        neighbouring index falls on a boundary derived from the cumulative
        ``lengths``.
        """
        xp = chainer.cuda.get_array_module(hs_flatten)
        begins, ends = pairs.T
        begin_ids = xp.asarray(begins)
        end_ids = xp.asarray(ends)
        key_ids = xp.asarray(ckeys)

        def blank_rows(flags, rows):
            # Zero the rows whose flag is set, keep the others.
            return F.where(
                xp.asarray(flags)[:, None], xp.zeros_like(rows.data), rows)

        # Row before each begin; blanked when it belongs to the previous block.
        before_first = np.insert(lengths[:-1].cumsum(), 0, 0) - 1
        h_b = F.embed_id(begin_ids, hs_flatten)
        h_b_pre = blank_rows(
            np.isin(begins - 1, before_first),
            F.embed_id(begin_ids - 1, hs_flatten, ignore_label=-1))

        # Row after each end; blanked when it belongs to the next block.
        after_last = lengths.cumsum()
        h_e = F.embed_id(end_ids, hs_flatten)
        h_e_post = blank_rows(
            np.isin(ends + 1, after_last),
            F.embed_id(end_ids + 1, hs_flatten,
                       ignore_label=hs_flatten.shape[0]))

        h_k_pre = F.embed_id(key_ids - 1, hs_flatten)
        h_k_post = F.embed_id(key_ids + 1, hs_flatten)

        return (F.absolute(h_b_pre * (h_b - h_k_post)),
                F.absolute(h_e_post * (h_e - h_k_pre)))
Example #25
0
def _compute_metrics(parsed,
                     gold_batch,
                     lengths,
                     use_predicted_arcs_for_rels=True):
    """Compute arc and relation loss/accuracy for a parsed batch.

    ``parsed`` starts with the arc logits and the relation logits;
    ``gold_batch`` holds per-sentence tuples starting with the gold arcs and
    gold relations. The first position on axis 1 (the root) is excluded
    everywhere and gold sequences are padded with ``-1``, which is ignored by
    both the loss and ``_accuracy``.

    Relation logits are taken at the predicted head of each token when
    ``use_predicted_arcs_for_rels`` is true, otherwise at the gold head.

    Returns a dict with keys ``'arc_loss'``, ``'arc_accuracy'``,
    ``'rel_loss'`` and ``'rel_accuracy'``.
    """
    arc_logits, rel_logits, *_ = parsed
    gold_arcs, gold_rels, *_ = zip(*gold_batch)

    # exclude attachment from the root
    arc_logits = arc_logits[:, 1:]
    rel_logits = rel_logits[:, 1:]
    gold_arcs = F.pad_sequence(gold_arcs, padding=-1)[:, 1:]
    gold_rels = F.pad_sequence(gold_rels, padding=-1)[:, 1:]
    # Computed as in the original; not read again below.
    lengths = np.array(lengths, dtype=np.int32) - 1
    xp = chainer.cuda.get_array_module(arc_logits)
    if xp is not np:
        gold_arcs.to_gpu()
        gold_rels.to_gpu()

    # --- arcs -------------------------------------------------------------
    b, n_deps, n_heads = arc_logits.shape
    flat_arc_logits = F.reshape(arc_logits, (b * n_deps, n_heads))
    flat_gold_arcs = F.reshape(gold_arcs, (b * n_deps, ))
    metrics = {
        'arc_loss': F.softmax_cross_entropy(flat_arc_logits,
                                            flat_gold_arcs,
                                            ignore_label=-1),
        'arc_accuracy': _accuracy(flat_arc_logits,
                                  flat_gold_arcs,
                                  ignore_label=-1),
    }

    # --- relations --------------------------------------------------------
    if use_predicted_arcs_for_rels:
        chosen_heads = xp.argmax(arc_logits.data, axis=2)
    else:
        chosen_heads = gold_arcs.data
    chosen_heads = chainer.cuda.to_cpu(chosen_heads)

    b, n_deps, n_heads, n_rels = rel_logits.shape
    per_sentence = n_deps * n_heads
    per_token = np.arange(n_deps) * n_heads
    # Flat row index of (sentence, dependent, chosen head) in the reshaped
    # relation logits.
    flat_rows = []
    for sent_no, heads in enumerate(chosen_heads):
        flat_rows.append(per_sentence * sent_no + per_token + heads)
    flat_rows = np.concatenate(flat_rows)

    flat_rel_logits = F.embed_id(
        xp.asarray(flat_rows),
        F.reshape(rel_logits, (b * per_sentence, n_rels)))
    flat_gold_rels = F.reshape(gold_rels, (b * n_deps, ))
    metrics['rel_loss'] = F.softmax_cross_entropy(flat_rel_logits,
                                                  flat_gold_rels,
                                                  ignore_label=-1)
    metrics['rel_accuracy'] = _accuracy(flat_rel_logits,
                                        flat_gold_rels,
                                        ignore_label=-1)
    return metrics
Example #26
0
 def __call__(self, x):
     """Classify ``x`` with parallel convolutions over embedded ids.

     Ids are looked up in ``self.embed_weights``; every link in
     ``self.convs`` is applied, followed by ReLU and max pooling with window
     ``(2, self.embed_dim)``. The results are concatenated on axis 2 and
     passed through ``self.fc4`` (ReLU + dropout) and ``self.fc5``.

     Returns the raw ``self.fc5`` output when ``chainer.config.train`` is
     true, and its softmax otherwise.
     """
     embedded = F.embed_id(x, self.embed_weights)
     pooled = [
         F.max_pooling_2d(F.relu(conv(embedded)), (2, self.embed_dim))
         for conv in self.convs
     ]
     # concatenate along the conv dimension (axis=2)
     hidden = F.concat(pooled, axis=2)
     hidden = F.dropout(F.relu(self.fc4(hidden)), self.dropout)
     if not chainer.config.train:
         return F.softmax(self.fc5(hidden))
     return self.fc5(hidden)
Example #27
0
File: nets.py Project: Roger-G/DL
def embed_seq_batch(embed, seq_batch, dropout=0., context=None):
    """Embed a batch of variable-length sequences with a single lookup.

    All sequences are concatenated along axis 0, embedded once with
    ``embed``, passed through dropout and finally split back into one
    chunk per input sequence.  When ``context`` is given, row ``i`` of
    ``context`` is looked up for every element of sequence ``i`` and
    concatenated to the embeddings along axis 1.

    Args:
        embed: Callable applied to the concatenated sequences.  Its ``xp``
            attribute is used to build the index arrays when ``context``
            is given.
        seq_batch: The sequences to embed.
        dropout: Ratio passed to ``F.dropout``.
        context: Optional table indexed by the position of a sequence
            within ``seq_batch``.

    Returns:
        The result of ``F.split_axis``: one embedded chunk per sequence.
    """
    lengths = [len(seq) for seq in seq_batch]
    flat = F.dropout(embed(F.concat(seq_batch, axis=0)), dropout)

    if context is not None:
        xp = embed.xp
        # For every element, the index of the sequence it belongs to.
        owner_ids = xp.concatenate(
            [xp.full((length, ), seq_idx).astype('i')
             for seq_idx, length in enumerate(lengths)],
            axis=0)
        flat = F.concat([flat, F.embed_id(owner_ids, context)], axis=1)

    # Cut points between consecutive sequences (the last one is implicit).
    boundaries = np.cumsum(lengths[:-1])
    return F.split_axis(flat, boundaries, 0)
Example #28
0
    def __call__(self, x_data, x_char_data=None, x_additional=None):
        """Compute per-token label scores for a batch of sentences.

        Each sentence is embedded with ``self.word_embed``, optionally
        extended with character-CNN features (``self.use_char``) and with
        additional feature embeddings (``x_additional``), passed through
        dropout and fed to ``self.rnn``.  The RNN outputs of all sentences
        are concatenated along axis 0 and scored by ``self.output_layer``.

        Side effects: stores the sentence lengths in ``self.n_length`` and
        the indices that sort sentences by decreasing length in
        ``self.inds``.

        Args:
            x_data: Batch of word-id sequences, one per sentence.
            x_char_data: Per-sentence collections of character inputs; only
                read when ``self.use_char`` is truthy.
            x_additional: Optional additional features, indexed as
                ``x_additional[feature][sentence]``.

        Returns:
            The output of ``F.split_axis`` cutting the score matrix into
            one piece per row.
        """
        lengths = [len(sentence) for sentence in x_data]
        self.n_length = lengths
        self.inds = np.argsort([-length for length in lengths]).astype('i')

        if self.use_char:
            # CharCNN: flatten the per-sentence inputs into one list so the
            # character CNN runs once over the whole batch.
            flat_chars = [item for chars in x_char_data for item in chars]
            char_vecs = self.char_cnn(flat_chars)
            char_index = self.char_cnn.char_index(self.n_length)

        embedded = []
        for sent_idx, word_ids in enumerate(x_data):
            features = self.word_embed(
                my_variable(word_ids, volatile=not self.train))

            if self.use_char:
                char_features = F.embed_id(char_index[sent_idx], char_vecs,
                                           ignore_label=-1)
                features = F.concat([features, char_features], axis=1)

            if x_additional:
                for feat_idx in six.moves.range(self.n_add_feature):
                    extra = my_variable(x_additional[feat_idx][sent_idx],
                                        volatile=not self.train)
                    extra_layer = self.get_layer('add_embed_' + str(feat_idx))
                    features = F.concat([features, extra_layer(extra)],
                                        axis=1)

            embedded.append(
                my_dropout(features, ratio=self.use_dropout,
                           train=self.train))

        # No initial hidden/cell state is supplied to the RNN.
        _, _, h_vecs = self.rnn(hx=None, cx=None, xs=embedded)

        h_vecs = F.concat(h_vecs, axis=0)
        if self.use_dropout:
            h_vecs = my_dropout(h_vecs,
                                ratio=self.use_dropout,
                                train=self.train)

        # Label prediction: one score row per token.
        scores = self.output_layer(h_vecs)
        return F.split_axis(scores, scores.data.shape[0], axis=0)
Example #29
0
    def __init__(self, w):
        """Build the encoder: an embedding lookup followed by a 1-layer LSTM.

        Args:
            w: Embedding table handed to ``F.embed_id`` as ``W``.  It is
                only captured in a closure here, not registered on this
                chain.
        """
        # Run the parent (chainer.Chain) initialiser first.
        super(Encoder, self).__init__()
        # 300 is the dimensionality of the Word2Vec vectors.
        self.out_units = 300

        def embed(x):
            return F.embed_id(x, w)

        # Links assigned inside init_scope() are registered on the chain,
        # which is what makes their weights trainable.
        with self.init_scope():
            self.embed = embed
            # Shape of the LSTM to be trained.
            self.encoder = L.NStepLSTM(
                n_layers=1,
                in_size=300,
                out_size=self.out_units,
                dropout=0.5,
            )
Example #30
0
    def wsd_with_tc(self, sent, trf_encoded_matrix, labels):
        """Run word-sense disambiguation (WSD) and feed it into text classification (TC).

        The WSD scores are turned into predicted sense ids; positions whose
        label is ``"<PAD>"`` are set to ``-1`` so that the sense lookup
        ignores them.  The looked-up sense embeddings then replace the
        corresponding entries of ``trf_encoded_matrix``, and the result is
        summed over axis 2 and classified by ``self.fc2``.

        Args:
            sent: Input passed to ``self.wsd_model`` in the
                ``"TRF-Sequential"`` setting.
            trf_encoded_matrix: Encoded document matrix.  Assumed to be
                laid out as (batch, features, tokens) -- TODO confirm
                against the caller.
            labels: Nested iterable of sense labels; ``"<PAD>"`` marks
                words without a sense label.

        Returns:
            ``(y_tc, y_wsd)`` for the ``"TRF-Multi"`` and
            ``"TRF-Delay-Multi"`` model types, otherwise ``y_tc`` alone.

        Raises:
            ValueError: If ``self.model_type`` is not one of the supported
                model types (previously this surfaced later as an
                ``UnboundLocalError`` on ``y_wsd``).
        """
        is_multi = self.model_type in ("TRF-Multi", "TRF-Delay-Multi")

        ### WSD ###
        if is_multi:
            y_wsd = self.wsd_only(trf_encoded_matrix, labels)
        elif self.model_type == "TRF-Sequential":
            # Sequential setting; the second return value (task type) is
            # not needed here.
            y_wsd, _ = self.wsd_model(sent, None, None, True)
        else:
            raise ValueError(
                "Unsupported model_type for wsd_with_tc: {!r}".format(
                    self.model_type))

        # Softmax over the predictions, then the index of the best sense.
        y_wsd_soft = F.softmax(y_wsd)
        argmax_wsd = F.argmax(y_wsd_soft, axis=1)

        # True where the word carries a sense label; unlabeled words are
        # ignored.
        cond = chainer.Variable(
            self.xp.array([label != "<PAD>" for label in chain(*labels)]))
        pad_array = chainer.Variable(
            -1 * self.xp.ones(argmax_wsd.shape, dtype=argmax_wsd.dtype))
        pad_array_argmax_wsd = F.where(cond, argmax_wsd, pad_array)

        # Sense embeddings come from a fixed lookup table.
        sense_label_embed = F.embed_id(x=pad_array_argmax_wsd,
                                       W=self.xp.array(
                                           self.lookup_table_sense_fixed),
                                       ignore_label=-1)

        sense_label_embed = sense_label_embed.reshape(
            trf_encoded_matrix.shape[0], trf_encoded_matrix.shape[-1], -1)
        origin_shape = sense_label_embed.shape
        sense_label_embed = F.moveaxis(sense_label_embed, 1, 2)

        ## Replacement ##
        # Expand the per-word condition so it can select between the sense
        # embedding and the original encoding element-wise.
        cond_reshape = cond.reshape(cond.shape[0], -1)
        cond_reshape = F.broadcast_to(
            cond_reshape, (cond_reshape.shape[0], trf_encoded_matrix.shape[1]))
        cond_reshape = cond_reshape.reshape(origin_shape)
        cond_reshape = F.swapaxes(cond_reshape, 1, 2)
        replaced_trf_matrix = F.where(cond_reshape, sense_label_embed,
                                      trf_encoded_matrix)

        ### Incorporate the WSD predictions into TC ###
        tc = replaced_trf_matrix  # document matrix after replacement

        ### TC ###
        tc_features = F.sum(tc, axis=2)  # TC features
        y_tc = self.fc2(tc_features)  # TC prediction

        return (y_tc, y_wsd) if is_multi else y_tc
Example #31
0
    def forward(self, ws, ss, ps, dep_ts=None):
        """Score dependency heads and categories for a batch of sentences.

        Word, suffix and prefix embeddings are concatenated, passed through
        dropout and split per sentence, then encoded by a forward and a
        backward LSTM.  A biaffine layer scores the heads of every sentence;
        a second biaffine layer scores categories from each token and the
        representation of its head.

        Args:
            ws: Per-sentence word id arrays.
            ss: Per-sentence suffix id arrays.
            ps: Per-sentence prefix id arrays.
            dep_ts: Optional per-sentence head indices.  When given they
                are used to select the head representations; otherwise the
                argmax of the head scores is used.

        Returns:
            ``(cat_ys, dep_ys)``: a list with one category score block per
            sentence and a list with one head score matrix per sentence.
        """
        batchsize = len(ws)
        xp = chainer.cuda.get_array_module(ws[0])
        lengths = [w.shape[0] for w in ws]
        # Interior cumulative lengths: the cut points between sentences.
        split = scanl(lambda x, y: x + y, 0, lengths)[1:-1]

        def project(layer, h):
            # Shared pattern: linear layer -> dropout(0.32) -> ELU.
            return F.elu(F.dropout(layer(h), 0.32, train=self.train))

        word_emb = self.emb_word(F.hstack(ws))
        suffix_emb = F.reshape(self.emb_suf(F.vstack(ss)),
                               (-1, 4 * self.afix_dim))
        prefix_emb = F.reshape(self.emb_prf(F.vstack(ps)),
                               (-1, 4 * self.afix_dim))
        ins = F.dropout(F.concat([word_emb, suffix_emb, prefix_emb]),
                        self.dropout_ratio,
                        train=self.train)

        xs_f = list(F.split_axis(ins, split, 0))
        # The backward LSTM reads each sentence reversed.
        xs_b = [x[::-1] for x in xs_f]
        cx_f, hx_f, cx_b, hx_b = self._init_state(xp, batchsize)
        _, _, hs_f = self.lstm_f(hx_f, cx_f, xs_f, train=self.train)
        _, _, hs_b = self.lstm_b(hx_b, cx_b, xs_b, train=self.train)
        # Restore the original token order of the backward outputs.
        hs_b = [h[::-1] for h in hs_b]
        hs = [F.concat([h_f, h_b]) for h_f, h_b in zip(hs_f, hs_b)]

        dep_ys = [
            self.biaffine_arc(project(self.arc_dep, h),
                              project(self.arc_head, h))
            for h in hs
        ]

        if dep_ts is None:
            head_ids = [F.argmax(y, axis=1) for y in dep_ys]
        else:
            head_ids = dep_ts

        # Representation of the selected head for every token.
        head_states = F.vstack([
            F.embed_id(t, h, ignore_label=IGNORE)
            for h, t in zip(hs, head_ids)
        ])
        heads = project(self.rel_head, head_states)
        childs = project(self.rel_dep, F.vstack(hs))

        cat_ys = list(F.split_axis(self.biaffine_tag(childs, heads), split, 0))

        return cat_ys, dep_ys
Example #32
0
 def sequence_embed(xs):
     """Embed a batch of integer id sequences and split the results per sequence.

     The sequences are concatenated, looked up in ``wordembeds`` (id 0 is
     passed as ``ignore_label``), projected with ``self.embed`` + tanh, and
     then fed to ``self.uni_embed`` and ``self.var_linear`` (the latter is
     squeezed on its last axis and passed through a sigmoid).

     Args:
         xs: Iterable of id sequences of lengths L1, L2, ...

     Returns:
         A 3-tuple of ``F.split_axis`` results, each split at the sequence
         boundaries: the tanh embeddings, the ``uni_embed`` outputs and the
         sigmoid outputs.
     """
     seqs = list(xs)  # F.concat is given a list
     lengths = [len(seq) for seq in seqs]
     boundaries = np.cumsum(lengths[:-1])

     flat_ids = F.concat(seqs, axis=0)  # (L1+L2+...,)
     looked_up = F.embed_id(flat_ids, wordembeds, ignore_label=0)
     embedded = F.tanh(self.embed(looked_up))  # (..., E)
     uni = self.uni_embed(embedded)  # (..., E)
     # (..., 1) -> (...,) after the squeeze
     variance = F.sigmoid(F.squeeze(self.var_linear(embedded), -1))

     embedded_parts = F.split_axis(embedded, boundaries, 0)
     uni_parts = F.split_axis(uni, boundaries, 0)
     variance_parts = F.split_axis(variance, boundaries, 0)
     return embedded_parts, uni_parts, variance_parts
Example #33
0
    def forward(self, inputs):
        """Look up context-insensitive token embeddings with sentence boundaries.

        Positions holding a positive id are treated as real tokens.
        ``add_sentence_boundary_token_ids`` adds the begin/end-of-sentence
        ids and returns the matching mask; the resulting ids are looked up
        in ``self._token_embedding_weights``.

        Parameters
        ----------
        inputs:
            Array of token ids for the current batch; documented upstream
            as shape ``(batch_size, sequence_length)``.

        Returns
        -------
        Dict with keys:
        ``'mask'``:
            The mask returned by ``add_sentence_boundary_token_ids``.
        ``'token_embedding'``:
            Embeddings of the ids including the boundary tokens.
        """
        mask = inputs > 0

        ids_with_boundaries, mask_with_boundaries = \
            add_sentence_boundary_token_ids(
                inputs,
                mask,
                self._beginning_of_sentence_token,
                self._end_of_sentence_token)

        embedded = F.embed_id(ids_with_boundaries,
                              self._token_embedding_weights)

        return {'mask': mask_with_boundaries, 'token_embedding': embedded}
Example #34
0
    def forward(self, inputs):
        """Compute context-insensitive token embeddings from character ids.

        A token position counts as valid when at least one of its character
        ids is positive.  After ``add_sentence_boundary_token_ids`` adds the
        begin/end-of-sentence characters, the ids are embedded, run through
        every ``char_conv_<i>`` link (max over axes 2 and 3, then the
        configured activation), concatenated, passed through the highway
        layers and projected.

        Parameters
        ----------
        inputs:
            Array of character ids for the current batch; documented
            upstream as shape ``(batch_size, sequence_length, 50)``.

        Returns
        -------
        Dict with keys:
        ``'mask'``:
            The mask returned by ``add_sentence_boundary_token_ids``.
        ``'token_embedding'``:
            The projected embeddings reshaped to
            ``(batch_size, sequence_length, -1)``, where both sizes are
            taken from the ids with boundaries added.

        Raises
        ------
        ConfigurationError
            If the configured activation is neither ``'tanh'`` nor
            ``'relu'``.
        """
        mask = (inputs > 0).sum(axis=-1) > 0

        char_ids, mask_with_bos_eos = add_sentence_boundary_token_ids(
            inputs,
            mask,
            self._beginning_of_sentence_characters,
            self._end_of_sentence_characters)

        cnn_options = self._options['char_cnn']

        # Character id embedding: one row of characters per token.
        chars_per_token = cnn_options['max_characters_per_token']
        embedded = F.embed_id(char_ids.reshape((-1, chars_per_token)),
                              self._char_embedding_weights)

        # Pick the non-linearity applied after each convolution.
        activation_name = cnn_options['activation']
        if activation_name == 'tanh':
            activation = F.tanh
        elif activation_name == 'relu':
            activation = F.relu
        else:
            raise ConfigurationError("Unknown activation")

        # Swap the last two axes and append a trailing unit axis before
        # handing the tensor to the convolution links.
        conv_input = F.transpose(embedded, (0, 2, 1))[:, :, :, None]

        # One max-pooled, activated feature block per filter width.
        pooled = [
            activation(
                F.max(getattr(self, 'char_conv_{}'.format(idx))(conv_input),
                      axis=(2, 3)))
            for idx in range(len(self._convolutions))
        ]

        features = F.concat(pooled, axis=-1)
        features = self._highways.forward(features)
        projected = self._projection(features)

        # Restore the (batch, sequence) layout of the padded ids.
        batch_size, sequence_length, _ = char_ids.shape

        return {
            'mask': mask_with_bos_eos,
            'token_embedding': projected.reshape(
                (batch_size, sequence_length, -1))
        }