Esempio n. 1
0
  def transduce(self, es):
    """Run two stacked convolutions followed by stacked BiRNN layers.

    Args:
      es: input expression sequence; read via as_tensor().
    Returns:
      list of per-timestep expressions (forward/backward states concatenated).
    """
    tensor = es.as_tensor()

    # e.g. tensor.dim() == ((276, 240), 1)
    seq_len = tensor.dim()[0][0]
    batch_size = tensor.dim()[1]

    # Convolutions need a minimum time dimension; zero-pad until the conv
    # output length reaches the temporal filter size.
    extra = 0
    while math.ceil(float(seq_len + extra - self.filter_size_time + 1) / float(self.stride[0])) < self.filter_size_time:
      extra += 1
    if extra > 0:
      zeros = dy.zeroes((extra, self.freq_dim * self.chn_dim), batch_size=tensor.dim()[1])
      tensor = dy.concatenate([tensor, zeros])
      seq_len += extra

    # two stacked 2D convolutions over a (time, freq, channel) view
    chn_input = dy.reshape(tensor, (seq_len, self.freq_dim, self.chn_dim), batch_size=batch_size)
    conv1 = dy.conv2d(chn_input, dy.parameter(self.filters1), stride=self.stride, is_valid=True)
    conv2 = dy.conv2d(conv1, dy.parameter(self.filters2), stride=self.stride, is_valid=True)
    time_steps, d1, d2 = conv2.dim()[0]
    flat = dy.reshape(conv2, (time_steps, d1 * d2), batch_size=batch_size)
    outputs = [flat[t] for t in range(flat.dim()[0][0])]

    # stacked bidirectional RNN layers
    for fwd_builder, bwd_builder in self.builder_layers:
      fwd = fwd_builder.initial_state().transduce(outputs)
      bwd = bwd_builder.initial_state().transduce(reversed(outputs))
      outputs = [dy.concatenate([f, b]) for f, b in zip(fwd, reversed(bwd))]
    return outputs
Esempio n. 2
0
  def transduce(self, src: ExpressionSequence) -> ExpressionSequence:
    """Three conv/pool stages followed by L2 normalization.

    Produces one fixed-size (num_filters[2],) vector per batch element,
    wrapped in an ExpressionSequence.
    """
    inp = src.as_tensor()

    # inp dim is e.g. ((40, 1000), 128): (height, width) x batch
    batch_size = inp.dim()[1]

    # stage 1: pad, convolve, rectify, max-pool along the width axis
    padded1 = padding(inp, self.filter_width[0] + 3)
    stride1 = [self.stride[0], self.stride[0]]
    conv1 = dy.rectify(dy.conv2d(padded1, dy.parameter(self.filters1), stride=stride1, is_valid=True))
    pooled1 = dy.maxpooling2d(conv1, (1, 4), (1, 2), is_valid=True)

    # stage 2
    padded2 = padding(pooled1, self.filter_width[1] + 3)
    stride2 = [self.stride[1], self.stride[1]]
    conv2 = dy.rectify(dy.conv2d(padded2, dy.parameter(self.filters2), stride=stride2, is_valid=True))
    pooled2 = dy.maxpooling2d(conv2, (1, 4), (1, 2), is_valid=True)

    # stage 3: pad, convolve, rectify, then global max over axis 1
    padded3 = padding(pooled2, self.filter_width[2])
    stride3 = [self.stride[2], self.stride[2]]
    conv3 = dy.rectify(dy.conv2d(padded3, dy.parameter(self.filters3), stride=stride3, is_valid=True))
    pooled3 = dy.max_dim(conv3, d=1)

    # L2-normalize (epsilon guards against a zero norm) and flatten
    normalized = dy.cdiv(pooled3, dy.l2_norm(pooled3) + 1e-6)
    out = dy.reshape(normalized, (self.num_filters[2],), batch_size=batch_size)

    return ExpressionSequence(expr_tensor=out)
Esempio n. 3
0
    def calc_attention(self, state):
        """MLP attention over the current sentence, optionally conditioned on
        convolutional features of the previous attention distribution."""
        V = dy.parameter(self.pV)
        U = dy.parameter(self.pU)
        WI = self.WI
        mask = self.curr_sent.mask

        if self.attention_vecs:
            # convolve the last attention vector for location-aware features
            feats = dy.conv2d(self.attention_vecs[-1],
                              self.pL,
                              stride=[1, 1],
                              is_valid=False)
            feats = dy.reshape(feats,
                               (feats.dim()[0][0], self.hidden_dim),
                               batch_size=feats.dim()[1])
            h = dy.tanh(dy.colwise_add(WI + dy.transpose(feats), V * state))
        else:
            h = dy.tanh(dy.colwise_add(WI, V * state))

        scores = dy.transpose(U * h)
        if mask is not None:
            # push masked positions strongly negative before the softmax
            scores = mask.add_to_tensor_expr(scores, multiplicator=-100.0)
        normalized = dy.softmax(scores)
        self.attention_vecs.append(normalized)
        return normalized
Esempio n. 4
0
 def apply(self, x_input):
     """Convolve x_input with this layer's kernel and apply a ReLU."""
     convolved = dy.conv2d(x_input,
                           self.kernel.expr(update=True),
                           (self.s_x, self.s_y),
                           is_valid=self.is_valid)
     return dy.rectify(convolved)
Esempio n. 5
0
  def transduce(self, es: expression_seqs.ExpressionSequence) -> expression_seqs.ExpressionSequence:
    """Bidirectional convolutional LSTM over a (time, freq, channel) input.

    Input and recurrent transformations are 2D convolutions; the gating
    itself uses the vanilla-LSTM primitives (dy.vanilla_lstm_c / _h).
    Forward and backward hidden states for each time step are paired up
    and concatenated in the returned sequence.
    """
    mask = es.mask
    sent_len = len(es)
    es_expr = es.as_transposed_tensor()
    batch_size = es_expr.dim()[1]

    # view the input as (time, freq, channels) for conv2d
    es_chn = dy.reshape(es_expr, (sent_len, self.freq_dim, self.chn_dim), batch_size=batch_size)

    h_out = {}
    for direction in ["fwd", "bwd"]:
      # input convolutions: all four gate pre-activations plus bias are
      # computed for every time step in a single convolution, then split
      gates_xt_bias = dy.conv2d_bias(es_chn, dy.parameter(self.params["x2all_" + direction]),
                                     dy.parameter(self.params["b_" + direction]), stride=(1, 1), is_valid=False)
      gates_xt_bias_list = [dy.pick_range(gates_xt_bias, i, i + 1) for i in range(sent_len)]

      h = []
      c = []
      for input_pos in range(sent_len):
        # the bwd pass walks the precomputed gate list in reverse
        directional_pos = input_pos if direction == "fwd" else sent_len - input_pos - 1
        gates_t = gates_xt_bias_list[directional_pos]
        if input_pos > 0:
          # recurrent convolutions
          gates_h_t = dy.conv2d(h[-1], dy.parameter(self.params["h2all_" + direction]), stride=(1, 1), is_valid=False)
          gates_t += gates_h_t

        # standard LSTM logic
        if len(c) == 0:
          c_tm1 = dy.zeros((self.freq_dim * self.num_filters,), batch_size=batch_size)
        else:
          c_tm1 = c[-1]
        gates_t_reshaped = dy.reshape(gates_t, (4 * self.freq_dim * self.num_filters,), batch_size=batch_size)
        c_t = dy.reshape(dy.vanilla_lstm_c(c_tm1, gates_t_reshaped), (self.freq_dim * self.num_filters,),
                         batch_size=batch_size)
        h_t = dy.vanilla_lstm_h(c_t, gates_t_reshaped)
        h_t = dy.reshape(h_t, (1, self.freq_dim, self.num_filters,), batch_size=batch_size)

        if mask is None or np.isclose(np.sum(mask.np_arr[:, input_pos:input_pos + 1]), 0.0):
          # nothing masked at this step: keep the freshly computed state
          c.append(c_t)
          h.append(h_t)
        else:
          # masked batch elements keep their previous state instead.
          # NOTE(review): at input_pos == 0 this branch would index c[-1]
          # of an empty list; presumably the mask is never set at the
          # first step -- confirm with callers.
          c.append(
            mask.cmult_by_timestep_expr(c_t, input_pos, True) + mask.cmult_by_timestep_expr(c[-1], input_pos, False))
          h.append(
            mask.cmult_by_timestep_expr(h_t, input_pos, True) + mask.cmult_by_timestep_expr(h[-1], input_pos, False))

      h_out[direction] = h
    ret_expr = []
    for state_i in range(len(h_out["fwd"])):
      # pair forward step i with backward step (len-1-i) so both refer to
      # the same input position
      state_fwd = h_out["fwd"][state_i]
      state_bwd = h_out["bwd"][-1 - state_i]
      output_dim = (state_fwd.dim()[0][1] * state_fwd.dim()[0][2],)
      fwd_reshape = dy.reshape(state_fwd, output_dim, batch_size=batch_size)
      bwd_reshape = dy.reshape(state_bwd, output_dim, batch_size=batch_size)
      ret_expr.append(dy.concatenate([fwd_reshape, bwd_reshape], d=0 if self.reshape_output else 2))
    return expression_seqs.ExpressionSequence(expr_list=ret_expr, mask=mask)

  # TODO: implement get_final_states()
 def __call__(self, src):
     """Convolution followed by a recurrent highway network (RHN) and an
     attention-weighted combination over time.

     Returns:
       ExpressionSequence wrapping the attention-combined tensor.
     """
     src = src.as_tensor()
     # convolutional layer
     src = padding(src,
                   src.dim()[0][0],
                   src.dim()[0][1], self.filter_width, self.stride,
                   src.dim()[1])
     l1 = dy.rectify(
         dy.conv2d(src,
                   dy.parameter(self.filter_conv),
                   stride=[self.stride, self.stride],
                   is_valid=True))
     timestep = l1.dim()[0][1]
     features = l1.dim()[0][2]
     batch_size = l1.dim()[1]
     # reshape l1 to be (timestep, features), keeping the batch_size,
     # then split into one expression per time step
     rhn_in = dy.reshape(l1, (timestep, features), batch_size=batch_size)
     rhn_in = [dy.pick(rhn_in, i) for i in range(timestep)]
     for l in range(self.rhn_num_hidden_layers):
         rhn_out = []
         # initialize a learned vector for the first state vector, keep the same batch size.
         prev_state = dy.parameter(self.init[l])
         # begin recurrent highway network.  NOTE: prev_state is carried
         # across time steps t, not reset per step.
         for t in range(timestep):
             for m in range(0, self.rhn_microsteps):
                 # H: candidate transform, T: transfer gate (highway-style)
                 H = dy.affine_transform([
                     dy.parameter(self.recur[l][m][1]),
                     dy.parameter(self.recur[l][m][0]), prev_state
                 ])
                 T = dy.affine_transform([
                     dy.parameter(self.recur[l][m][3]),
                     dy.parameter(self.recur[l][m][2]), prev_state
                 ])
                 if m == 0:
                     # only the first microstep reads the layer input
                     H += dy.parameter(self.linear[l][0]) * rhn_in[t]
                     T += dy.parameter(self.linear[l][1]) * rhn_in[t]
                 H = dy.tanh(H)
                 T = dy.logistic(T)
                 # highway mix of carry (1-T) and transform (T) paths
                 prev_state = dy.cmult(1 - T, prev_state) + dy.cmult(
                     T, H)  # ((1024, ), batch_size)
             rhn_out.append(prev_state)
         if self.residual and l > 0:
             # residual connection between stacked RHN layers
             rhn_out = [sum(x) for x in zip(rhn_out, rhn_in)]
         rhn_in = rhn_out
     # Compute the attention-weighted average of the activations
     rhn_in = dy.concatenate_cols(rhn_in)
     scores = dy.transpose(dy.parameter(self.attention[0][1])) * dy.tanh(
         dy.parameter(self.attention[0][0]) *
         rhn_in)  # ((1,510), batch_size)
     scores = dy.reshape(scores, (scores.dim()[0][1], ),
                         batch_size=scores.dim()[1])
     attn_out = rhn_in * dy.softmax(
         scores
     )  # rhn_in is ((1024,510), batch_size); softmax is ((510,), batch_size)
     return ExpressionSequence(expr_tensor=attn_out)
Esempio n. 7
0
    def transduce(self, src):
        """Three conv layers with pooling, then k-max pooling and L2
        normalization.

        Args:
          src: input expression sequence; read as a (height, width) tensor.
        Returns:
          ExpressionSequence wrapping one (num_filters[2],) vector per
          batch element.
        """
        src = src.as_tensor()

        src_height = src.dim()[0][0]
        src_width = src.dim()[0][1]
        src_channels = 1
        batch_size = src.dim()[1]

        # add an explicit single-channel dimension for conv2d
        src = dy.reshape(src, (src_height, src_width, src_channels),
                         batch_size=batch_size)  # ((276, 80, 3), 1)
        # convolution and pooling layers
        l1 = dy.rectify(
            dy.conv2d(src,
                      dy.parameter(self.filters1),
                      stride=[self.stride[0], self.stride[0]],
                      is_valid=True))
        pool1 = dy.maxpooling2d(l1, (1, 4), (1, 2), is_valid=True)

        l2 = dy.rectify(
            dy.conv2d(pool1,
                      dy.parameter(self.filters2),
                      stride=[self.stride[1], self.stride[1]],
                      is_valid=True))
        pool2 = dy.maxpooling2d(l2, (1, 4), (1, 2), is_valid=True)

        l3 = dy.rectify(
            dy.conv2d(pool2,
                      dy.parameter(self.filters3),
                      stride=[self.stride[2], self.stride[2]],
                      is_valid=True))

        pool3 = dy.kmax_pooling(l3, 1, d=1)
        # L2-normalize; the epsilon prevents a division by zero when the
        # activations are all zero (other transduce variants in this file
        # use the same 1e-6 guard)
        output = dy.cdiv(pool3, dy.sqrt(dy.squared_norm(pool3)) + 1e-6)
        output = dy.reshape(output, (self.num_filters[2], ),
                            batch_size=batch_size)

        return ExpressionSequence(expr_tensor=output)
Esempio n. 8
0
    def run_classifier(self, common_top_recur, word_inputs, domain_flag):
        """Domain classifier head: multi-filter CNN over recurrent states,
        linear projection, then per-batch loss and accuracy.

        Returns:
          (class_loss, class_accurate): averaged NLL loss expression and
          the fraction of batch elements predicted as domain_flag.
        """
        seq_len = word_inputs.shape[0]
        batch_size = word_inputs.shape[1]

        filters = [dy.parameter(f) for f in self.filter]
        W = dy.parameter(self.class_W)

        conv_in = dy.reshape(common_top_recur,
                             (1, seq_len, 2 * self.lstm_hiddens), batch_size)

        pooled = []
        for filt in filters:
            conv = dy.conv2d(conv_in, filt, [1, 1],
                             is_valid=False)  # len*batch*filter_num
            # global max over the time axis, flatten, then rectify
            feat = dy.max_dim(conv, d=1)
            feat = dy.reshape(feat, (self.filter_size, ), batch_size)
            pooled.append(dy.rectify(feat))

        result = W * dy.concatenate(pooled)

        # accuracy of the arg-max prediction against the known domain flag
        predict = np.argmax(result.npvalue(), axis=0)
        correct = 0.
        for pre in predict:
            if int(pre) == domain_flag:
                correct += 1
        class_accurate = correct / batch_size

        # every element of the batch shares the same target domain
        target = [domain_flag] * batch_size
        classes_loss = dy.pickneglogsoftmax_batch(result, target)
        class_loss = dy.sum_batches(classes_loss) / batch_size
        return class_loss, class_accurate
Esempio n. 9
0
    def __call__(self, sentence, c2i, maxn_char, act, train=False):
        """Character-CNN word encoder.

        Args:
          sentence: iterable of tokens, each exposing a .chars sequence.
          c2i: char -> index mapping; unknown chars map to index 0.
          maxn_char: maximum character count, used to size pooling windows.
          act: activation applied to each convolution output.
          train: unused here; kept for interface compatibility.
        Returns:
          list with one word-embedding expression per token.
        """
        words_batch = []
        for token in sentence:
            chars_emb = [self.clookup[int(c2i.get(c, 0))] for c in token.chars]
            c2w = dy.concatenate_cols(chars_emb)
            # add a trailing channel dimension for conv2d
            c2w = dy.reshape(c2w, tuple(list(c2w.dim()[0]) + [1]))
            words_batch.append(c2w)

        words_batch = dy.concatenate_to_batch(words_batch)
        convds = [dy.conv2d(words_batch, W, stride=(1, 1), is_valid=True)
                  for W in self.Ws]
        actds = [act(convd) for convd in convds]
        # pool each window size down to a single position on the char axis
        poolds = [dy.maxpooling2d(actd, ksize=(1, maxn_char - win_size + 1), stride=(1, 1))
                  for win_size, actd in zip(self.win_sizes, actds)]
        outs = [dy.reshape(poold, (poold.dim()[0][2],)) for poold in poolds]
        words_batch = dy.concatenate(outs)

        # split the batched result back into one expression per token
        return [dy.pick_batch_elem(words_batch, idx)
                for idx in range(len(sentence))]
Esempio n. 10
0
    def __call__(self, es):
        """Stack of conv layers with optional nonlinearity and max pooling.

        Args:
          es: input expression sequence; read as a (time, freq*chn) tensor.
        Returns:
          ExpressionSequence, as a tensor or a per-timestep list depending
          on self.output_tensor.
        """
        es_expr = es.as_tensor()

        sent_len = es_expr.dim()[0][0]
        batch_size = es_expr.dim()[1]

        # convolutions won't work if sentence length is too short; pad if necessary
        pad_size = 0
        while self.get_output_len(sent_len + pad_size) < self.filter_size_time:
            pad_size += 1
        if pad_size > 0:
            es_expr = dy.concatenate([
                es_expr,
                dy.zeroes((pad_size, self.freq_dim * self.chn_dim),
                          batch_size=es_expr.dim()[1])
            ])
            sent_len += pad_size

        # reshape to (time, freq, channels) unless already in that shape
        if es_expr.dim() == ((sent_len, self.freq_dim, self.chn_dim),
                             batch_size):
            es_chn = es_expr
        else:
            es_chn = dy.reshape(es_expr,
                                (sent_len, self.freq_dim, self.chn_dim),
                                batch_size=batch_size)
        cnn_layer = es_chn

        # loop over layers
        for layer_i in range(len(self.filters_layers)):
            # convolution
            cnn_layer = dy.conv2d(cnn_layer,
                                  dy.parameter(self.filters_layers[layer_i]),
                                  stride=(1, 1),
                                  is_valid=True)

            # non-linearity
            if self.nonlinearity == "rectify":
                cnn_layer = dy.rectify(cnn_layer)
            elif self.nonlinearity == "silu":
                cnn_layer = dy.silu(cnn_layer)
            elif self.nonlinearity is not None:
                raise RuntimeError("unknown nonlinearity: %s" %
                                   self.nonlinearity)

            # max pooling
            if self.pooling[layer_i]:
                cnn_layer = dy.maxpooling2d(cnn_layer, (3, 3),
                                            stride=self.pooling[layer_i],
                                            is_valid=True)

        # subsample the mask to the convolved length; guard against an
        # unmasked input (es.mask is None), which previously crashed here
        mask_out = None if es.mask is None else es.mask.lin_subsampled(
            trg_len=cnn_layer.dim()[0][0])
        if self.output_tensor:
            return expression_seqs.ExpressionSequence(tensor_expr=cnn_layer,
                                                      mask=mask_out)
        else:
            cnn_out = dy.reshape(
                cnn_layer, (cnn_layer.dim()[0][0],
                            cnn_layer.dim()[0][1] * cnn_layer.dim()[0][2]),
                batch_size=batch_size)
            es_list = [cnn_out[i] for i in range(cnn_out.dim()[0][0])]
            return expression_seqs.ExpressionSequence(list_expr=es_list,
                                                      mask=mask_out)
Esempio n. 11
0
    def __call__(self, es):
        """Stack of conv layers with optional weight noise, batch norm and
        pre-/post-activation ordering; returns an expression sequence."""
        seq_tensor = es.as_tensor()
        sent_len = seq_tensor.dim()[0][0]
        batch_size = seq_tensor.dim()[1]

        # zero-pad the time axis until the conv output is long enough
        pad_size = 0
        while self.get_output_len(sent_len + pad_size) < self.filter_size_time:
            pad_size += 1
        seq_tensor = self.pad(seq_tensor, pad_size)
        sent_len += pad_size

        # ensure a (time, freq, channels) view
        target_dim = ((sent_len, self.freq_dim, self.chn_dim), batch_size)
        if seq_tensor.dim() == target_dim:
            cnn_layer = seq_tensor
        else:
            cnn_layer = dy.reshape(seq_tensor,
                                   (sent_len, self.freq_dim, self.chn_dim),
                                   batch_size=batch_size)

        mask_out = None
        for layer_i in range(len(self.filters_layers)):
            cnn_filter = self.weight_noise(self.filters_layers[layer_i],
                                           self.train)

            # post-activation ordering: conv -> (bn) -> nonlinearity
            if not self.pre_activation:
                cnn_layer = dy.conv2d(
                    cnn_layer,
                    cnn_filter,
                    stride=self.get_stride_for_layer(layer_i),
                    is_valid=True)

            if self.use_bn:
                mask_out = None if es.mask is None else es.mask.lin_subsampled(
                    trg_len=cnn_layer.dim()[0][0])
                cnn_layer = self.bn_layers[layer_i](cnn_layer,
                                                    train=self.train,
                                                    mask=mask_out)

            cnn_layer = self.apply_nonlinearity(self.nonlinearity, cnn_layer)
            self.last_output.append(cnn_layer)

            # pre-activation ordering: (bn) -> nonlinearity -> conv
            if self.pre_activation:
                cnn_layer = dy.conv2d(
                    cnn_layer,
                    cnn_filter,
                    stride=self.get_stride_for_layer(layer_i),
                    is_valid=True)

        # subsample the mask (if any) to the final convolved length
        mask_out = None if es.mask is None else es.mask.lin_subsampled(
            trg_len=cnn_layer.dim()[0][0])
        if self.output_transposed_tensor:
            return expression_seqs.ExpressionSequence(
                expr_transposed_tensor=cnn_layer, mask=mask_out)
        # flatten freq x channel, then emit one expression per time step
        flat = dy.reshape(cnn_layer,
                          (cnn_layer.dim()[0][0],
                           cnn_layer.dim()[0][1] * cnn_layer.dim()[0][2]),
                          batch_size=batch_size)
        steps = [flat[i] for i in range(flat.dim()[0][0])]
        return expression_seqs.ExpressionSequence(expr_list=steps,
                                                  mask=mask_out)
Esempio n. 12
0
    def get_vecs(self, node):
        """Build the LSTM input vector for one node.

        Combines a word embedding (or a character-level RNN/CNN fallback)
        with optional POS, supertag and external embeddings.

        Args:
          node: token-like object exposing .norm, .form, .postag, .supertag.
        Returns:
          a single concatenated dynet expression.
        """
        # During training, occasionally fall back to the character model even
        # for frequent words (word-dropout-style regularization).
        # NOTE(review): the hasattr checks look at self while the values are
        # read from self.options -- confirm which object carries these flags.
        need_word_fallback = hasattr(self, "is_train") and self.options.is_train and \
                             hasattr(self, "word_fallback") and \
                             self.options.word_fallback > 0 and \
                             self.random.random() < self.options.word_fallback
        if not node.norm:
            # empty string
            word_vec = self.word_embedding("*EMPTY*")
        elif self.options.cembedding_dims != 0 and self.options.word_threshold > 1 \
                and (node.norm not in self.freq_words or need_word_fallback):
            # use character vector
            char_vecs = [self.char_embedding(i) for i in node.norm]
            if getattr(self.options, "cembedding_type", "rnn") == "rnn":
                char_vecs_o = self.c_lstm(char_vecs)
                word_vec = (char_vecs_o[0] + char_vecs_o[-1]) / 2
            else:
                # char-CNN path: pad, convolve with several filter widths,
                # max-pool each to a single vector, concatenate
                pad_size = max(self.options.cembedding_filters) - 1
                zero = dn.zeros((self.options.cembedding_dims,))
                char_vecs = [zero] * pad_size + char_vecs + [zero] * pad_size

                pooled_vectors = []
                conv_input = dn.transpose(dn.concatenate(char_vecs, 1))
                conv_input_stacked = dn.reshape(conv_input, conv_input.dim()[0] + (1,))
                # integer division: the reshape below needs an int dimension
                # (true division would produce a float under Python 3)
                cembedding_filter_count = self.options.wembedding_dims // len(self.options.cembedding_filters)
                for filter_size, conv_W in zip(self.options.cembedding_filters, self.c_conv_W.components):
                    conv_W_expr = conv_W.expr()
                    conved = dn.conv2d(conv_input_stacked,
                                       conv_W_expr,
                                       [1, 1])
                    conved = dn.rectify(conved)
                    conved_dim = len(char_vecs) - filter_size + 1
                    pooled = dn.maxpooling2d(conved,
                                             (conved_dim, 1),
                                             (1, 1)
                                             )
                    pooled_vectors.append(dn.reshape(pooled, (cembedding_filter_count,)))
                word_vec = dn.concatenate(pooled_vectors)
        else:
            # use word vector
            word_vec = self.word_embedding(node.norm)
        vecs = [word_vec]

        if self.options.pembedding_dims > 0:
            # POS embedding, with optional block dropout at train time
            postag_dropout = getattr(self, "postag_dropout", 0.0)
            pos_vec = self.pos_embedding(node.postag)
            if self.options.is_train and postag_dropout > 0:
                pos_vec = dn.block_dropout(pos_vec, postag_dropout)
            vecs.append(pos_vec)

        if self.options.supertag_embedding > 0:
            # supertag embedding, with optional block dropout at train time
            supertag_dropout = getattr(self, "supertag_dropout", 0.0)
            supertag_vec = self.supertag_embedding(node.supertag)
            if self.options.is_train and supertag_dropout > 0:
                supertag_vec = dn.block_dropout(supertag_vec, supertag_dropout)
            vecs.append(supertag_vec)

        if self.ext_embedding is not None:
            # pretrained external embedding, optionally held static
            ext_vec = self.ext_embedding(
                node.form, (node.norm,),
                const=getattr(self.options, "static_ext_embedding", False))
            vecs.append(ext_vec)

        return dn.concatenate(vecs)