Exemple #1
0
    def transduce(self, embed_sent):
        """Apply the stacked convolution layers to an embedded sentence.

        The input tensor is zero-padded on both sides along dimension 1, run
        through the first convolution, through every ``(filter, bias)`` pair
        in ``self.builder_layers`` and finally through the last convolution.
        ``self.non_linearity`` is applied after every layer except the last.

        Args:
            embed_sent: object exposing ``as_tensor()`` that yields the
                sentence tensor (assumed ``(input_dim, sent_len)`` per batch
                element -- TODO confirm against the caller).

        Returns:
            ExpressionSequence wrapping the output tensor.  Its last element
            is also stored in ``self._final_states``.
        """
        src = embed_sent.as_tensor()

        sent_len = src.dim()[0][1]
        batch_size = src.dim()[1]
        # Floor division: tensor dimensions must be integers and "/" returns
        # a float on Python 3.
        pad_size = (self.window_receptor -
                    1) // 2  #TODO adapt it also for even window size

        src = dy.concatenate([
            dy.zeroes((self.input_dim, pad_size), batch_size=batch_size), src,
            dy.zeroes((self.input_dim, pad_size), batch_size=batch_size)
        ],
                             d=1)
        padded_sent_len = sent_len + 2 * pad_size

        # Look the activation up by value.  The previous ``is 'tanh'`` checks
        # compared object identity, which only works when the interpreter
        # happens to intern both strings.  'linear' and any unrecognised name
        # leave the layer unchanged, exactly as before.
        activations = {
            'tanh': dy.tanh,
            'relu': dy.rectify,
            'sigmoid': dy.logistic,
        }
        activate = activations.get(self.non_linearity)

        conv1 = dy.parameter(self.pConv1)
        bias1 = dy.parameter(self.pBias1)
        src_chn = dy.reshape(src, (self.input_dim, padded_sent_len, 1),
                             batch_size=batch_size)
        cnn_layer1 = dy.conv2d_bias(src_chn, conv1, bias1, stride=[1, 1])

        hidden_layer = dy.reshape(cnn_layer1, (self.internal_dim, sent_len, 1),
                                  batch_size=batch_size)
        if activate is not None:
            hidden_layer = activate(hidden_layer)

        for conv_hid, bias_hid in self.builder_layers:
            hidden_layer = dy.conv2d_bias(hidden_layer,
                                          dy.parameter(conv_hid),
                                          dy.parameter(bias_hid),
                                          stride=[1, 1])
            hidden_layer = dy.reshape(hidden_layer,
                                      (self.internal_dim, sent_len, 1),
                                      batch_size=batch_size)
            if activate is not None:
                hidden_layer = activate(hidden_layer)

        last_conv = dy.parameter(self.last_conv)
        last_bias = dy.parameter(self.last_bias)
        output = dy.conv2d_bias(hidden_layer,
                                last_conv,
                                last_bias,
                                stride=[1, 1])
        output = dy.reshape(output, (sent_len, self.output_dim),
                            batch_size=batch_size)
        output_seq = ExpressionSequence(expr_tensor=output)
        self._final_states = [FinalTransducerState(output_seq[-1])]
        return output_seq
    def build_graph(self, x):
        """Build the classification logit for one input matrix.

        ``x`` is reshaped to ``(1, n, d)``, passed through three independent
        tanh-activated convolutions, each of which is max-pooled, and the
        pooled vectors are concatenated and projected with ``W`` and ``b``.

        Returns the logit expression ``dot_product(pool, W) + b``.
        """
        (n, d), _ = x.dim()
        x = dy.reshape(x, (1, n, d))

        # One-dimensional convolution branches followed by max pooling.
        branches = (
            ('conv_W_1', 'conv_b_1', 'channel_1'),
            ('conv_W_2', 'conv_b_2', 'channel_2'),
            ('conv_W_3', 'conv_b_3', 'channel_3'),
        )
        pooled = []
        for weight_key, bias_key, channel_key in branches:
            kernel = dy.parameter(self.params[weight_key])
            offset = dy.parameter(self.params[bias_key])
            feature_map = dy.tanh(
                dy.conv2d_bias(x, kernel, offset, (1, 1), is_valid=False))
            pooled.append(
                dy.max_dim(
                    dy.reshape(feature_map, (n, self.options[channel_key]))))

        # Fully connected classification layer.
        W = dy.parameter(self.params['W'])
        b = dy.parameter(self.params['b'])
        pool = dy.concatenate(pooled, 0)
        return dy.dot_product(pool, W) + b
Exemple #3
0
def do_one_batch(X_batch, Z_batch):
    """Build the similarity expression between a speech input and a VGG input.

    ``X_batch`` goes through three conv / rectify / k-max-pooling stages and
    a final 1-max pooling, is reshaped to a vector and weight-normalised.
    ``Z_batch`` is projected with ``w_embed`` / ``b_embed``.  The result is
    the rectified product of the two embeddings.

    Relies on module-level names: ``DO_BATCH``, ``nmf``, ``nframes``,
    ``nvgg``, ``scnn``, ``vgg``, the convolution parameters ``w_i``/``b_i``,
    ``w_h1``/``b_h1``, ``w_h2``/``b_h2``, the size tables ``D`` and ``J``,
    ``w_embed``/``b_embed`` and the flags ``PRINT_EMBED`` / ``PRINT_SIM``.

    Returns:
        The DyNet expression ``rectify(transpose(zem_sp) * zem_vgg)``.
    """
    # Flatten the batch into 1-D vector for workaround
    batch_size = X_batch.shape[0]
    if DO_BATCH:
        # Column-major ('F') flattening before handing the data to DyNet.
        X_batch_f = X_batch.flatten('F')
        Z_batch_f = Z_batch.flatten('F')
        x = dy.reshape(dy.inputVector(X_batch_f), (nmf, nframes),
                       batch_size=batch_size)
        # NOTE(review): ``(nvgg)`` is just ``nvgg`` in parentheses, not a
        # one-element tuple -- confirm dy.reshape accepts it.
        z = dy.reshape(dy.inputVector(Z_batch_f), (nvgg),
                       batch_size=batch_size)
        # NOTE(review): ``scnn`` / ``vgg`` are defined outside this function;
        # what add_input does with the per-example rows is not visible here.
        scnn.add_input([X_batch[i] for i in range(X_batch.shape[0])])
        vgg.add_input([Z_batch[i] for i in range(X_batch.shape[0])])

    else:
        x = dy.matInput(X_batch.shape[0], X_batch.shape[1])
        x.set(X_batch.flatten('F'))
        z = dy.vecInput(Z_batch.shape[0])
        z.set(Z_batch.flatten('F'))
        # Transpose, then add a leading singleton dimension for conv2d.
        x = dy.reshape(dy.transpose(x, [1, 0]),
                       (1, X_batch.shape[1], X_batch.shape[0]))
    # Debug output: forces evaluation of x and prints its shape on every call.
    print(x.npvalue().shape)
    # Stage 1: convolution, ReLU, k-max pooling along dimension 1.
    a_h1 = dy.conv2d_bias(x, w_i, b_i, [1, 1], is_valid=False)
    h1 = dy.rectify(a_h1)
    h1_pool = dy.kmax_pooling(h1, D[1], d=1)

    # Stage 2.
    a_h2 = dy.conv2d_bias(h1_pool, w_h1, b_h1, [1, 1], is_valid=False)
    h2 = dy.rectify(a_h2)
    h2_pool = dy.kmax_pooling(h2, D[2], d=1)

    # Stage 3.
    a_h3 = dy.conv2d_bias(h2_pool, w_h2, b_h2, [1, 1], is_valid=False)
    h3 = dy.rectify(a_h3)
    h3_pool = dy.kmax_pooling(h3, D[3], d=1)

    # Keep only the single largest value along dimension 1, then flatten.
    h4 = dy.kmax_pooling(h3_pool, 1, d=1)
    h4_re = dy.reshape(h4, (J[3], ))
    #print(h4_re.npvalue().shape)
    # Weight-normalise the speech embedding with a constant gain of 1.
    g = dy.scalarInput(1.)
    zem_sp = dy.weight_norm(h4_re, g)
    #print(zem_sp.npvalue().shape)
    # Affine projection of the VGG input.
    zem_vgg = w_embed * z + b_embed
    #print(zem_vgg.npvalue().shape)

    # Raw similarity (sa) and its rectified version (s).
    sa = dy.transpose(zem_sp) * zem_vgg
    s = dy.rectify(sa)

    if PRINT_EMBED:
        print('Vgg embedding vector:', zem_vgg.npvalue().shape)
        print(zem_vgg.value())

        print('Speech embedding vector:', zem_sp.npvalue().shape)
        print(zem_sp.value())
    if PRINT_SIM:
        print('Raw Similarity:', sa.npvalue())
        print(sa.value())
        print('Similarity:', s.npvalue())
        print(s.value())

    return s
Exemple #4
0
 def apply(self, x_input):
     """Gated convolution: ``tanh(conv_t(x)) * logistic(conv_s(x))``.

     Both convolutions read the same input and share the stride
     ``(self.s_x, self.s_y)`` and the ``self.is_valid`` padding mode.
     """
     gate = dy.conv2d_bias(
         x_input,
         self.kernel_s.expr(),
         self.bias_s.expr(),
         (self.s_x, self.s_y),
         is_valid=self.is_valid,
     )
     signal = dy.conv2d_bias(
         x_input,
         self.kernel_t.expr(),
         self.bias_t.expr(),
         (self.s_x, self.s_y),
         is_valid=self.is_valid,
     )
     # Element-wise product of the squashed signal and the sigmoid gate.
     return dy.cmult(dy.tanh(signal), dy.logistic(gate))
Exemple #5
0
 def __call__(self, x, dropout=False):
   """Return the class logits for input ``x``.

   When the module-level ``args.conv`` flag is set, ``x`` is first reshaped
   to 28x28x1 and passed through two conv / max-pool / ReLU stages before
   the two-layer perceptron.  ``dropout=True`` applies dropout with rate
   ``DROPOUT_RATE`` to the hidden layer.
   """
   if args.conv:
     image = dy.reshape(x, (28, 28, 1))
     stage1 = dy.conv2d_bias(image, self.F1, self.b1, [1, 1], is_valid=False)
     stage1 = dy.rectify(dy.maxpooling2d(stage1, [2, 2], [2, 2]))
     stage2 = dy.conv2d_bias(stage1, self.F2, self.b2, [1, 1], is_valid=False)
     stage2 = dy.rectify(dy.maxpooling2d(stage2, [2, 2], [2, 2]))  # 7x7x64
     x = dy.reshape(stage2, (7 * 7 * 64,))
   hidden = dy.rectify(self.W1 * x + self.hbias)
   if dropout:
     hidden = dy.dropout(hidden, DROPOUT_RATE)
   return self.W2 * hidden
Exemple #6
0
 def __call__(self, x, dropout=False):
     """Compute unnormalised class scores for ``x``.

     With ``args.conv`` enabled the input is treated as a 28x28x1 image and
     run through the two convolutional stages (filters ``F1``/``F2``) before
     the dense layers; otherwise ``x`` goes straight to the dense layers.
     Dropout (rate ``DROPOUT_RATE``) is applied to the hidden activations
     only when ``dropout`` is true.
     """
     features = x
     if args.conv:
         features = dy.reshape(features, (28, 28, 1))
         for kernel, bias in ((self.F1, self.b1), (self.F2, self.b2)):
             features = dy.conv2d_bias(features, kernel, bias, [1, 1],
                                       is_valid=False)
             features = dy.rectify(
                 dy.maxpooling2d(features, [2, 2], [2, 2]))
         # After two 2x2 poolings the map is 7x7 with 64 channels.
         features = dy.reshape(features, (7 * 7 * 64, ))
     h = dy.rectify(self.W1 * features + self.hbias)
     h = dy.dropout(h, DROPOUT_RATE) if dropout else h
     return self.W2 * h
Exemple #7
0
def calc_predict_and_activations(wids, tag, words):
    """Print the prediction and per-filter diagnostics for one example.

    Args:
        wids: list of word ids.  Inputs shorter than ``WIN_SIZE`` are padded
            with id 0 on a copy, so the caller's list is not modified.
        tag: gold label, printed as an integer.
        words: list of word strings, used only for display.

    Prints the example, the output of ``display_activations``, the scores,
    the predicted class, the softmax bias and the per-class contributions
    ``W * features``.  Returns ``None``.
    """
    dy.renew_cg()
    if len(wids) < WIN_SIZE:
        # Build a new list: ``wids += ...`` would extend the caller's list.
        wids = wids + [0] * (WIN_SIZE - len(wids))

    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in wids], d=1)
    cnn_out = dy.conv2d_bias(cnn_in,
                             W_cnn,
                             b_cnn,
                             stride=(1, 1),
                             is_valid=False)
    # For every filter, the position whose activation is largest.
    filters = (dy.reshape(cnn_out, (len(wids), FILTER_SIZE))).npvalue()
    activations = filters.argmax(axis=0)

    # Max over dimension 1, then ReLU.
    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE, ))
    pool_out = dy.rectify(pool_out)

    scores = (W_sm * pool_out + b_sm).npvalue()
    print('%d ||| %s' % (tag, ' '.join(words)))
    predict = np.argmax(scores)
    print(display_activations(words, activations))
    print('scores=%s, predict: %d' % (scores, predict))
    features = pool_out.npvalue()
    W = W_sm.npvalue()
    bias = b_sm.npvalue()
    print('  bias=%s' % bias)
    # Row i holds each feature's contribution to the score of class i.
    contributions = W * features
    labels = (' very bad', '      bad', '  neutral', '     good', 'very good')
    for idx, label in enumerate(labels):
        print('%s (%.4f): %s' % (label, scores[idx], contributions[idx]))
Exemple #8
0
def calc_scores(words):
    """Return the main-task and auxiliary-task score expressions.

    Args:
        words: list of word ids.  Inputs shorter than ``WIN_SIZE`` are
            padded with id 0 on a copy, so the caller's list is not modified.

    Returns:
        Tuple ``(scores_main, scores_aux)`` of DyNet expressions that share
        the same pooled CNN features.
    """
    dy.renew_cg()
    W_cnn_express = dy.parameter(W_cnn)
    b_cnn_express = dy.parameter(b_cnn)
    W_sm_express = dy.parameter(W_sm)
    b_sm_express = dy.parameter(b_sm)
    Waux_sm_express = dy.parameter(Waux_sm)
    baux_sm_express = dy.parameter(baux_sm)
    # The convolution needs at least WIN_SIZE positions, so short inputs are
    # padded with index 0.  A new list is built because ``words += ...``
    # would extend the caller's list in place.
    if len(words) < WIN_SIZE:
        words = words + [0] * (WIN_SIZE - len(words))

    # Convolution + pooling layer
    cnn_in = dy.concatenate([W_emb[x] for x in words], d=1)  # one column per word
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn_express, b_cnn_express, stride=(1, 1), is_valid=False)
    pool_out = dy.max_dim(cnn_out, d=1)  # max over dimension 1
    pool_out = dy.reshape(pool_out, (FILTER_SIZE,))
    pool_out = dy.rectify(pool_out)  # ReLU

    # get scores for either task
    scores_main = W_sm_express * pool_out + b_sm_express
    scores_aux = Waux_sm_express * pool_out + baux_sm_express
    return scores_main, scores_aux
def calc_predict_and_activations(wids, tag, words):
    """Print the model's prediction for one example with filter diagnostics.

    Args:
        wids: list of word ids; padded with id 0 up to ``WIN_SIZE`` on a
            copy, leaving the caller's list untouched.
        tag: gold label, printed as an integer.
        words: list of word strings, used only for display.

    Everything is written to stdout; the function returns ``None``.
    """
    dy.renew_cg()
    if len(wids) < WIN_SIZE:
        # Do not use ``+=`` here: it would pad the caller's list in place.
        wids = wids + [0] * (WIN_SIZE - len(wids))

    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in wids], d=1)
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn, b_cnn, stride=(1, 1), is_valid=False)
    # Position of the strongest activation for each filter.
    filters = (dy.reshape(cnn_out, (len(wids), FILTER_SIZE))).npvalue()
    activations = filters.argmax(axis=0)

    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE,))
    pool_out = dy.rectify(pool_out)

    scores = (W_sm * pool_out + b_sm).npvalue()
    print('%d ||| %s' % (tag, ' '.join(words)))
    predict = np.argmax(scores)
    print(display_activations(words, activations))
    print('scores=%s, predict: %d' % (scores, predict))
    features = pool_out.npvalue()
    W = W_sm.npvalue()
    bias = b_sm.npvalue()
    print('  bias=%s' % bias)
    # Element-wise product: row i is the per-feature contribution to class i.
    contributions = W * features
    labels = (' very bad', '      bad', '  neutral', '     good', 'very good')
    for idx, label in enumerate(labels):
        print('%s (%.4f): %s' % (label, scores[idx], contributions[idx]))
Exemple #10
0
  def transduce(self, es: expression_seqs.ExpressionSequence) -> expression_seqs.ExpressionSequence:
    """Run a bidirectional convolutional LSTM over the input sequence.

    The input is reshaped to ``(sent_len, freq_dim, chn_dim)``.  For each
    direction the input-to-gate convolution is computed once for the whole
    sequence, then the recurrence is unrolled one time step at a time with a
    hidden-to-gate convolution.  Forward and backward states of the same
    position are flattened and concatenated.

    Args:
      es: input expression sequence; its mask (if any) is reused for the
        output.

    Returns:
      An expression sequence with one expression per time step.
    """
    mask = es.mask
    sent_len = len(es)
    es_expr = es.as_transposed_tensor()
    batch_size = es_expr.dim()[1]

    es_chn = dy.reshape(es_expr, (sent_len, self.freq_dim, self.chn_dim), batch_size=batch_size)

    # Hidden states per direction, stored in processing order.
    h_out = {}
    for direction in ["fwd", "bwd"]:
      # input convolutions
      gates_xt_bias = dy.conv2d_bias(es_chn, dy.parameter(self.params["x2all_" + direction]),
                                     dy.parameter(self.params["b_" + direction]), stride=(1, 1), is_valid=False)
      # One slice per time step along dimension 0.
      gates_xt_bias_list = [dy.pick_range(gates_xt_bias, i, i + 1) for i in range(sent_len)]

      h = []
      c = []
      for input_pos in range(sent_len):
        # The backward direction reads the precomputed gates from the end.
        directional_pos = input_pos if direction == "fwd" else sent_len - input_pos - 1
        gates_t = gates_xt_bias_list[directional_pos]
        if input_pos > 0:
          # recurrent convolutions
          gates_h_t = dy.conv2d(h[-1], dy.parameter(self.params["h2all_" + direction]), stride=(1, 1), is_valid=False)
          gates_t += gates_h_t

        # standard LSTM logic
        if len(c) == 0:
          # First step: start from an all-zero cell state.
          c_tm1 = dy.zeros((self.freq_dim * self.num_filters,), batch_size=batch_size)
        else:
          c_tm1 = c[-1]
        gates_t_reshaped = dy.reshape(gates_t, (4 * self.freq_dim * self.num_filters,), batch_size=batch_size)
        c_t = dy.reshape(dy.vanilla_lstm_c(c_tm1, gates_t_reshaped), (self.freq_dim * self.num_filters,),
                         batch_size=batch_size)
        h_t = dy.vanilla_lstm_h(c_t, gates_t_reshaped)
        # Back to a 3-D layout so the next recurrent convolution can use it.
        h_t = dy.reshape(h_t, (1, self.freq_dim, self.num_filters,), batch_size=batch_size)

        # No mask, or a mask column summing to ~0: take the new state as is.
        # NOTE(review): the mask is indexed with input_pos even for the
        # backward direction, which reads the input at directional_pos --
        # confirm this is intended.
        if mask is None or np.isclose(np.sum(mask.np_arr[:, input_pos:input_pos + 1]), 0.0):
          c.append(c_t)
          h.append(h_t)
        else:
          # Mix the new state with the previous one according to the mask.
          # NOTE(review): at input_pos == 0 both lists are still empty, so
          # c[-1] / h[-1] would raise IndexError if the first column of the
          # mask is non-zero.
          c.append(
            mask.cmult_by_timestep_expr(c_t, input_pos, True) + mask.cmult_by_timestep_expr(c[-1], input_pos, False))
          h.append(
            mask.cmult_by_timestep_expr(h_t, input_pos, True) + mask.cmult_by_timestep_expr(h[-1], input_pos, False))

      h_out[direction] = h
    ret_expr = []
    for state_i in range(len(h_out["fwd"])):
      state_fwd = h_out["fwd"][state_i]
      # Backward states were appended in reverse input order; index from the end.
      state_bwd = h_out["bwd"][-1 - state_i]
      # Flatten (1, freq_dim, num_filters) into a single vector.
      output_dim = (state_fwd.dim()[0][1] * state_fwd.dim()[0][2],)
      fwd_reshape = dy.reshape(state_fwd, output_dim, batch_size=batch_size)
      bwd_reshape = dy.reshape(state_bwd, output_dim, batch_size=batch_size)
      ret_expr.append(dy.concatenate([fwd_reshape, bwd_reshape], d=0 if self.reshape_output else 2))
    return expression_seqs.ExpressionSequence(expr_list=ret_expr, mask=mask)

  # TODO: implement get_final_states()
Exemple #11
0
    def conv(input_, _=None):
        """Apply the biased convolution followed by the activation ``act``.

        :param input_: dy.Expression, documented by the original author as
            ((1, T, dsz), B)
        :param _: ignored

        Returns:
            dy.Expression: ``act`` applied to the convolution output (no
            pooling happens in this function).
        """
        return act(dy.conv2d_bias(input_, weight, bias, strides, is_valid=False))
Exemple #12
0
    def conv(input_, _=None):
        """Convolve ``input_`` with the enclosing ``weight``/``bias`` and activate.

        :param input_: dy.Expression, documented by the original author as
            ((1, T, dsz), B)
        :param _: unused placeholder argument

        Returns:
            dy.Expression: result of ``act`` on the convolution output; this
            function itself does not pool.
        """
        convolved = dy.conv2d_bias(
            input_,
            weight,
            bias,
            strides,
            is_valid=False,
        )
        activated = act(convolved)
        return activated
Exemple #13
0
 def transduce(self, encodings):
   """Convolve the encodings with the n-gram filter and max-pool the result.

   Inputs with fewer than ``self.ngram_size`` columns are right-padded with
   zero columns first.  The matrix is reshaped to ``(1, cols, rows)``,
   convolved (valid mode) with ReLU, then reduced with a max over
   dimension 1 followed by a max over dimension 0.
   """
   padded = encodings
   shape = padded.dim()[0]
   missing = self.ngram_size - shape[1]
   if missing > 0:
     zero_cols = dy.zeros((self.embed_dim, missing))
     padded = dy.concatenate([padded, zero_cols], d=1)
     shape = padded.dim()[0]
   as_image = dy.reshape(padded, (1, shape[1], shape[0]))
   convolved = dy.conv2d_bias(as_image, dy.parameter(self.filter), dy.parameter(self.bias),
                              stride=(1, 1), is_valid=True)
   activated = dy.rectify(convolved)
   return dy.max_dim(dy.max_dim(activated, d=1), d=0)
Exemple #14
0
def calc_scores(wids):
    """Return the class-score expression for a list of word ids.

    Args:
        wids: list of word ids.  Inputs shorter than ``WIN_SIZE`` are padded
            with id 0 on a copy, so the caller's list is not modified.

    Returns:
        DyNet expression ``W_sm * pool_out + b_sm``.
    """
    dy.renew_cg()
    if len(wids) < WIN_SIZE:
        # ``wids += ...`` would permanently pad the caller's list.
        wids = wids + [0] * (WIN_SIZE - len(wids))

    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in wids], d=1)
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn, b_cnn, stride=(1, 1), is_valid=False)
    # Max over dimension 1, flatten, then ReLU.
    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE,))
    pool_out = dy.rectify(pool_out)
    return W_sm * pool_out + b_sm
 def __call__(self, inputs, dropout=False):
     """Return unnormalised class scores for ``inputs``.

     ``inputs`` is loaded as a tensor and passed through two
     conv / 2x2 max-pool / ReLU stages, flattened to ``(7*7*64, 1)``, then
     through a ReLU hidden layer (with optional dropout at
     ``DROPOUT_RATE``) and a final linear layer.  No softmax is applied.
     """
     feats = dy.inputTensor(inputs)
     conv_stages = ((self.pConv1, self.pB1), (self.pConv2, self.pB2))
     for kernel_param, bias_param in conv_stages:
         kernel = dy.parameter(kernel_param)
         bias = dy.parameter(bias_param)
         feats = dy.conv2d_bias(feats, kernel, bias, [1, 1], is_valid=False)
         feats = dy.rectify(dy.maxpooling2d(feats, [2, 2], [2, 2]))
     flat = dy.reshape(feats, (7*7*64, 1))
     hidden_w = dy.parameter(self.pW1)
     hidden_b = dy.parameter(self.pB3)
     hidden = dy.rectify(hidden_w*flat+hidden_b)
     if dropout:
         hidden = dy.dropout(hidden, DROPOUT_RATE)
     out_w = dy.parameter(self.pW2)
     return out_w*hidden
Exemple #16
0
 def __call__(self, inputs, dropout=False):
     """Compute the network's raw output scores for ``inputs``.

     Pipeline: input tensor -> conv1 + pool + ReLU -> conv2 + pool + ReLU ->
     reshape to ``(7 * 7 * 64, 1)`` -> ReLU dense layer -> optional dropout
     (rate ``DROPOUT_RATE``) -> linear output layer.  The softmax is left
     to the caller.
     """
     image = dy.inputTensor(inputs)

     # First convolutional stage.
     kernel1 = dy.parameter(self.pConv1)
     bias1 = dy.parameter(self.pB1)
     stage1 = dy.conv2d_bias(image, kernel1, bias1, [1, 1], is_valid=False)
     stage1 = dy.maxpooling2d(stage1, [2, 2], [2, 2])
     stage1 = dy.rectify(stage1)

     # Second convolutional stage.
     kernel2 = dy.parameter(self.pConv2)
     bias2 = dy.parameter(self.pB2)
     stage2 = dy.conv2d_bias(stage1, kernel2, bias2, [1, 1], is_valid=False)
     stage2 = dy.maxpooling2d(stage2, [2, 2], [2, 2])
     stage2 = dy.rectify(stage2)

     # Dense head.
     column = dy.reshape(stage2, (7 * 7 * 64, 1))
     dense_w = dy.parameter(self.pW1)
     dense_b = dy.parameter(self.pB3)
     activated = dy.rectify(dense_w * column + dense_b)
     if dropout:
         activated = dy.dropout(activated, DROPOUT_RATE)
     final_w = dy.parameter(self.pW2)
     return final_w * activated
def calc_scores(wids):
    """Build the class-score expression for one example.

    Args:
        wids: list of word ids; padded with id 0 up to ``WIN_SIZE`` on a
            copy, leaving the caller's list untouched.

    Returns:
        DyNet expression ``W_sm * pool_out + b_sm``.
    """
    dy.renew_cg()
    if len(wids) < WIN_SIZE:
        # Create a padded copy instead of extending the argument in place.
        wids = wids + [0] * (WIN_SIZE - len(wids))

    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in wids], d=1)
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn, b_cnn, stride=(1, 1), is_valid=False)
    # Max over dimension 1, flatten, then ReLU.
    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE,))
    pool_out = dy.rectify(pool_out)
    return W_sm * pool_out + b_sm
Exemple #18
0
    def __convolve__(self, embeddings, F, b, W1, bW1):
        """Convolve a sentence's embeddings and project the pooled features.

        Args:
            embeddings: list of embedding expressions, one per token; they
                are joined as columns.
            F: convolution filter expression.
            b: convolution bias expression.
            W1: weight of the output projection.
            bW1: bias of the output projection.

        Returns:
            DyNet expression ``W1 * f + bW1`` where ``f`` is the flattened,
            max-pooled (and, if enabled, dropped-out) feature map.
        """
        sntlen = len(embeddings)
        emb = dy.concatenate_cols(embeddings)

        x = dy.conv2d_bias(emb, F, b, [1, 1], is_valid=False)
        x = dy.rectify(x)
        # Pool across the whole sentence length.
        x = dy.maxpooling2d(x, [1, sntlen], [1, 1], is_valid=True)
        if self.DROPOUT > 0:
            # dy.dropout returns a new expression; the result was previously
            # discarded, so dropout was never applied.
            # NOTE(review): there is no train/test switch here -- callers are
            # presumably expected to set self.DROPOUT to 0 for inference.
            x = dy.dropout(x, self.DROPOUT)
        f = dy.reshape(x, (self.EMB_DIM * 1 * 100, ))

        return W1 * f + bW1
Exemple #19
0
    def conv(input_):
        """Convolve, rectify and max-pool over dimension 1.

        :param input_: dy.Expression, documented by the original author as
            ((1, T, dsz), B)

        Returns:
            dy.Expression reshaped to ``(cmotsz,)``
        """
        activated = dy.rectify(
            dy.conv2d_bias(input_, weight, bias, strides, is_valid=False))
        pooled = dy.max_dim(activated, 1)
        return dy.reshape(pooled, (cmotsz, ))
Exemple #20
0
    def convnet(self, image):
        """Encode ``image`` with five conv / max-pool / ReLU stages.

        The stages use the filter/bias pairs ``F1/b1``, ``F2/b2``,
        ``F31/b31``, ``F41/b41`` and ``F51/b51`` in that order.  The final
        feature map is flattened to ``self.RESHAPING`` values and projected
        with ``W1`` and ``bW1``.

        A second, parallel branch (``F32``/``F42``/``F52`` and a ``W2``
        layer) existed only as commented-out code in the original and is not
        part of the computation.
        """
        x = dy.inputTensor(image)

        stages = (
            (self.F1, self.b1),
            (self.F2, self.b2),
            (self.F31, self.b31),
            (self.F41, self.b41),
            (self.F51, self.b51),
        )
        for kernel, bias in stages:
            x = dy.conv2d_bias(x, kernel, bias, [1, 1], is_valid=False)
            x = dy.maxpooling2d(x, [2, 2], [2, 2], is_valid=False)
            x = dy.rectify(x)

        flat = dy.reshape(x, (self.RESHAPING, ))
        return self.W1 * flat + self.bW1
Exemple #21
0
    def encode(self, word, training=False):
        """Return a character-CNN representation of ``word``.

        Only the first 45 items of ``word`` are used.  Their embeddings are
        joined along dimension 1, optionally dropped out (when ``training``
        is true and ``self.dropout`` is positive), convolved, max-pooled over
        dimension 1, passed through tanh and reshaped to
        ``(self.filter_size,)``.
        """
        kernel = dy.parameter(self.W_cnn)
        kernel_bias = dy.parameter(self.b_cnn)

        char_vectors = [dy.lookup(self.char_embeds, x) for x in word[:45]]
        embs = dy.concatenate(char_vectors, d=1)
        if self.dropout > 0 and training:
            embs = dy.dropout(embs, self.dropout)

        # maybe change this? diagram shows padding
        cnn_out = dy.conv2d_bias(embs,
                                 kernel,
                                 kernel_bias,
                                 stride=(1, 1),
                                 is_valid=False)
        pooled = dy.max_dim(cnn_out, d=1)
        return dy.reshape(dy.tanh(pooled), (self.filter_size, ))
Exemple #22
0
    def conv(input_):
        """Convolve, rectify and max-pool across the sequence.

        :param input_: dy.Expression, documented by the original author as
            ((1, T, dsz), B)

        Returns:
            dy.Expression reshaped to ``(cmotsz,)``
        """
        activation = dy.rectify(
            dy.conv2d_bias(input_, weight, bias, strides, is_valid=False))
        # dy.max_dim(x, d=0) is currently slow
        # (see https://github.com/clab/dynet/issues/1011), so the max is
        # taken with a pooling window spanning the whole sequence instead.
        (_, seq_len, _), _ = activation.dim()
        window = [1, seq_len, 1]
        return dy.reshape(dy.maxpooling2d(activation, window, strides), (cmotsz,))
Exemple #23
0
    def predict_emb(self, chars):
        """Predict an embedding from a list of character ids.

        The ids are padded on both sides with ``window_width // 2`` padding
        characters, plus extra trailing padding when there are fewer than
        ``pooling_maxk`` characters.  The character embeddings are convolved
        (valid mode), rectified, k-max pooled over dimension 1 and fed to a
        tanh hidden layer followed by a linear output layer.

        Renews the computation graph on every call.
        """
        dy.renew_cg()

        kernel = dy.parameter(self.conv)
        kernel_bias = dy.parameter(self.conv_bias)

        hidden_w = dy.parameter(self.cnn_to_rep_params)
        hidden_b = dy.parameter(self.cnn_to_rep_bias)
        out_w = dy.parameter(self.mlp_out)
        out_b = dy.parameter(self.mlp_out_bias)

        # padding
        pad_char = self.c2i[PADDING_CHAR]
        edge = [pad_char] * (self.window_width // 2)  # TODO also consider w_stride?
        char_ids = edge + chars + edge
        shortfall = self.pooling_maxk - len(chars)
        if shortfall > 0:
            # allow k-max pooling layer output to transform to affine
            char_ids.extend([pad_char] * shortfall)

        columns = [self.char_lookup[cid] for cid in char_ids]
        embeddings = dy.concatenate_cols(columns)
        as_image = dy.reshape(dy.transpose(embeddings),
                              (1, len(char_ids), self.char_dim))

        # not using is_valid=False due to maxk-pooling-induced extra padding
        relu_out = dy.rectify(
            dy.conv2d_bias(as_image,
                           kernel,
                           kernel_bias,
                           self.stride,
                           is_valid=True))

        # k-max pooling over dimension 1.  Alternatives the original author
        # noted for k == 1: dy.maxpooling2d with a [1, len(chars)] window, or
        # simply dy.max_dim(relu_out, d=1).
        pooled = dy.kmax_pooling(relu_out, self.pooling_maxk, d=1)
        pooled_flat = dy.reshape(pooled,
                                 (self.hidden_dim * self.pooling_maxk, ))

        return out_w * dy.tanh(hidden_w * pooled_flat + hidden_b) + out_b
Exemple #24
0
    def compose(self, embeds):
        """Compose a sequence of embeddings into a single representation.

        Args:
            embeds: either a list of embedding expressions or a batched
                expression, which is split into one expression per batch
                element.  A list argument is never modified.

        Returns:
            The result of ``self.transform.transform`` applied to the
            max-pooled convolution output.
        """
        if not isinstance(embeds, list):
            embeds = [
                dy.pick_batch_elem(embeds, i) for i in range(embeds.dim()[1])
            ]

        missing = self.ngram_size - len(embeds)
        if missing > 0:
            # Build a new list; ``extend`` would append the zero padding to
            # the list owned by the caller.
            embeds = embeds + [dy.zeros(self.embed_dim)] * missing

        # Stack the columns, add a third dimension and reverse the axis order.
        embeds = dy.transpose(
            dy.concatenate([dy.concatenate_cols(embeds)], d=2), [2, 1, 0])
        embeds = dy.conv2d_bias(embeds, self.filter, self.bias,
                                (self.embed_dim, 1))
        embeds = dy.max_dim(dy.pick(embeds, index=0), d=0)

        return self.transform.transform(embeds)
Exemple #25
0
    def _build_tagging_graph(self, words, train_mode=True):
        """
        Builds the computational graph.

        Model similar to http://aclweb.org/anthology/D/D14/D14-1181.pdf.

        One convolution per ``(W_cnn, b_cnn)`` pair is applied to the input
        representation; each output is max-pooled over dimension 1 and the
        pooled vectors are concatenated and multiplied by the output matrix.

        :param words: list of tokens.  Inputs shorter than
            ``self._cnn_window_size`` are padded with ``"<*>"`` on a copy, so
            the caller's list is not modified.
        :param train_mode: accepted for interface compatibility; not used in
            this method.
        :return: DyNet expression ``mlp_output * pools_concat``.
        """
        dy.renew_cg()
        # turn parameters into expressions
        mlp_output = dy.parameter(self.pO)
        cnn_expressions = [(dy.parameter(W_cnn), dy.parameter(b_cnn))
                           for W_cnn, b_cnn in zip(self.W_cnns, self.b_cnns)]

        if len(words) < self._cnn_window_size:
            pad_char = "<*>"
            # New list instead of ``words += ...``, which would pad the
            # caller's list in place.
            words = words + [pad_char] * (self._cnn_window_size - len(words))

        if self._char_level:
            cnn_in = dy.concatenate(self._chars_rep(words), d=1)
        else:
            word_reps = [self._word_rep(word) for word in words]
            cnn_in = dy.concatenate(word_reps, d=1)

        pools_out = []
        for W_cnn_express, b_cnn_express in cnn_expressions:
            cnn_out = dy.conv2d_bias(cnn_in,
                                     W_cnn_express,
                                     b_cnn_express,
                                     stride=(1, 1),
                                     is_valid=False)

            # max-pooling
            pool_out = dy.max_dim(cnn_out, d=1)
            pool_out = dy.reshape(pool_out, (self._cnn_filter_size, ))

            pools_out.append(pool_out)

        pools_concat = dy.concatenate(pools_out)

        return mlp_output * pools_concat
Exemple #26
0
def calc_scores(words):
    """Return the class-score expression for a list of word ids.

    Args:
        words: list of word ids.  Inputs shorter than ``WIN_SIZE`` are
            padded with id 0 on a copy, so the caller's list is not modified.

    Returns:
        DyNet expression ``W_sm * pool_out + b_sm``.
    """
    dy.renew_cg()
    W_cnn_express = dy.parameter(W_cnn)
    b_cnn_express = dy.parameter(b_cnn)
    W_sm_express = dy.parameter(W_sm)
    b_sm_express = dy.parameter(b_sm)
    # The convolution needs at least WIN_SIZE positions, so short inputs are
    # padded with index 0.  A new list is built because ``words += ...``
    # would extend the caller's list in place.
    if len(words) < WIN_SIZE:
        words = words + [0] * (WIN_SIZE - len(words))

    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in words],
                            d=1)  # one column per word
    cnn_out = dy.conv2d_bias(cnn_in,
                             W_cnn_express,
                             b_cnn_express,
                             stride=(1, 1),
                             is_valid=False)
    # Max over dimension 1, flatten, then ReLU.
    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE, ))
    pool_out = dy.rectify(pool_out)
    return W_sm_express * pool_out + b_sm_express
Exemple #27
0
    def calc_scores(self, sentences, meta_data=None, get_probability=True):
        """
        Score a group of sentences with the CNN followed by the MLP head.

        Each sentence is embedded, convolved, max-pooled over dimension 1 and
        rectified; the per-sentence vectors are then averaged. When
        ``meta_data`` is given, its values (in sorted-key order) are appended
        to that average before the MLP.

        :param sentences: list
            list of sentences, each a list of word ids (already numbers, not
            letters). Sentences shorter than ``self.win_size`` are padded
            with id 0 on a copy; the caller's lists are left untouched.
        :param meta_data: dict or None
            optional extra features appended to the averaged CNN output
        :param get_probability: bool
            if True return the MLP prediction, otherwise return the feature
            vector that would be fed to the MLP

        :return: dynet expression
            ``logistic(V_mlp * tanh(W_mlp * features + b_mlp) + a_mlp)`` when
            ``get_probability`` is True, else the feature vector itself

        NOTE: the computation graph is not renewed here (the call is
        commented out); presumably the caller does it -- confirm.
        """
        #dy.renew_cg()
        # looping over each sentence, calculating the CNN max pooling and taking the average at the end
        pool_out_agg = []
        for words in sentences:
            missing = self.win_size - len(words)
            # Pad a copy with zeros when the sentence is too short;
            # ``words += ...`` would modify the caller's data.
            padded = words + [0] * missing if missing > 0 else words
            cnn_in = dy.concatenate([dy.lookup(self.W_emb, x) for x in padded],
                                    d=1)
            cnn_out = dy.conv2d_bias(cnn_in,
                                     self.W_cnn,
                                     self.b_cnn,
                                     stride=(1, 1),
                                     is_valid=False)
            pool_out = dy.max_dim(cnn_out, d=1)
            pool_out = dy.reshape(pool_out, (self.filter_size, ))
            pool_out = dy.rectify(pool_out)  # Relu function: max(x_i, 0)
            pool_out_agg.append(pool_out)
        features = dy.average(pool_out_agg)

        if meta_data is not None:
            meta_data_ordered = [
                value for key, value in sorted(meta_data.items())
            ]
            features = dy.concatenate(
                [features, dy.inputVector(meta_data_ordered)])

        if not get_probability:
            return features

        h = dy.tanh((self.W_mlp * features) + self.b_mlp)
        return dy.logistic((self.V_mlp * h) + self.a_mlp)
Exemple #28
0
    def transduce(
        self, expr_seq: expression_seqs.ExpressionSequence
    ) -> expression_seqs.ExpressionSequence:
        """
        Transduce the sequence, applying masks if given (masked timesteps
        simply copy previous h / c).

        The forget, output and candidate projections for all time steps are
        computed with a single convolution; the recurrence itself only uses
        element-wise operations.

        Args:
          expr_seq: expression sequence (accessed via ``as_tensor()``), or a
            list of such sequences whose tensors are concatenated.
        Return:
          expression sequence holding one hidden state per (possibly
          strided) time step; the final h / c pair is stored in
          ``self._final_states``.
        """

        if isinstance(expr_seq, list):
            # Several input sequences: mask, length and batch size are taken
            # from the first one.
            mask_out = expr_seq[0].mask
            seq_len = len(expr_seq[0])
            batch_size = expr_seq[0].dim()[1]
            tensors = [e.as_tensor() for e in expr_seq]
            # NOTE(review): unlike the single-sequence branch, no transpose
            # is applied before the reshape -- confirm the layouts agree.
            input_tensor = dy.reshape(dy.concatenate(tensors),
                                      (seq_len, 1, self.input_dim),
                                      batch_size=batch_size)
        else:
            mask_out = expr_seq.mask
            seq_len = len(expr_seq)
            batch_size = expr_seq.dim()[1]
            input_tensor = dy.reshape(dy.transpose(expr_seq.as_tensor()),
                                      (seq_len, 1, self.input_dim),
                                      batch_size=batch_size)

        if self.dropout > 0.0 and self.train:
            input_tensor = dy.dropout(input_tensor, self.dropout)

        # One convolution yields all three projections (f, o, z) at once;
        # self.stride subsamples along dimension 0.
        proj_inp = dy.conv2d_bias(input_tensor,
                                  dy.parameter(self.p_f),
                                  dy.parameter(self.p_b),
                                  stride=(self.stride, 1),
                                  is_valid=False)
        reduced_seq_len = proj_inp.dim()[0][0]
        proj_inp = dy.transpose(
            dy.reshape(proj_inp, (reduced_seq_len, self.hidden_dim * 3),
                       batch_size=batch_size))
        # proj_inp dims: (hidden_dim * 3, reduced_seq_len), batch_size
        if self.stride > 1 and mask_out is not None:
            # Shrink the mask to the subsampled sequence length.
            mask_out = mask_out.lin_subsampled(trg_len=reduced_seq_len)

        # Index 0 of h / c is an all-zero initial state, so h[-1] / c[-1]
        # are always defined inside the loop.
        h = [dy.zeroes(dim=(self.hidden_dim, 1), batch_size=batch_size)]
        c = [dy.zeroes(dim=(self.hidden_dim, 1), batch_size=batch_size)]
        for t in range(reduced_seq_len):
            # Rows [0, hidden_dim) of column t: forget gate.
            f_t = dy.logistic(
                dy.strided_select(proj_inp, [], [0, t],
                                  [self.hidden_dim, t + 1]))
            # Rows [hidden_dim, 2 * hidden_dim): output gate.
            o_t = dy.logistic(
                dy.strided_select(proj_inp, [], [self.hidden_dim, t],
                                  [self.hidden_dim * 2, t + 1]))
            # Rows [2 * hidden_dim, 3 * hidden_dim): candidate values.
            z_t = dy.tanh(
                dy.strided_select(proj_inp, [], [self.hidden_dim * 2, t],
                                  [self.hidden_dim * 3, t + 1]))

            if self.dropout > 0.0 and self.train:
                # Where the Bernoulli mask is 0 the forget gate becomes 1,
                # i.e. the previous cell value is kept for that unit.
                retention_rate = 1.0 - self.dropout
                dropout_mask = dy.random_bernoulli((self.hidden_dim, 1),
                                                   retention_rate,
                                                   batch_size=batch_size)
                f_t = 1.0 - dy.cmult(
                    dropout_mask, 1.0 - f_t
                )  # TODO: would be easy to make a zoneout dynet operation to save memory

            # The input gate is tied to the forget gate.
            i_t = 1.0 - f_t

            if t == 0:
                # The initial cell state is zero, so the forget term vanishes.
                c_t = dy.cmult(i_t, z_t)
            else:
                c_t = dy.cmult(f_t, c[-1]) + dy.cmult(i_t, z_t)
            h_t = dy.cmult(
                o_t, c_t)  # note: LSTM would use dy.tanh(c_t) instead of c_t
            # No mask, or a mask column summing to ~0: take the new state as is.
            if mask_out is None or np.isclose(
                    np.sum(mask_out.np_arr[:, t:t + 1]), 0.0):
                c.append(c_t)
                h.append(h_t)
            else:
                # Mix the new state with the previous one according to the mask.
                c.append(
                    mask_out.cmult_by_timestep_expr(c_t, t, True) +
                    mask_out.cmult_by_timestep_expr(c[-1], t, False))
                h.append(
                    mask_out.cmult_by_timestep_expr(h_t, t, True) +
                    mask_out.cmult_by_timestep_expr(h[-1], t, False))

        self._final_states = [transducers.FinalTransducerState(dy.reshape(h[-1], (self.hidden_dim,), batch_size=batch_size), \
                                                               dy.reshape(c[-1], (self.hidden_dim,),
                                                                          batch_size=batch_size))]
        # Skip the all-zero initial state at index 0.
        return expression_seqs.ExpressionSequence(expr_list=h[1:],
                                                  mask=mask_out)