def attend(self, input_mat, state, w1dt, w2, v, coverage):
    """Compute attention weights over ``input_mat`` and the attended context.

    The decoder state vectors from ``state.s()`` are concatenated and
    projected with ``w2``; the result is added column-wise to ``w1dt``
    (presumably the pre-computed encoder projection -- confirm with caller).
    When ``coverage`` is truthy, ``self.w_cov * transpose(coverage)`` is
    added to ``w1dt`` first.

    Returns:
        A pair ``(weights, context)`` where ``weights`` is the softmax of the
        transposed scores and ``context`` is ``input_mat * weights``.
    """
    decoder_proj = w2 * dy.concatenate(list(state.s()))
    encoder_proj = w1dt
    if coverage:
        encoder_proj = encoder_proj + self.w_cov * dy.transpose(coverage)
    raw_scores = v * dy.tanh(dy.colwise_add(encoder_proj, decoder_proj))
    weights = dy.softmax(dy.transpose(raw_scores))
    context = input_mat * weights
    return weights, context
Exemple #2
0
    def __call__(self, h, s):
        """Score ``h`` against ``s`` through ``self.U`` (bilinear form).

        When ``self.h_bias`` is set a row of ones is appended to ``s``; when
        ``self.s_bias`` is set a row of ones is appended to ``h``.  The width
        of that row follows the second dimension of the *original* ``h`` when
        ``h`` is 2-D, otherwise a single 1 is appended.

        Returns ``transpose(h) * (U * s)`` for a single label, or its
        transpose after reshaping ``U * s`` to ``(h_dim, n_label)`` otherwise.
        """
        # hT -> ((L, h_dim), B), s -> ((s_dim, L), B)
        if len(h.dim()[0]) == 2:
            ones_shape = (1, h.dim()[0][1])
        else:
            ones_shape = (1, )

        def ones_row():
            # A fresh input node is created for every use, as in the
            # straightforward per-branch formulation.
            return dy.inputTensor(np.ones(ones_shape, dtype=np.float32))

        if self.h_bias:
            s = dy.concatenate([s, ones_row()])
        if self.s_bias:
            h = dy.concatenate([h, ones_row()])

        h_transposed = dy.transpose(h)
        projected = self.U * s  # ((h_dim*n_label, L), B)
        if self.n_label > 1:
            projected = dy.reshape(projected, (self.h_dim, self.n_label))

        scores = h_transposed * projected
        return scores if self.n_label == 1 else dy.transpose(scores)
Exemple #3
0
 def get_score(self, word, context):
     """Return the negative-sampling loss for one (word, context) pair.

     The loss is ``-log sigmoid(c . w) - sum_k log sigmoid(-c_k . w)`` where
     ``w`` is the embedding of ``word``, ``c`` the embedding of ``context``
     and ``c_k`` the embeddings of ``self.num_sampled`` context ids drawn
     without replacement from ``range(self.context_size)`` with
     probabilities ``self.context_fre``.

     The per-term logs are summed instead of taking the log of the product
     of probabilities: the two are mathematically equal, but the product of
     many values in (0, 1) underflows to 0 and turns the loss into ``inf``.
     """
     word_vec = self.word_embeddings[word]

     # Positive pair first, so the random draw below happens at the same
     # point in the computation as before.
     log_terms = [
         dy.log(dy.logistic(
             dy.transpose(self.context_embeddings[context]) * word_vec))
     ]
     negative_sample = np.random.choice(self.context_size, self.num_sampled, replace=False, p=self.context_fre)
     for context_prime in negative_sample:
         log_terms.append(
             dy.log(dy.logistic(
                 -(dy.transpose(self.context_embeddings[context_prime]) * word_vec))))
     return -dy.esum(log_terms)
Exemple #4
0
 def __call__(self, h, s):
     """Bilinear score of ``h`` and ``s`` with an optional bias term.

     For ``n_label == 1`` the result is ``hT * (U * s)`` plus ``hT * B`` when
     ``self.bias`` is set.  Otherwise ``U * s`` is reshaped to
     ``(h_dim, n_label)``, the product is transposed, and
     ``V * concat(h, s) + B`` is added when ``self.bias`` is set.  Without a
     bias the scalar 0 is added in both cases.
     """
     # hT -> ((L, h_dim), B), s -> ((s_dim, L), B)
     h_transposed = dy.transpose(h)
     projected = self.U * s  # ((h_dim*n_label, L), B)
     if self.n_label > 1:
         projected = dy.reshape(projected, (self.h_dim, self.n_label))
     bilinear = h_transposed * projected

     if self.n_label == 1:
         if self.bias:
             bias_term = h_transposed * self.B
         else:
             bias_term = 0
         return bilinear + bias_term

     scores = dy.transpose(bilinear)
     if self.bias:
         bias_term = self.V * dy.concatenate([h, s]) + self.B
     else:
         bias_term = 0
     return scores + bias_term
Exemple #5
0
 def __call__(self, h, s):
     """Bilinear score ``transpose(h) * (U * s)`` with optional ones rows.

     ``self.h_bias`` appends a row of ones to ``h`` and ``self.s_bias``
     appends one to ``s`` before the product.  For ``n_label > 1`` the
     projection ``U * s`` is reshaped to ``(h_dim, n_label)`` first.
     """

     def append_ones(expr):
         # 2-D input gets a (1, n_cols) row of ones, anything else a single 1.
         shape = expr.dim()[0]
         if len(shape) == 2:
             ones = np.ones((1, shape[1]), dtype=np.float32)
         else:
             ones = np.ones((1, ), dtype=np.float32)
         return dy.concatenate([expr, dy.inputTensor(ones)])

     if self.h_bias:
         h = append_ones(h)
     if self.s_bias:
         s = append_ones(s)

     projected = self.U * s
     if self.n_label > 1:
         projected = dy.reshape(projected, (self.h_dim, self.n_label))
     return dy.transpose(h) * projected
Exemple #6
0
    def selection_by_tree(self, tree, mode, idx=0):
        """Score candidate pairs for ``tree`` and return a distribution.

        Candidates come from ``self._select_by_tree(tree, mode, True)``.  If
        that yields no pairs, the call is either retried with the last
        argument set to ``False`` (when ``opt['allow_partial']`` is falsy) or
        the selection is abandoned with a message.

        Args:
            tree: object exposing ``V`` and ``terms`` (only their lengths are
                used here, for the message).
            mode: forwarded unchanged to ``_select_by_tree``.
            idx: index into ``self.history``; only read when
                ``opt['use_history']`` is set.

        Returns:
            ``(dy.softmax(scores), pairs)`` with one score per pair, or
            ``(None, None)`` when the selection is abandoned.
        """
        input_layers, pairs = self._select_by_tree(tree, mode, True)
        if len(pairs) == 0:
            if self.opt['allow_partial']:
                # Parenthesised single-argument form prints the same text on
                # Python 2 and Python 3 (the bare print statement is a
                # SyntaxError on Python 3).
                print('early stop! discard {} / {}.'.format(
                    len(tree.V), len(tree.terms)))
                return None, None
            input_layers, pairs = self._select_by_tree(tree, mode, False)

        W1_rl = dy.parameter(self.model_parameters['W1_rl'])
        b1_rl = dy.parameter(self.model_parameters['b1_rl'])
        if not self.opt['one_layer']:
            # Second-layer parameters are only needed by the two-layer scorer.
            W2_rl = dy.parameter(self.model_parameters['W2_rl'])
            b2_rl = dy.parameter(self.model_parameters['b2_rl'])

        # Stack the candidate vectors as rows: one row per candidate.
        # (V x N)x160 160x50 50x60 60x1
        input_layers = dy.concatenate_cols(input_layers)
        input_layers = dy.transpose(input_layers)

        if not self.opt['one_layer']:
            if self.opt['use_history']:
                pr = input_layers * dy.rectify(W2_rl * dy.rectify(
                    W1_rl * self.history[idx].output() + b1_rl) + b2_rl)
            else:
                pr = dy.rectify(input_layers * W2_rl + b2_rl) * W1_rl + b1_rl
        else:
            if self.opt['use_history']:
                pr = input_layers * dy.rectify(
                    W1_rl * self.history[idx].output() + b1_rl)
            else:
                pr = input_layers * W1_rl + b1_rl
        # (#actions, )
        pr = dy.reshape(pr, (len(pairs), ))
        return dy.softmax(pr), pairs
Exemple #7
0
    def predict(self,
                feature_vector,
                task_ids,
                train=False,
                soft_labels=False,
                temperature=None,
                dropout_rate=0.0,
                orthogonality_weight=0.0,
                domain_id=None):
        """Run the feed-forward network on one example for several tasks.

        A new computation graph is started on every call.

        Args:
            feature_vector: object with ``toarray()`` whose result has a
                leading axis of length 1 (it is squeezed on axis 0).
            task_ids: keys into ``self.output_layers_dict``; one output is
                produced per key, in order.
            train: when true, noise (``self.noise_sigma``) and dropout
                (``dropout_rate``) are applied before every hidden layer.
            soft_labels, temperature: forwarded to each output layer.
            orthogonality_weight: when non-zero, the squared Frobenius norm
                of ``F0_W^T * F1_W`` is returned as the constraint.
            domain_id: when not ``None``, an adversarial loss is computed on
                the gradient-flipped last hidden representation.

        Returns:
            ``(outputs, constraint, adv_loss)``; the last two are 0 when the
            corresponding option is not active.
        """
        dynet.renew_cg()  # new graph

        dense_features = np.squeeze(feature_vector.toarray(), axis=0)

        # TODO this takes too long; can we speed this up somehow?
        hidden = dynet.inputVector(dense_features)
        for layer_idx in range(self.h_layers):
            if train:  # add some noise
                hidden = dynet.noise(hidden, self.noise_sigma)
                hidden = dynet.dropout(hidden, dropout_rate)
            hidden = self.layers[layer_idx](hidden)

        outputs = [
            self.output_layers_dict[task_id](hidden,
                                             soft_labels=soft_labels,
                                             temperature=temperature)
            for task_id in task_ids
        ]

        constraint = 0
        adv_loss = 0
        if orthogonality_weight != 0:
            # put the orthogonality constraint either directly on the
            # output layer or on the hidden layer if it's an MLP
            f0_layer = self.output_layers_dict["F0"]
            f1_layer = self.output_layers_dict["F1"]
            if self.add_hidden:
                f0_param, f1_param = f0_layer.W_mlp, f1_layer.W_mlp
            else:
                f0_param, f1_param = f0_layer.W, f1_layer.W
            f0_weights = dynet.parameter(f0_param)
            f1_weights = dynet.parameter(f1_param)

            # squared Frobenius norm of F0^T * F1: square every element of
            # the product and sum them
            cross_product = dynet.transpose(f0_weights) * f1_weights
            constraint += dynet.sum_elems(dynet.square(cross_product))

        if domain_id is not None:
            # flip the gradient when back-propagating through here
            flipped = dynet.flip_gradient(hidden)  # last state
            adv_loss = self.pick_neg_log(self.adv_layer(flipped), domain_id)
        return outputs, constraint, adv_loss
Exemple #8
0
    def set_initial_states(self, x):
        """Cache the encoder-side quantities for the source ids ``x``.

        Always stores the looked-up embeddings in ``self.xt_embs``.  For the
        ``'bow'`` encoder it also stores ``self.W_enc = W * average(embs)``.
        For the ``'attention'`` encoder it stores ``self.xb`` (for every
        position, the sum of the embeddings within ``self.q`` positions on
        either side, divided by ``self.q``, concatenated along dimension 1)
        and ``self.xt`` (the transposed embedding matrix).
        """
        self.xt_embs = [dy.lookup(self.F, x_t) for x_t in x]

        if self.encoder_type == 'bow':
            self.W_enc = self.W * dy.average(self.xt_embs)

        elif self.encoder_type == 'attention':
            n_words = len(x)
            smoothed = []
            for pos in range(n_words):
                lo = max(pos - self.q, 0)
                hi = min(n_words, pos + self.q + 1)
                smoothed.append(dy.esum(self.xt_embs[lo:hi]) / self.q)
            self.xb = dy.concatenate(smoothed, d=1)
            self.xt = dy.transpose(dy.concatenate(self.xt_embs, d=1))
Exemple #9
0
    def __call__(self, x, h_matrix, noprob=False):
        """Multi-hop attention over ``h_matrix`` driven by the query ``x``.

        The first ``self.layers - 1`` hops each compute attention weights
        over ``h_matrix`` and replace the running state with
        ``concat(x, h_matrix * weights)``.  The last hop produces the final
        scores, which additionally include ``B1 * h_matrix + B2 * state``.

        Returns the scores themselves when ``noprob`` is true, otherwise
        their softmax.  When ``h_matrix`` has more than one dimension the
        scores are flattened to a vector first.
        """
        state = x
        for hop in range(self.layers - 1):
            hop_scores = self.V[hop] * dy.tanh(self.W1[hop] * h_matrix +
                                               self.W2[hop] * state)
            hop_weights = dy.softmax(dy.transpose(hop_scores))
            hop_context = h_matrix * hop_weights
            state = dy.concatenate([x, hop_context])

        attention_part = self.V[-1] * dy.tanh(self.W1[-1] * h_matrix +
                                              self.W2[-1] * state)
        scores = attention_part + self.B1 * h_matrix + self.B2 * state

        if len(h_matrix.dim()[0]) > 1:
            flat_size = self.V[-1].dim()[0][0] * h_matrix.dim()[0][1]
            scores = dy.reshape(scores, (flat_size, ))

        if noprob:
            return scores
        return dy.softmax(scores)
def main():
    """Train and validate the CNN sentence classifier, saving it every epoch.

    Steps: parse the command line, initialise DyNet, load the pretrained
    word2vec vectors, build and length-sort the train/validation sets, dump
    the vocabulary mappings to RESULTS_DIR, then for every epoch run
    mini-batch training followed by validation, print loss / F1 / accuracy
    and save the embeddings and layers to RESULTS_DIR.
    """
    parser = argparse.ArgumentParser(
        description=
        'Convolutional Neural Networks for Sentence Classification in DyNet')

    parser.add_argument('--gpu',
                        type=int,
                        default=0,
                        help='GPU ID to use. For cpu, set -1 [default: 0]')
    parser.add_argument(
        '--train_x_path',
        type=str,
        default='./data/train_x.txt',
        help='File path of train x data [default: `./data/train_x.txt`]')
    parser.add_argument(
        '--train_y_path',
        type=str,
        default='./data/train_y.txt',
        help='File path of train y data [default: `./data/train_y.txt`]')
    parser.add_argument(
        '--valid_x_path',
        type=str,
        default='./data/valid_x.txt',
        help='File path of valid x data [default: `./data/valid_x.txt`]')
    parser.add_argument(
        '--valid_y_path',
        type=str,
        default='./data/valid_y.txt',
        help='File path of valid y data [default: `./data/valid_y.txt`]')
    parser.add_argument('--n_epochs',
                        type=int,
                        default=10,
                        help='Number of epochs [default: 10]')
    parser.add_argument('--batch_size',
                        type=int,
                        default=64,
                        help='Mini batch size [default: 64]')
    parser.add_argument('--win_sizes',
                        type=int,
                        nargs='*',
                        default=[3, 4, 5],
                        help='Window sizes of filters [default: [3, 4, 5]]')
    parser.add_argument(
        '--num_fil',
        type=int,
        default=100,
        help='Number of filters in each window size [default: 100]')
    parser.add_argument('--s',
                        type=float,
                        default=3.0,
                        help='L2 norm constraint on w [default: 3.0]')
    parser.add_argument('--dropout_prob',
                        type=float,
                        default=0.5,
                        help='Dropout probability [default: 0.5]')
    # Unknown strategies used to slip through and fail much later with a
    # NameError on V2; reject them at parse time instead.
    parser.add_argument(
        '--v_strategy',
        type=str,
        default='static',
        choices=['rand', 'static', 'non-static', 'multichannel'],
        help=
        'Embedding strategy. rand: Random  initialization. static: Load pretrained embeddings and do not update during the training. non-static: Load pretrained embeddings and update during the training. [default: static]'
    )
    parser.add_argument(
        '--alloc_mem',
        type=int,
        default=4096,
        help='Amount of memory to allocate [mb] [default: 4096]')
    args = parser.parse_args()
    print(args)

    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)

    N_EPOCHS = args.n_epochs
    WIN_SIZES = args.win_sizes
    BATCH_SIZE = args.batch_size
    EMB_DIM = 300
    OUT_DIM = 1
    L2_NORM_LIM = args.s
    NUM_FIL = args.num_fil
    DROPOUT_PROB = args.dropout_prob
    V_STRATEGY = args.v_strategy
    ALLOC_MEM = args.alloc_mem

    MULTICHANNEL = V_STRATEGY == 'multichannel'
    # One input channel per embedding table fed to the CNN.
    NUM_CHA = 2 if MULTICHANNEL else 1

    # FILE paths
    W2V_PATH = './GoogleNews-vectors-negative300.bin'
    TRAIN_X_PATH = args.train_x_path
    TRAIN_Y_PATH = args.train_y_path
    VALID_X_PATH = args.valid_x_path
    VALID_Y_PATH = args.valid_y_path

    # DyNet setting
    dyparams = dy.DynetParams()
    dyparams.set_random_seed(RANDOM_SEED)
    dyparams.set_mem(ALLOC_MEM)
    dyparams.init()

    # Load pretrained embeddings
    pretrained_model = gensim.models.KeyedVectors.load_word2vec_format(
        W2V_PATH, binary=True)
    vocab = pretrained_model.wv.vocab.keys()
    w2v = pretrained_model.wv

    # Build dataset ==========================================================
    w2c = build_w2c(TRAIN_X_PATH, vocab=vocab)
    w2i, i2w = build_w2i(TRAIN_X_PATH, w2c, unk='unk')
    train_x, train_y = build_dataset(TRAIN_X_PATH,
                                     TRAIN_Y_PATH,
                                     w2i,
                                     unk='unk')
    valid_x, valid_y = build_dataset(VALID_X_PATH,
                                     VALID_Y_PATH,
                                     w2i,
                                     unk='unk')

    train_x, train_y = sort_data_by_length(train_x, train_y)
    valid_x, valid_y = sort_data_by_length(valid_x, valid_y)

    VOCAB_SIZE = len(w2i)
    print('VOCAB_SIZE:', VOCAB_SIZE)

    V_init = init_V(w2v, w2i)

    with open(os.path.join(RESULTS_DIR, './w2i.dump'),
              'wb') as f_w2i, open(os.path.join(RESULTS_DIR, './i2w.dump'),
                                   'wb') as f_i2w:
        pickle.dump(w2i, f_w2i)
        pickle.dump(i2w, f_i2w)

    # Build model ============================================================
    model = dy.Model()
    trainer = dy.AdamTrainer(model)

    # V1
    V1 = model.add_lookup_parameters((VOCAB_SIZE, EMB_DIM))
    if V_STRATEGY in ['static', 'non-static', 'multichannel']:
        V1.init_from_array(V_init)
    if V_STRATEGY in ['static', 'multichannel']:
        V1_UPDATE = False
    else:  # 'rand', 'non-static'
        V1_UPDATE = True
    make_emb_zero(V1, [w2i['<s>'], w2i['</s>']], EMB_DIM)

    # V2 (second, trainable channel; only exists for 'multichannel')
    if MULTICHANNEL:
        V2 = model.add_lookup_parameters((VOCAB_SIZE, EMB_DIM))
        V2.init_from_array(V_init)
        V2_UPDATE = True
        make_emb_zero(V2, [w2i['<s>'], w2i['</s>']], EMB_DIM)

    layers = [
        CNNText(model, EMB_DIM, WIN_SIZES, NUM_CHA, NUM_FIL, dy.tanh,
                DROPOUT_PROB),
        # Sized from the actual number of window sizes rather than the
        # literal 3 of the default [3, 4, 5].
        # NOTE(review): assumes CNNText emits NUM_FIL features per window
        # size -- confirm against its definition.
        Dense(model, len(WIN_SIZES) * NUM_FIL, OUT_DIM, dy.logistic)
    ]

    def embed_batch(x):
        """Turn a (sen_len, batch) index matrix into the CNN input."""
        sen_len = x.shape[0]
        x_embs1 = dy.transpose(
            dy.concatenate_cols(
                [dy.lookup_batch(V1, x_t, update=V1_UPDATE) for x_t in x]))
        if not MULTICHANNEL:
            return dy.reshape(x_embs1, (sen_len, EMB_DIM, 1))
        x_embs2 = dy.transpose(
            dy.concatenate_cols(
                [dy.lookup_batch(V2, x_t, update=V2_UPDATE) for x_t in x]))
        return dy.concatenate([x_embs1, x_embs2], d=2)

    # Train model ============================================================
    n_batches_train = math.ceil(len(train_x) / BATCH_SIZE)
    n_batches_valid = math.ceil(len(valid_x) / BATCH_SIZE)
    max_win = max(WIN_SIZES)

    start_time = time.time()
    for epoch in range(N_EPOCHS):
        # Train
        loss_all_train = []
        pred_all_train = []
        for i in tqdm(range(n_batches_train)):
            # Create a new computation graph
            dy.renew_cg()
            associate_parameters(layers)

            # Create a mini batch
            start = i * BATCH_SIZE
            end = start + BATCH_SIZE
            x = build_batch(train_x[start:end], w2i, max_win).T
            t = np.array(train_y[start:end])

            x_embs = embed_batch(x)
            t = dy.inputTensor(t, batched=True)
            y = forwards(layers, x_embs, test=False)

            mb_loss = dy.mean_batches(dy.binary_log_loss(y, t))

            # Forward prop
            loss_all_train.append(mb_loss.value())
            pred_all_train.extend(list(binary_pred(y.npvalue().flatten())))

            # Backward prop
            mb_loss.backward()
            trainer.update()

            # L2 norm constraint
            layers[1].scale_W(L2_NORM_LIM)

            # Make padding embs zero again for whichever table is trained
            if V_STRATEGY in ['rand', 'non-static']:
                make_emb_zero(V1, [w2i['<s>'], w2i['</s>']], EMB_DIM)
            elif MULTICHANNEL:
                make_emb_zero(V2, [w2i['<s>'], w2i['</s>']], EMB_DIM)

        # Valid
        loss_all_valid = []
        pred_all_valid = []
        for i in range(n_batches_valid):
            # Create a new computation graph
            dy.renew_cg()
            associate_parameters(layers)

            # Create a mini batch
            start = i * BATCH_SIZE
            end = start + BATCH_SIZE
            x = build_batch(valid_x[start:end], w2i, max_win).T
            t = np.array(valid_y[start:end])

            x_embs = embed_batch(x)
            t = dy.inputTensor(t, batched=True)
            y = forwards(layers, x_embs, test=True)

            mb_loss = dy.mean_batches(dy.binary_log_loss(y, t))

            # Forward prop
            loss_all_valid.append(mb_loss.value())
            pred_all_valid.extend(list(binary_pred(y.npvalue().flatten())))

        print(
            'EPOCH: %d, Train Loss:: %.3f (F1:: %.3f, Acc:: %.3f), Valid Loss:: %.3f (F1:: %.3f, Acc:: %.3f), Time:: %.3f[s]'
            % (
                epoch + 1,
                np.mean(loss_all_train),
                f1_score(train_y, pred_all_train),
                accuracy_score(train_y, pred_all_train),
                np.mean(loss_all_valid),
                f1_score(valid_y, pred_all_valid),
                accuracy_score(valid_y, pred_all_valid),
                time.time() - start_time,
            ))

        # Save model =========================================================
        if MULTICHANNEL:
            to_save = [V1, V2] + layers
        else:
            to_save = [V1] + layers
        dy.save(os.path.join(RESULTS_DIR, './model_e' + str(epoch + 1)),
                to_save)
Exemple #11
0
    def __call__(self, x=None, t=None, test=False):
        """Score the next word(s) given target context ``t``.

        Training (``test`` false): ``x`` holds the source ids and ``t`` the
        target ids.  One un-normalised score vector is produced for every
        window of ``self.c`` consecutive target words and the list of them
        is returned.

        Test (``test`` true): ``t`` is a single context window; the encoder
        quantities ``self.W_enc`` / ``self.xt`` / ``self.xb`` are read from
        the instance and the softmax distribution for the next word is
        returned.
        """
        if not test:
            src_embs = [dy.lookup(self.F, x_t) for x_t in x]
            tgt_embs = [dy.lookup(self.E, t_t) for t_t in t]

            y = []
            if self.encoder_type == 'bow':
                # BoW encoder: one shared term for every window
                bow_term = self.W * dy.average(src_embs)
                for start in range(len(t) - self.c + 1):
                    window = dy.concatenate(tgt_embs[start:start + self.c])
                    hidden = dy.tanh(self.U * window)
                    # Output without softmax
                    y.append(self.V * hidden + bow_term)

            elif self.encoder_type == 'attention':
                n_src = len(x)
                smoothed = []
                for pos in range(n_src):
                    lo = max(pos - self.q, 0)
                    hi = min(n_src, pos + self.q + 1)
                    smoothed.append(dy.esum(src_embs[lo:hi]) / self.q)
                src_smoothed = dy.concatenate(smoothed, d=1)
                src_rows = dy.transpose(dy.concatenate(src_embs, d=1))
                ctx_embs = [dy.lookup(self.G, t_t) for t_t in t]

                for start in range(len(t) - self.c + 1):
                    stop = start + self.c
                    # Neural language model
                    hidden = dy.tanh(
                        self.U * dy.concatenate(tgt_embs[start:stop]))

                    # Attention over the source positions
                    ctx_window = dy.concatenate(ctx_embs[start:stop])
                    weights = dy.softmax(src_rows * self.P * ctx_window)
                    context = src_smoothed * weights

                    # Output without softmax
                    y.append(self.V * hidden + self.W * context)

            return y

        tgt_embs = [dy.lookup(self.E, t_t) for t_t in t]

        if self.encoder_type == 'bow':
            hidden = dy.tanh(self.U * dy.concatenate(tgt_embs))
            y_t = dy.softmax(self.V * hidden + self.W_enc)

        elif self.encoder_type == 'attention':
            ctx_embs = [dy.lookup(self.G, t_t) for t_t in t]
            hidden = dy.tanh(self.U * dy.concatenate(tgt_embs))

            ctx_window = dy.concatenate(ctx_embs)
            weights = dy.softmax(self.xt * self.P * ctx_window)
            context = self.xb * weights

            y_t = dy.softmax(self.V * hidden + self.W * context)

        return y_t
Exemple #12
0
    def __call__(self, x, tm1s=None, test=False):
        """Run the attention GRU decoder for one step or a whole sequence.

        Test (``test`` true): ``x`` is the previous word embedding and
        ``tm1s`` holds the previous state at index 0 and the previous
        context at index 1.  Returns ``(s_t, c_t, y_t)`` where ``y_t`` is the
        softmax over the output scores.

        Training (``test`` false): ``x`` is a sequence of word embeddings.
        Starting from ``self.s_0`` / ``self.c_0`` the decoder is unrolled and
        the list of un-normalised output scores is returned.

        In both modes the attention scores are computed from the *previous*
        decoder state, not from the one just produced by the GRU.
        """

        def attend(prev_state):
            # Weighted sum of the columns of self.hp; the direct product
            # self.hp * a_t was noted as causing a memory error.
            scores = dy.pick(
                self.va * dy.tanh(
                    dy.colwise_add(self.Ua * self.hp, self.Wa * prev_state)),
                0)
            weights = dy.softmax(scores)
            return dy.esum([
                dy.cmult(weight, column)
                for weight, column in zip(weights, dy.transpose(self.hp))
            ])

        def maxout(word, context, state):
            # One candidate column per (Wr, Ur, Vr) triple, max over columns.
            candidates = dy.concatenate_cols([
                Wr_j * word + Ur_j * context + Vr_j * state
                for Wr_j, Ur_j, Vr_j in zip(self.Wr, self.Ur, self.Vr)
            ])
            return dy.max_dim(candidates, d=1)

        if test:
            s_tm1, c_tm1 = tm1s[0], tm1s[1]

            gru_input = dy.concatenate([x, c_tm1])
            s_t = self.GRUBuilder.initial_state().set_s(
                [s_tm1]).add_input(gru_input).output()

            c_t = attend(s_tm1)
            m_t = maxout(x, c_t, s_t)
            return s_t, c_t, dy.softmax(self.Wo * m_t)

        s_tm1, c_tm1 = self.s_0, self.c_0
        gru_state = self.GRUBuilder.initial_state().set_s([s_tm1])

        y = []
        for w_tm1 in x:
            gru_state = gru_state.add_input(dy.concatenate([w_tm1, c_tm1]))
            s_t = gru_state.output()

            c_t = attend(s_tm1)
            m_t = maxout(w_tm1, c_t, s_t)
            y.append(self.Wo * m_t)

            # t -> tm1
            s_tm1, c_tm1 = s_t, c_t

        return y
Exemple #13
0
def main():
    """Load a saved CNN classifier and write one prediction per input line.

    The saved model is either ``[V1, layer, layer]`` (single channel) or
    ``[V1, V2, layer, layer]`` (two channels); the case is told apart by the
    number of loaded objects.  Every input sentence is padded on both sides
    with ``max(win_sizes)`` zeros, embedded, run through the layers, and the
    binarised prediction is written to the output file, one per line.
    """
    parser = argparse.ArgumentParser(description='Convolutional Neural Networks for Sentence Classification in DyNet')

    option_table = (
        ('--gpu', int, -1, 'GPU ID to use. For cpu, set -1 [default: -1]'),
        ('--model_file', str, './model', 'Model to use for prediction [default: ./model]'),
        ('--input_file', str, './data/valid_x.txt', 'Input file path [default: ./data/valid_x.txt]'),
        ('--output_file', str, './pred_y.txt', 'Output file path [default: ./pred_y.txt]'),
        ('--w2i_file', str, './w2i.dump', 'Word2Index file path [default: ./w2i.dump]'),
        ('--i2w_file', str, './i2w.dump', 'Index2Word file path [default: ./i2w.dump]'),
        ('--alloc_mem', int, 1024, 'Amount of memory to allocate [mb] [default: 1024]'),
    )
    for flag, value_type, default, help_text in option_table:
        parser.add_argument(flag, type=value_type, default=default, help=help_text)
    args = parser.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)

    # DyNet setting
    dyparams = dy.DynetParams()
    dyparams.set_mem(args.alloc_mem)
    dyparams.init()

    # Load model
    model = dy.Model()
    loaded = dy.load(args.model_file, model)
    multichannel = len(loaded) != 3
    if multichannel:
        V1, V2, layers = loaded[0], loaded[1], loaded[2:]
    else:
        V1, layers = loaded[0], loaded[1:]

    # NOTE(review): takes the first component of the lookup table's shape as
    # the embedding size -- confirm the axis order DyNet reports here.
    emb_dim = V1.shape()[0]
    win_sizes = layers[0].win_sizes

    # Load the vocabulary mappings (i2w is read but not used further here)
    with open(args.w2i_file, 'rb') as f_w2i, open(args.i2w_file, 'rb') as f_i2w:
        w2i = pickle.load(f_w2i)
        i2w = pickle.load(f_i2w)

    # Load test data and pad both ends of every sentence
    max_win = max(win_sizes)
    test_X, _, _ = build_dataset(args.input_file, w2i=w2i, unksym='unk')
    test_X = [[0] * max_win + instance_x + [0] * max_win for instance_x in test_X]

    def lookup_matrix(table, word_ids):
        # One row per word: look up without updating, stack, transpose.
        columns = [dy.lookup(table, x_t, update=False) for x_t in word_ids]
        return dy.transpose(dy.concatenate(columns, d=1))

    # Pred
    pred_y = []
    for instance_x in tqdm(test_X):
        # Create a new computation graph
        dy.renew_cg()
        associate_parameters(layers)

        if multichannel:
            x_embs = dy.concatenate(
                [lookup_matrix(V1, instance_x), lookup_matrix(V2, instance_x)], d=2)
        else:
            x_embs = dy.reshape(lookup_matrix(V1, instance_x),
                                (len(instance_x), emb_dim, 1))

        y = f_props(layers, x_embs, train=False)
        pred_y.append(str(int(binary_pred(y.value()))))

    with open(args.output_file, 'w') as f:
        f.write('\n'.join(pred_y))
Exemple #14
0
 def __call__(self, s_t, h_matrix):
     """Return the attention context of ``h_matrix`` for the state ``s_t``.

     Scores are ``v * tanh(W1 * h_matrix + W2 * s_t)``; their transposed
     softmax gives the weights and the result is ``h_matrix * weights``.
     """
     hidden = dy.tanh(self.W1 * h_matrix + self.W2 * s_t)
     weights = dy.softmax(dy.transpose(self.v * hidden))
     return h_matrix * weights
Exemple #15
0
    def predict(self,
                word_indices,
                char_indices,
                task_id,
                train=False,
                soft_labels=False,
                temperature=None,
                orthogonality_weight=0.0,
                domain_id=None):
        """
        predict tags for a sentence represented as char+word embeddings

        A new computation graph is started on every call.

        :param word_indices: ids looked up in ``self.wembeds``, one per token.
        :param char_indices: per token, the ids looked up in ``self.cembeds``;
            only read when ``self.c_in_dim > 0``.
        :param task_id: key of the output layer to use; for ``"src"`` or
            ``"trg"`` every task in ``self.task_ids`` is predicted and the
            output is a list with one entry per task.
        :param train: add noise to the input features (and, when
            ``self.noise_sigma > 0``, to the final layer) if true.
        :param soft_labels: forwarded to the output predictor(s).
        :param temperature: forwarded to the output predictor(s).
        :param orthogonality_weight: when non-zero (and ``task_id`` is not
            ``"Ft"``) the squared Frobenius norm of ``F0_W^T * F1_W`` is
            returned as the constraint.
        :param domain_id: Predict adversarial loss if domain id is provided.
        :return: ``(output, constraint, adv_loss)``; the last two are 0 when
            the corresponding option is not active.
        :raises Exception: when ``self.h_layers`` is smaller than 1, i.e.
            there is no layer to read the output from.
        """
        dynet.renew_cg()  # new graph

        wfeatures = [self.wembeds[w] for w in word_indices]

        if self.c_in_dim > 0:
            # character-level representation of every token: last state of
            # the forward and of the backward character RNN
            char_emb = []
            rev_char_emb = []
            for chars_of_token in char_indices:
                char_feats = [self.cembeds[c] for c in chars_of_token]
                f_char, b_char = self.char_rnn.predict_sequence(
                    char_feats, char_feats)
                char_emb.append(f_char[-1])
                rev_char_emb.append(b_char[-1])

            features = [
                dynet.concatenate([w, c, rev_c])
                for w, c, rev_c in zip(wfeatures, char_emb, rev_char_emb)
            ]
        else:
            features = wfeatures

        if train:  # only do at training time
            features = [dynet.noise(fe, self.noise_sigma) for fe in features]

        # go through layers; the output is read from the last one
        num_layers = self.h_layers
        forward_sequence = features
        backward_sequence = features
        for i in range(num_layers):
            predictor = self.predictors["inner"][i]
            forward_sequence, backward_sequence = predictor.predict_sequence(
                forward_sequence, backward_sequence)
            if i > 0 and self.activation:
                # activation between LSTM layers
                forward_sequence = [
                    self.activation(s) for s in forward_sequence
                ]
                backward_sequence = [
                    self.activation(s) for s in backward_sequence
                ]

        if num_layers <= 0:
            # no layer produced an output to predict from
            raise Exception("oops should not be here")

        concat_layer = [
            dynet.concatenate([f, b])
            for f, b in zip(forward_sequence, reversed(backward_sequence))
        ]
        if train and self.noise_sigma > 0.0:
            concat_layer = [
                dynet.noise(fe, self.noise_sigma) for fe in concat_layer
            ]

        output_layers = self.predictors["output_layers_dict"]
        if task_id not in ["src", "trg"]:
            output = output_layers[task_id].predict_sequence(
                concat_layer,
                soft_labels=soft_labels,
                temperature=temperature)
        else:
            # one src example for all outputs: output is a list here
            output = []
            for t_id in self.task_ids:
                output_t = output_layers[t_id].predict_sequence(
                    concat_layer,
                    soft_labels=soft_labels,
                    temperature=temperature)
                output.append(output_t)

        constraint = 0
        adv_loss = 0

        if orthogonality_weight != 0 and task_id != "Ft":
            # put the orthogonality constraint either directly on the
            # output layer or on the hidden layer if it's an MLP
            # use orthogonality_weight only between F0 and F1
            builder = output_layers["F0"].network_builder
            task_param = builder.W_mlp if self.add_hidden else builder.W
            task_W = dynet.parameter(task_param)

            builder = output_layers["F1"].network_builder
            other_param = builder.W_mlp if self.add_hidden else builder.W
            other_task_W = dynet.parameter(other_param)

            # squared Frobenius norm of F0^T * F1: square every element of
            # the product and sum them
            matrix_product = dynet.transpose(task_W) * other_task_W
            constraint = dynet.sum_elems(dynet.square(matrix_product))

        if domain_id is not None:
            # flip the gradient when back-propagating through here
            adv_input = dynet.flip_gradient(concat_layer[-1])  # last state
            adv_output = self.adv_layer(adv_input)
            adv_loss = self.pick_neg_log(adv_output, domain_id)

        # output is list if task_id = 'src'
        return output, constraint, adv_loss