Example #1
    def evaluate_network_from_embs(self, wembs, renew=True):
        params = self.params
        if renew:
            dy.renew_cg()
        builders = params["builders"]
        W = params["W"]
        v = params["v"]

        lstms = [b.initial_state() for b in builders]

        # wembs = [dy.noise(we, 0.1) for we in wembs]

        # running the first level for getting b

        fw_lstm1 = lstms[0].transduce(wembs)
        bw_lstm1 = reversed(lstms[1].transduce(reversed(wembs)))

        inputs_to_2nd_layer = [
            dy.concatenate([f, b]) for f, b in zip(fw_lstm1, bw_lstm1)
        ]

        fw_lstm2 = lstms[2].transduce(inputs_to_2nd_layer)
        bw_lstm2 = reversed(lstms[3].transduce(reversed(inputs_to_2nd_layer)))

        y = [dy.concatenate([f, b]) for f, b in zip(fw_lstm2, bw_lstm2)]
        tags_hat = [W * t + v for t in y]
        return tags_hat
Example #2
def do_cpu():
    # Note: `C` (the _dynet module) and `cpW` (a 1000x1000 parameter) are
    # assumed to be module-level globals in the source project; see
    # Example #14 for a self-contained version of this benchmark.
    C.renew_cg()
    W = C.parameter(cpW)
    W = W * W * W * W * W * W * W
    z = C.squared_distance(W, W)
    z.value()
    z.backward()
Example #3
 def predict(self, task, inputs):
     preds = []
     for input in inputs:
         dn.renew_cg()
         out = self(task, input)
         preds.append(np.argmax(out.npvalue()))
     return preds
Example #4
    def build_representations_bi(self,
                                 sentence,
                                 training,
                                 prefix=[],
                                 do_not_renew=False):
        if not do_not_renew:
            dy.renew_cg(immediate_compute=True, check_validity=True)
        coded_sentence = self.vocabulary.code_sentence_cw(sentence, training)
        coded_prefix = self.vocabulary.code_sentence_cw(prefix, training)

        w_init_f = self.wrnn[F].initial_state()
        w_init_b = self.wrnn[B].initial_state()

        f_lstm_input = self.get_static_representations(coded_prefix +
                                                       coded_sentence)
        b_lstm_input = self.get_static_representations(
            coded_prefix + list(reversed(coded_sentence)))

        contextual_embeddings = [
            w_init_f.transduce(f_lstm_input),
            list(reversed(w_init_b.transduce(b_lstm_input)))
        ]

        return (dy.concatenate([
            contextual_embeddings[F][-1], contextual_embeddings[B][0]
        ]), [dy.concatenate(list(fb)) for fb in zip(*contextual_embeddings)])
Example #5
    def predict(self, X_test, x_y_vectors=None):
        """
        Predict the classification of the test set
        """
        model = self.model
        model_parameters = self.model_parameters
        builder = self.builder
        test_pred = []

        # Predict every 100 instances together
        for chunk in xrange(0, len(X_test), MINIBATCH_SIZE):
            dy.renew_cg()
            path_cache = {}
            test_pred.extend([
                np.argmax(
                    process_one_instance(
                        builder,
                        model,
                        model_parameters,
                        path_set,
                        path_cache,
                        self.update,
                        dropout=0.0,
                        x_y_vectors=x_y_vectors[chunk + i]
                        if x_y_vectors is not None else None,
                        num_hidden_layers=self.num_hidden_layers).npvalue())
                for i, path_set in enumerate(X_test[chunk:chunk +
                                                    MINIBATCH_SIZE])
            ])

        return test_pred
Example #6
    def predict_with_score(self, X_test, x_y_vectors=None):
        """
        Predict the classification of the test set
        """
        model = self.model
        model_parameters = self.model_parameters
        builder = self.builder

        dy.renew_cg()

        path_cache = {}
        test_pred = [
            process_one_instance(
                builder,
                model,
                model_parameters,
                path_set,
                path_cache,
                self.update,
                dropout=0.0,
                x_y_vectors=x_y_vectors[i]
                if x_y_vectors is not None else None,
                num_hidden_layers=self.num_hidden_layers).npvalue()
            for i, path_set in enumerate(X_test)
        ]

        return [(np.argmax(vec), vec[np.argmax(vec)]) for vec in test_pred]
Example #7
 def epoch_train(self,examples):
     count=0
     dy.renew_cg()
     current_losses = [ ]
     loss_list = []
     for word,context in (examples):
         loss = self.get_score(word,context)
         current_losses.append(loss)
         loss_list.append(loss.value())
         if len(current_losses) >= self.batch_size:
             mean_loss = dy.esum(current_losses) / float(len(current_losses))
             mean_loss.forward()
             mean_loss.backward()
             self.optimizer.update()
             current_losses = [ ]
             dy.renew_cg()
         count+=1
         ## Print out the average loss in every 1M example
         if count%1000000==1000:
             print (count,np.mean(np.array(loss_list)))
             loss_list = []
     if current_losses:
         mean_loss = dy.esum(current_losses) / float(len(current_losses))
         mean_loss.forward()
         mean_loss.backward()
         self.optimizer.update()
Example #8
def predict(board_ins, board_init):
    act = []
    count = 0
    previous = None
    first = True
    for sentence, env in zip(board_ins, board_init):
        if count % 5 != 0:
            new_sentence = pre_sentence + ' <end> ' + sentence
            new_env = str(execute(new_env, generate))
            if new_env == 'None':
                new_env = '1:_ 2:_ 3:_ 4:_ 5:_ 6:_ 7:_'
        else:
            dy.renew_cg()
            new_sentence = sentence
            new_env = env
        generate, previous = generator(encoder, decoder, params_encoder,
                                       params_decoder, new_sentence, new_env,
                                       first, previous)
        act.append(generate)
        pre_sentence = sentence
        count += 1
        while '<end>' in generate:
            generate.remove('<end>')
    env_list = []
    final_env_list = []
    for i, env in enumerate(board_init):
        if i % 5 == 0:
            new_env = env
        new_env = str(execute(new_env, act[i]))
        if new_env == 'None':
            new_env = '1:_ 2:_ 3:_ 4:_ 5:_ 6:_ 7:_'
        env_list.append(new_env)
        if i % 5 == 4:
            final_env_list.append(new_env)
    return env_list, final_env_list
Example #9
File: dynmt.py Project: ufwt/TraFix
def compute_batch_loss(encoder, decoder, batch_input_seqs, batch_output_seqs,
                       y2int):
    # renew computation graph per batch
    dn.renew_cg()

    batch_size = len(batch_input_seqs)

    # encode batch with bilstm encoder: each element represents one step in time, and is a matrix of 2*h x batch size
    # for example, for sentence length of 12, blstm_outputs will be: 12 x 2 x 100 x 16
    # note: also adding begin_seq, end_seq symbols here!
    encoded_inputs, input_masks = encoder.encode_batch(batch_input_seqs)

    # concatenate the end seq symbols to the output sequence
    padded_batch_output_seqs = [
        seq + [common.END_SEQ] for seq in batch_output_seqs
    ]

    # get output word ids for each step of the decoder
    output_word_ids, output_masks, output_tot = common.get_batch_word_ids(
        padded_batch_output_seqs, y2int)

    total_batch_loss = decoder.compute_decoder_batch_loss(
        encoded_inputs, input_masks, output_word_ids, output_masks, batch_size)

    return total_batch_loss
Example #10
 def get_output(self, sents):
     dy.renew_cg()
     tagged_sents = []
     for sent in sents:
         features, t_feats, _ = self.get_features_for_tagging(sent, False)
         cur_tag_seq, _ = self.crf_module.viterbi_decoding(features, t_feats)
         tagged_sents.append(cur_tag_seq)
     return tagged_sents
Example #11
    def predict(self,
                feature_vector,
                task_ids,
                train=False,
                soft_labels=False,
                temperature=None,
                dropout_rate=0.0,
                orthogonality_weight=0.0,
                domain_id=None):
        dynet.renew_cg()  # new graph

        feature_vector = feature_vector.toarray()
        feature_vector = np.squeeze(feature_vector, axis=0)

        # self.input = dynet.vecInput(self.vocab_size)
        # self.input.set(feature_vector)
        # TODO this takes too long; can we speed this up somehow?
        input = dynet.inputVector(feature_vector)
        for i in range(self.h_layers):
            if train:  # add some noise
                input = dynet.noise(input, self.noise_sigma)
                input = dynet.dropout(input, dropout_rate)
            input = self.layers[i](input)
        outputs = []
        for task_id in task_ids:
            output = self.output_layers_dict[task_id](input,
                                                      soft_labels=soft_labels,
                                                      temperature=temperature)
            outputs.append(output)

        constraint, adv_loss = 0, 0
        if orthogonality_weight != 0:
            # put the orthogonality constraint either directly on the
            # output layer or on the hidden layer if it's an MLP
            F0_layer = self.output_layers_dict["F0"]
            F1_layer = self.output_layers_dict["F1"]
            F0_param = F0_layer.W_mlp if self.add_hidden else F0_layer.W
            F1_param = F1_layer.W_mlp if self.add_hidden else F1_layer.W
            F0_W = dynet.parameter(F0_param)
            F1_W = dynet.parameter(F1_param)

            # calculate the matrix product of the task matrix with both others
            matrix_product = dynet.transpose(F0_W) * F1_W

            # take the squared Frobenius norm by squaring
            # every element and then summing them
            squared_frobenius_norm = dynet.sum_elems(
                dynet.square(matrix_product))
            constraint += squared_frobenius_norm
            # print('Constraint with first matrix:', squared_frobenius_norm.value())

        if domain_id is not None:
            # flip the gradient when back-propagating through here
            adv_input = dynet.flip_gradient(input)  # last state
            adv_output = self.adv_layer(adv_input)
            adv_loss = self.pick_neg_log(adv_output, domain_id)
            # print('Adversarial loss:', avg_adv_loss.value())
        return outputs, constraint, adv_loss
Example #12
    def generate(self, num, limit=40, beam=3):
        dy.renew_cg()

        generated = []

        W = dy.parameter(self.W)
        b = dy.parameter(self.b)

        for wordi in range(num):

            # Initialize the LSTM state with EOW token.
            start_state = self.lstm.initial_state()
            start_state = start_state.add_input(self.lookup[self.c2i[EOW]])
            best_states = [('', start_state, 0)]

            final_hypotheses = []

            # Perform beam search.
            while len(final_hypotheses) < beam and len(best_states) > 0:
                new_states = []

                for hyp, s, p in best_states:

                    # Cutoff when we exceed the character limit.
                    if len(hyp) >= limit:
                        final_hypotheses.append((hyp, p))
                        continue

                    # Get the prediction from the current LSTM state.
                    unnormalized = dy.affine_transform([b, W, s.output()])
                    softmax = dy.softmax(unnormalized).npvalue()

                    # Sample beam number of times.
                    for beami in range(beam):
                        ci = sample_softmax(softmax)
                        c = self.i2c[ci]
                        next_p = softmax[ci]
                        logp = p - np.log(next_p)

                        if c == EOW:
                            # Add final hypothesis if we reach end of word.
                            final_hypotheses.append((hyp, logp))
                        else:
                            # Else add to states to search next time step.
                            new_states.append((hyp + c,
                                               s.add_input(self.lookup[ci]),
                                               logp))

                # Sort and prune the states to within the beam.
                new_states.sort(key=lambda t: t[-1])
                best_states = new_states[:beam]

            final_hypotheses.sort(key=lambda t: t[-1])

            generated.append(final_hypotheses[0][0])

        return generated
Example #13
 def evaluate_network(self, t):
     dy.renew_cg()
     W = self.params["W"][0]
     b = self.params["b"][0]
     x = dy.vecInput(2)
     x.set(t)
     
     if self.with_bias[0]:
         output = derlu(W * (x + b))
     else:
         output = derlu(W * x)
     self.last_output = output
     return output
Example #14
def do_cpu():
    import _dynet as C
    C.init()
    cm = C.Model()
    cpW = cm.add_parameters((1000, 1000))
    s = time.time()
    C.renew_cg()
    W = C.parameter(cpW)
    W = W * W * W * W * W * W * W
    z = C.squared_distance(W, W)
    z.value()
    z.backward()
    print("CPU time:", time.time() - s)
Example #15
def do_cpu():
  import _dynet as C
  C.init()
  cm = C.Model()
  cpW = cm.add_parameters((1000,1000))
  s = time.time()
  C.renew_cg()
  W = C.parameter(cpW)
  W = W*W*W*W*W*W*W
  z = C.squared_distance(W,W)
  z.value()
  z.backward()
  print("CPU time:",time.time() - s)
Example #16
    def construct_vector(self, sentence_as_char_codes):
        params = self.params
        dy.renew_cg()
        builder = params["builders"][0]
        E = params["E"]
        sentence_as_wembs = []
        for word in sentence_as_char_codes:
            char_lstm = builder.initial_state()
            cembs = [E[char] for char in word]

            # running the char-level lstm
            word_vec = char_lstm.transduce(cembs)[-1]
            sentence_as_wembs.append(word_vec)
        return sentence_as_wembs
Example #17
def train(epoch, trees, policy, trainer, best, best_idx, wrong_total_l):
    # hyper edge fragment hyper-prec hyper-recall root
    metric_total = [0] * 6
    micro_total = [0.] * 3
    wrong_at_total = [0.] * 10
    np.random.shuffle(trees)
    loss = 0
    policy.set_dropout(args.path_dropout_rate)
    for i_episode in tqdm(range(len(trees))):
        T = trees[i_episode]
        entropy_l = []
        dy.renew_cg()
        policy.re_init()
        for _ in range(args.n_rollout):
            # prob_l = []
            policy.init_history()
            policy.rewards.append([])
            policy.saved_actions.append([])
            while len(T.V) > 0:
                pair, pr, entropy = select_action(T, policy, choose_max=False, mode='train')
                if pair is None:
                    break
                entropy_l.append(entropy)
                # prob_l.append(pr)
                T.update(pair)
                if args.reward_form != 'last' or len(T.V) == 0:
                    reward = T.eval(reward_type=args.reward, reward_form=args.reward_form)
                else:
                    reward = 0
                policy.rewards[-1].append(reward)
            metric_total, micro_total, wrong_at_total, wrong_total = T.evaluate(metric_total, micro_total,
                                                                                wrong_at_total, reward_type='print')
            wrong_total_l.append(wrong_total)
            # scores_save.append(T.evaluate(reward_type=REWARD, return_all=True))
            # prob_save.append(prob_l)
            T.re_init()
        loss += finish_episode(policy, trainer, entropy_l)
    for m_idx in range(5):
        metric_total[m_idx] = round(metric_total[m_idx] / len(trees) / args.n_rollout, 3)
    metric_total[0] = T.f1_calc(metric_total[3], metric_total[4])
    for w_idx in range(len(wrong_at_total)):
        wrong_at_total[w_idx] = round(wrong_at_total[w_idx] / len(trees) / args.n_rollout, 3)
    metric_total[5] /= args.n_rollout
    best, best_idx = update_best(metric_total, best, best_idx, epoch)
    if epoch % 1 == 0:
        print '[train]epoch {}:{} {} {} {}'.format(epoch, metric_total, micro_total, get_micro_f1(micro_total),
                                                   wrong_at_total),
        print 'total_loss', loss, 'best', best, best_idx
    return best, best_idx
Example #18
 def evaluate_network_from_sentence(self, sentence):
     dy.renew_cg()
     E = self.p3a.params["E"]
     E_pre = self.params["E_pre"]
     E_suf = self.params["E_suf"]
     input_vectors = []
     pre_suf_pairs = self.encoder.encode_sentence_prefix_suffix(sentence)
     sentence_codes = self.encoder.encode_sentence_words(sentence)
     for i in range(len(sentence)):
         vec = E[sentence_codes[i][0]]
         pre_code, suf_code = pre_suf_pairs[i]
         if pre_code >= 0: vec += E_pre[pre_code]
         if suf_code >= 0: vec += E_suf[suf_code]
         input_vectors.append(vec)
     return self.p3a.common.evaluate_network_from_embs(input_vectors, False)
Example #19
def do_gpu():
    import _dynet as G
    import sys
    sys.argv.append('--dynet-devices')
    sys.argv.append('GPU:0')
    G.init()
    gm = G.Model()
    gpW = gm.add_parameters((1000, 1000))
    s = time.time()
    G.renew_cg()
    W = G.parameter(gpW)
    W = W * W * W * W * W * W * W
    z = G.squared_distance(W, W)
    z.value()
    z.backward()
    print("GPU time:", time.time() - s)
Example #20
def do_gpu():
  import _dynet as G
  import sys 
  sys.argv.append('--dynet-devices')
  sys.argv.append('GPU:0')
  G.init()
  gm = G.Model()
  gpW = gm.add_parameters((1000,1000))
  s = time.time()
  G.renew_cg()
  W = G.parameter(gpW)
  W = W*W*W*W*W*W*W
  z = G.squared_distance(W,W)
  z.value()
  z.backward()
  print("GPU time:",time.time() - s)
Example #21
    def predict(self, seq, train=False, output_confidences=False, unk_tag=None, update_embeds=True):
        """
        predict tags for a sentence represented as char+word embeddings and compute losses for this instance
        """
        if not train:
            dynet.renew_cg()
        features = self.get_features(seq.words, train=train, update=update_embeds)

        output_expected_at_layer = self.predictors["task_expected_at"][seq.task_id]
        output_expected_at_layer -=1

        # go through layers
        # input is now combination of w + char emb
        prev = features
        prev_rev = features
        num_layers = self.h_layers

        for i in range(0,num_layers):
            predictor = self.predictors["inner"][i]
            forward_sequence, backward_sequence = predictor.predict_sequence(prev, prev_rev)        
            if i > 0 and self.activation:
                # activation between LSTM layers
                forward_sequence = [self.activation(s) for s in forward_sequence]
                backward_sequence = [self.activation(s) for s in backward_sequence]

            if i == output_expected_at_layer:
                output_predictor = self.predictors["output_layers_dict"][seq.task_id]
                concat_layer = [dynet.concatenate([f, b]) for f, b in zip(forward_sequence,reversed(backward_sequence))]

                if train and self.noise_sigma > 0.0:
                    concat_layer = [dynet.noise(fe,self.noise_sigma) for fe in concat_layer]
                # fill-in predictions and get loss per tag
                losses = output_predictor.predict_sequence(seq, concat_layer,
                                                           train=train, output_confidences=output_confidences,
                                                           unk_tag=unk_tag, dictionary=self.dictionary,
                                                           type_constraint=self.type_constraint)

            prev = forward_sequence
            prev_rev = backward_sequence 

        if train:
            # return losses
            return losses
        else:
            return seq.pred_tags, seq.tag_confidences
Example #22
 def __init__(self, word_size, context_fre, context_size, vocab, window=2,
              subsample_n=2000, mode='bow', embed_size=200, batch_size=128,
              num_sampled=5, epoch=6):
     self.embed_size = embed_size
     self.mode = mode
     self.window = window
     self.vocab = vocab
     self.word_size = word_size
     self.subsample_n = subsample_n
     self.context_size = context_size
     self.num_sampled = num_sampled
     self.epoch = epoch
     self.context_fre = context_fre
     self.batch_size=batch_size
     self.pc = dy.ParameterCollection()
     self.optimizer = dy.AdamTrainer(self.pc)
     self.word_embeddings = self.pc.add_lookup_parameters((self.word_size, self.embed_size), name="word-embeddings")
     self.context_embeddings = self.pc.add_lookup_parameters((self.context_size, self.embed_size), name="context-embeddings")
     dy.renew_cg()
     print ([(param.name(), param.shape()) for param in self.pc.lookup_parameters_list() + self.pc.parameters_list()])
Example #23
def create_network_params(nwords, ntags, external_E=None):
    # create a parameter collection and add the parameters.
    print("adding parameters")
    m = dy.ParameterCollection()

    print("nwords: {}".format(nwords))
    E = m.add_lookup_parameters((nwords, EMB), name='E')
    if external_E is not None and sum(external_E.shape) > 0:
        assert external_E.shape[1] == EMB
        external_rows = external_E.shape[0]
        for r in range(external_rows):
            E.init_row(r, external_E[r, :])

    b = m.add_parameters(HIDDEN, name='b')
    U = m.add_parameters((ntags, HIDDEN), name='U')
    W = m.add_parameters((HIDDEN, INPUT), name='W')
    bp = m.add_parameters(ntags, name='bp')
    dy.renew_cg()
    return m, E, b, U, W, bp
Example #24
    def get_top_k_paths(self, all_paths, relation_index, threshold):
        """
        Get the top k scoring paths
        """
        builder = self.builder
        model = self.model
        model_parameters = self.model_parameters
        lemma_lookup = model_parameters['lemma_lookup']
        pos_lookup = model_parameters['pos_lookup']
        dep_lookup = model_parameters['dep_lookup']
        dir_lookup = model_parameters['dir_lookup']

        path_scores = []

        for i, path in enumerate(all_paths):

            if i % 1000 == 0:
                cg = dy.renew_cg()
                W1 = dy.parameter(model_parameters['W1'])
                b1 = dy.parameter(model_parameters['b1'])
                W2 = None
                b2 = None

                if self.num_hidden_layers == 1:
                    W2 = dy.parameter(model_parameters['W2'])
                    b2 = dy.parameter(model_parameters['b2'])

            path_embedding = get_path_embedding(builder, lemma_lookup,
                                                pos_lookup, dep_lookup,
                                                dir_lookup, path)

            if self.use_xy_embeddings:
                zero_word = dy.inputVector([0.0] * self.lemma_embeddings_dim)
                path_embedding = dy.concatenate(
                    [zero_word, path_embedding, zero_word])

            h = W1 * path_embedding + b1

            if self.num_hidden_layers == 1:
                h = W2 * dy.tanh(h) + b2

            path_score = dy.softmax(h).npvalue().T
            path_scores.append(path_score)

        path_scores = np.vstack(path_scores)

        top_paths = []
        for i in range(len(relation_index)):
            indices = np.argsort(-path_scores[:, i])
            top_paths.append([
                (all_paths[index], path_scores[index, i]) for index in indices
                if threshold is None or path_scores[index, i] >= threshold
            ])

        return top_paths
Example #25
    def evaluate_adversary(self, dataset):
        loss = 0
        acc = 0
        tot = len(dataset)
        
        predictions = []
        for i, ex in enumerate(dataset):
            
            dy.renew_cg()
            vec, labels = ex
            vec = dy.inputVector(vec)
            
            l, p = self.adversary_classifier.get_loss_and_prediction(vec, labels)
            
            predictions.append(p)
            if p == labels:
                acc += 1
            loss += l.value()

        return loss / tot, acc / tot * 100, predictions
Example #26
    def build_representations_mono(self,
                                   sentence,
                                   training,
                                   prefix=[],
                                   do_not_renew=False):
        if not do_not_renew:
            dy.renew_cg(immediate_compute=True, check_validity=True)

        coded_sentence = self.vocabulary.code_sentence_cw(sentence, training)
        coded_prefix = self.vocabulary.code_sentence_cw(prefix, training)
        #print(prefix)
        #print(coded_prefix)

        w_init_f = self.wrnn[F].initial_state()

        f_lstm_input = self.get_static_representations(coded_prefix +
                                                       coded_sentence)

        contextual_embeddings = w_init_f.transduce(f_lstm_input)

        return (contextual_embeddings[-1], contextual_embeddings)
Example #27
def train(network,
          trainer,
          words,
          epochs,
          batch_size=100,
          max_batch_num=5,
          callback=lambda *args: None):
    last_loss = None

    batch_num = min(len(words) // batch_size + 1, max_batch_num)

    for enum in range(epochs):
        eloss = 0
        bnum = 0

        for bi in range(batch_num):
            bwords = np.random.choice(words, size=batch_size, replace=True)
            if len(bwords) < 1:
                continue
            dy.renew_cg()
            loss = network.train_batch(bwords)
            eloss += loss.value()
            loss.backward()
            trainer.update()
            bnum += 1

        eloss = eloss / bnum

        if last_loss:
            last_loss = 0.95 * last_loss + 0.05 * eloss
        else:
            last_loss = eloss

        # print('Epoch {} loss: {:.6f}  Running avg.: {:.6f}'.format(
        #     enum + 1, eloss, last_loss))
        callback(enum, eloss, last_loss)

    return last_loss
Example #28
    def calculate_loss(self, sents):
        dy.renew_cg()
        losses = []
        for sent in sents:
            features, t_features, feat_reconstruct = self.get_features_for_tagging(
                sent, True
            )
            gold_tags = [tag for chars, word, feats, tag in sent]
            cur_loss = self.crf_module.negative_log_loss(
                features, t_features, gold_tags
            )
            if self.autoencoder:
                autoencoder_loss = [
                    dy.binary_log_loss(reconstruct, dy.inputTensor(feats))
                    for reconstruct, (chars, word, feats, tag) in zip(
                        feat_reconstruct, sent
                    )
                ]
            else:  # remove autoencoder loss
                autoencoder_loss = [dy.scalarInput(0)]
            losses.append(cur_loss + (dy.esum(autoencoder_loss) / self.featsize))

        return dy.esum(losses)
Example #29
def test(parser,
         testing_data,
         evalb_dir,
         unsupervised=False,
         test_bert_embeddings=None):
    test_predicted = []
    for idx, data in enumerate(testing_data):
        dy.renew_cg()
        if test_bert_embeddings is None:
            predicted = parser.parse(data, False)
        else:
            predicted = parser.parse(data, False, test_bert_embeddings[idx])
        test_predicted.append(predicted)

    if unsupervised:
        test_predicted_errs = [x[0] for x in test_predicted]
        test_predicted_span_sets = [x[1] for x in test_predicted]
        test_fscore = evaluate.evalb_US(testing_data, test_predicted_span_sets)
        test_ppl = evaluate.evalb_ppl(testing_data, test_predicted_errs)
        return test_fscore, test_ppl
    else:
        test_fscore = evaluate.evalb(testing_data, test_predicted)
        # test_fscore = evaluate.evalb_tag(testing_data, test_predicted)
        return test_fscore
Example #30
    def predict(self,
                feature_vector,
                train=False,
                soft_labels=False,
                temperature=None,
                dropout_rate=None):
        dynet.renew_cg()  # new graph

        feature_vector = feature_vector.toarray()
        feature_vector = np.squeeze(feature_vector, axis=0)

        # self.input = dynet.vecInput(self.vocab_size)
        # self.input.set(feature_vector)
        # TODO this takes too long; can we speed this up somehow?
        input = dynet.inputVector(feature_vector)
        for i in range(self.h_layers - 1):
            if train:  # add some noise
                input = dynet.noise(input, self.noise_sigma)
                input = dynet.dropout(input, dropout_rate)
            input = self.layers[i](input)
        output = self.layers[-1](input,
                                 soft_labels=soft_labels,
                                 temperature=temperature)
        return output
Example #31
def main():
    parser = argparse.ArgumentParser(
        description=
        'Convolutional Neural Networks for Sentence Classification in DyNet')

    parser.add_argument('--gpu',
                        type=int,
                        default=0,
                        help='GPU ID to use. For cpu, set -1 [default: 0]')
    parser.add_argument(
        '--train_x_path',
        type=str,
        default='./data/train_x.txt',
        help='File path of train x data [default: `./data/train_x.txt`]')
    parser.add_argument(
        '--train_y_path',
        type=str,
        default='./data/train_y.txt',
        help='File path of train y data [default: `./data/train_y.txt`]')
    parser.add_argument(
        '--valid_x_path',
        type=str,
        default='./data/valid_x.txt',
        help='File path of valid x data [default: `./data/valid_x.txt`]')
    parser.add_argument(
        '--valid_y_path',
        type=str,
        default='./data/valid_y.txt',
        help='File path of valid y data [default: `./data/valid_y.txt`]')
    parser.add_argument('--n_epochs',
                        type=int,
                        default=10,
                        help='Number of epochs [default: 10]')
    parser.add_argument('--batch_size',
                        type=int,
                        default=64,
                        help='Mini batch size [default: 64]')
    parser.add_argument('--win_sizes',
                        type=int,
                        nargs='*',
                        default=[3, 4, 5],
                        help='Window sizes of filters [default: [3, 4, 5]]')
    parser.add_argument(
        '--num_fil',
        type=int,
        default=100,
        help='Number of filters in each window size [default: 100]')
    parser.add_argument('--s',
                        type=float,
                        default=3.0,
                        help='L2 norm constraint on w [default: 3.0]')
    parser.add_argument('--dropout_prob',
                        type=float,
                        default=0.5,
                        help='Dropout probability [default: 0.5]')
    parser.add_argument(
        '--v_strategy',
        type=str,
        default='static',
        help=
        'Embedding strategy. rand: Random  initialization. static: Load pretrained embeddings and do not update during the training. non-static: Load pretrained embeddings and update during the training. [default: static]'
    )
    parser.add_argument(
        '--alloc_mem',
        type=int,
        default=4096,
        help='Amount of memory to allocate [mb] [default: 4096]')
    args = parser.parse_args()
    print(args)

    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)

    N_EPOCHS = args.n_epochs
    WIN_SIZES = args.win_sizes
    BATCH_SIZE = args.batch_size
    EMB_DIM = 300
    OUT_DIM = 1
    L2_NORM_LIM = args.s
    NUM_FIL = args.num_fil
    DROPOUT_PROB = args.dropout_prob
    V_STRATEGY = args.v_strategy
    ALLOC_MEM = args.alloc_mem

    if V_STRATEGY in ['rand', 'static', 'non-static']:
        NUM_CHA = 1
    else:
        NUM_CHA = 2

    # FILE paths
    W2V_PATH = './GoogleNews-vectors-negative300.bin'
    TRAIN_X_PATH = args.train_x_path
    TRAIN_Y_PATH = args.train_y_path
    VALID_X_PATH = args.valid_x_path
    VALID_Y_PATH = args.valid_y_path

    # DyNet setting
    dyparams = dy.DynetParams()
    dyparams.set_random_seed(RANDOM_SEED)
    dyparams.set_mem(ALLOC_MEM)
    dyparams.init()

    # Load pretrained embeddings
    pretrained_model = gensim.models.KeyedVectors.load_word2vec_format(
        W2V_PATH, binary=True)
    vocab = pretrained_model.wv.vocab.keys()
    w2v = pretrained_model.wv

    # Build dataset =======================================================================================================
    w2c = build_w2c(TRAIN_X_PATH, vocab=vocab)
    w2i, i2w = build_w2i(TRAIN_X_PATH, w2c, unk='unk')
    train_x, train_y = build_dataset(TRAIN_X_PATH,
                                     TRAIN_Y_PATH,
                                     w2i,
                                     unk='unk')
    valid_x, valid_y = build_dataset(VALID_X_PATH,
                                     VALID_Y_PATH,
                                     w2i,
                                     unk='unk')

    train_x, train_y = sort_data_by_length(train_x, train_y)
    valid_x, valid_y = sort_data_by_length(valid_x, valid_y)

    VOCAB_SIZE = len(w2i)
    print('VOCAB_SIZE:', VOCAB_SIZE)

    V_init = init_V(w2v, w2i)

    with open(os.path.join(RESULTS_DIR, './w2i.dump'),
              'wb') as f_w2i, open(os.path.join(RESULTS_DIR, './i2w.dump'),
                                   'wb') as f_i2w:
        pickle.dump(w2i, f_w2i)
        pickle.dump(i2w, f_i2w)

    # Build model =================================================================================
    model = dy.Model()
    trainer = dy.AdamTrainer(model)

    # V1
    V1 = model.add_lookup_parameters((VOCAB_SIZE, EMB_DIM))
    if V_STRATEGY in ['static', 'non-static', 'multichannel']:
        V1.init_from_array(V_init)
    if V_STRATEGY in ['static', 'multichannel']:
        V1_UPDATE = False
    else:  # 'rand', 'non-static'
        V1_UPDATE = True
    make_emb_zero(V1, [w2i['<s>'], w2i['</s>']], EMB_DIM)

    # V2
    if V_STRATEGY == 'multichannel':
        V2 = model.add_lookup_parameters((VOCAB_SIZE, EMB_DIM))
        V2.init_from_array(V_init)
        V2_UPDATE = True
        make_emb_zero(V2, [w2i['<s>'], w2i['</s>']], EMB_DIM)

    layers = [
        CNNText(model, EMB_DIM, WIN_SIZES, NUM_CHA, NUM_FIL, dy.tanh,
                DROPOUT_PROB),
        Dense(model, 3 * NUM_FIL, OUT_DIM, dy.logistic)
    ]

    # Train model ================================================================================
    n_batches_train = math.ceil(len(train_x) / BATCH_SIZE)
    n_batches_valid = math.ceil(len(valid_x) / BATCH_SIZE)

    start_time = time.time()
    for epoch in range(N_EPOCHS):
        # Train
        loss_all_train = []
        pred_all_train = []
        for i in tqdm(range(n_batches_train)):
            # Create a new computation graph
            dy.renew_cg()
            associate_parameters(layers)

            # Create a mini batch
            start = i * BATCH_SIZE
            end = start + BATCH_SIZE
            x = build_batch(train_x[start:end], w2i, max(WIN_SIZES)).T
            t = np.array(train_y[start:end])

            sen_len = x.shape[0]

            if V_STRATEGY in ['rand', 'static', 'non-static']:
                x_embs = dy.concatenate_cols(
                    [dy.lookup_batch(V1, x_t, update=V1_UPDATE) for x_t in x])
                x_embs = dy.transpose(x_embs)
                x_embs = dy.reshape(x_embs, (sen_len, EMB_DIM, 1))
            else:  # multichannel
                x_embs1 = dy.concatenate_cols(
                    [dy.lookup_batch(V1, x_t, update=V1_UPDATE) for x_t in x])
                x_embs2 = dy.concatenate_cols(
                    [dy.lookup_batch(V2, x_t, update=V2_UPDATE) for x_t in x])
                x_embs1 = dy.transpose(x_embs1)
                x_embs2 = dy.transpose(x_embs2)
                x_embs = dy.concatenate([x_embs1, x_embs2], d=2)

            t = dy.inputTensor(t, batched=True)
            y = forwards(layers, x_embs, test=False)

            mb_loss = dy.mean_batches(dy.binary_log_loss(y, t))

            # Forward prop
            loss_all_train.append(mb_loss.value())
            pred_all_train.extend(list(binary_pred(y.npvalue().flatten())))

            # Backward prop
            mb_loss.backward()
            trainer.update()

            # L2 norm constraint
            layers[1].scale_W(L2_NORM_LIM)

            # Make padding embs zero
            if V_STRATEGY in ['rand', 'non-static']:
                make_emb_zero(V1, [w2i['<s>'], w2i['</s>']], EMB_DIM)
            elif V_STRATEGY in ['multichannel']:
                make_emb_zero(V2, [w2i['<s>'], w2i['</s>']], EMB_DIM)

        # Valid
        loss_all_valid = []
        pred_all_valid = []
        for i in range(n_batches_valid):
            # Create a new computation graph
            dy.renew_cg()
            associate_parameters(layers)

            # Create a mini batch
            start = i * BATCH_SIZE
            end = start + BATCH_SIZE
            x = build_batch(valid_x[start:end], w2i, max(WIN_SIZES)).T
            t = np.array(valid_y[start:end])

            sen_len = x.shape[0]

            if V_STRATEGY in ['rand', 'static', 'non-static']:
                x_embs = dy.concatenate_cols(
                    [dy.lookup_batch(V1, x_t, update=V1_UPDATE) for x_t in x])
                x_embs = dy.transpose(x_embs)
                x_embs = dy.reshape(x_embs, (sen_len, EMB_DIM, 1))
            else:  # multichannel
                x_embs1 = dy.concatenate_cols(
                    [dy.lookup_batch(V1, x_t, update=V1_UPDATE) for x_t in x])
                x_embs2 = dy.concatenate_cols(
                    [dy.lookup_batch(V2, x_t, update=V2_UPDATE) for x_t in x])
                x_embs1 = dy.transpose(x_embs1)
                x_embs2 = dy.transpose(x_embs2)
                x_embs = dy.concatenate([x_embs1, x_embs2], d=2)

            t = dy.inputTensor(t, batched=True)
            y = forwards(layers, x_embs, test=True)

            mb_loss = dy.mean_batches(dy.binary_log_loss(y, t))

            # Forward prop
            loss_all_valid.append(mb_loss.value())
            pred_all_valid.extend(list(binary_pred(y.npvalue().flatten())))

        print(
            'EPOCH: %d, Train Loss:: %.3f (F1:: %.3f, Acc:: %.3f), Valid Loss:: %.3f (F1:: %.3f, Acc:: %.3f), Time:: %.3f[s]'
            % (
                epoch + 1,
                np.mean(loss_all_train),
                f1_score(train_y, pred_all_train),
                accuracy_score(train_y, pred_all_train),
                np.mean(loss_all_valid),
                f1_score(valid_y, pred_all_valid),
                accuracy_score(valid_y, pred_all_valid),
                time.time() - start_time,
            ))

        # Save model =========================================================================================================================
        if V_STRATEGY in ['rand', 'static', 'non-static']:
            dy.save(os.path.join(RESULTS_DIR, './model_e' + str(epoch + 1)),
                    [V1] + layers)
        else:
            dy.save(os.path.join(RESULTS_DIR, './model_e' + str(epoch + 1)),
                    [V1, V2] + layers)
Example #32
    def fit(self, train, num_iterations, dev=None, model_path=None, patience=0, minibatch_size=0, log_losses=False):
        """
        train the tagger
        """
        losses_log = {} # log losses

        print("init parameters")
        self.init_parameters(train)

        # init lookup parameters and define graph
        print("build graph")
        self.build_computation_graph(len(self.w2i),  len(self.c2i))

        update_embeds = True
        if not self.backprob_embeds:  # disable backprop into embeds
            print(">>> disable wembeds update <<<")
            update_embeds = False
            
        best_val_acc, epochs_no_improvement = 0.0, 0

        if dev and model_path is not None and patience > 0:
            print('Using early stopping with patience of {}...'.format(patience))

        batch = []
        print("train..")
        for iteration in range(num_iterations):

            total_loss=0.0
            total_tagged=0.0

            indices = [i for i in range(len(train.seqs))]
            random.shuffle(indices)

            loss_accum_loss = defaultdict(float)
            loss_accum_tagged = defaultdict(float)

            for idx in indices:
                seq = train.seqs[idx]

                if seq.task_id not in losses_log:
                    losses_log[seq.task_id] = [] #initialize

                if minibatch_size > 1:
                    # accumulate instances for minibatch update
                    loss1 = self.predict(seq, train=True, update_embeds=update_embeds)
                    total_tagged += len(seq.words)
                    batch.append(loss1)
                    if len(batch) == minibatch_size:
                        loss = dynet.esum(batch)
                        total_loss += loss.value()

                        # logging
                        loss_accum_tagged[seq.task_id] += len(seq.words)
                        loss_accum_loss[seq.task_id] += loss.value()

                        loss.backward()
                        self.trainer.update()
                        dynet.renew_cg()  # use new computational graph for each BATCH when batching is active
                        batch = []
                else:
                    dynet.renew_cg() # new graph per item
                    loss1 = self.predict(seq, train=True, update_embeds=update_embeds)
                    total_tagged += len(seq.words)
                    lv = loss1.value()
                    total_loss += lv

                    # logging
                    loss_accum_tagged[seq.task_id] += len(seq.words)
                    loss_accum_loss[seq.task_id] += loss1.value()

                    loss1.backward()
                    self.trainer.update()

            print("iter {2} {0:>12}: {1:.2f}".format("total loss", total_loss/total_tagged, iteration))

            # log losses
            for task_id in sorted(losses_log):
                losses_log[task_id].append(loss_accum_loss[task_id] / loss_accum_tagged[task_id])

            if log_losses:
                dill.dump(losses_log, open(model_path + ".model" + ".losses.pickle", "wb"))

            if dev:
                # evaluate after every epoch
                correct, total = self.evaluate(dev, "task0")
                val_accuracy = correct/total
                print("dev accuracy: {0:.4f}".format(val_accuracy))

                if val_accuracy > best_val_acc:
                    print('Accuracy {0:.4f} is better than best val accuracy '
                          '{1:.4f}.'.format(val_accuracy, best_val_acc))
                    best_val_acc = val_accuracy
                    epochs_no_improvement = 0
                    save(self, model_path)
                else:
                    print('Accuracy {0:.4f} is worse than best val accuracy {1:.4f}.'.format(val_accuracy, best_val_acc))
                    epochs_no_improvement += 1

                if patience > 0:
                    if epochs_no_improvement == patience:
                        print('No improvement for {} epochs. Early stopping...'.format(epochs_no_improvement))
                        break
Example #33
def create_computation_graph(num_lemmas,
                             num_pos,
                             num_dep,
                             num_directions,
                             num_relations,
                             wv=None,
                             use_xy_embeddings=False,
                             num_hidden_layers=0,
                             lemma_dimension=50):
    """
    Initialize the model
    :param num_lemmas Number of distinct lemmas
    :param num_pos Number of distinct part of speech tags
    :param num_dep Number of distinct dependency labels
    :param num_directions Number of distinct path directions (e.g. >,<)
    :param num_relations Number of classes (e.g. binary = 2)
    :param wv Pre-trained word embeddings file
    :param use_xy_embeddings Whether to concatenate x and y word embeddings to the network input
    :param num_hidden_layers The number of hidden layers for the term-pair classification network
    :param lemma_dimension The dimension of the lemma embeddings
    :return:
    """
    # model = Model() -- gives error? tried to fix by looking at dynet tutorial examples -- GB
    dy.renew_cg()
    model = dy.ParameterCollection()
    network_input = LSTM_HIDDEN_DIM

    builder = dy.LSTMBuilder(NUM_LAYERS,
                             lemma_dimension + POS_DIM + DEP_DIM + DIR_DIM,
                             network_input, model)

    # Concatenate x and y
    if use_xy_embeddings:
        network_input += 2 * lemma_dimension

    #  'the optimal size of the hidden layer is usually between the size of the input and size of the output layers'
    hidden_dim = int((network_input + num_relations) / 2)

    model_parameters = {}

    if num_hidden_layers == 0:
        model_parameters['W1'] = model.add_parameters(
            (num_relations, network_input))
        model_parameters['b1'] = model.add_parameters((num_relations, 1))

    elif num_hidden_layers == 1:

        model_parameters['W1'] = model.add_parameters(
            (hidden_dim, network_input))
        model_parameters['b1'] = model.add_parameters((hidden_dim, 1))
        model_parameters['W2'] = model.add_parameters(
            (num_relations, hidden_dim))
        model_parameters['b2'] = model.add_parameters((num_relations, 1))

    else:
        raise ValueError('Only 0 or 1 hidden layers are supported')

    model_parameters['lemma_lookup'] = model.add_lookup_parameters(
        (num_lemmas, lemma_dimension))

    # Pre-trained word embeddings
    if wv is not None:
        model_parameters['lemma_lookup'].init_from_array(wv)

    model_parameters['pos_lookup'] = model.add_lookup_parameters(
        (num_pos, POS_DIM))
    model_parameters['dep_lookup'] = model.add_lookup_parameters(
        (num_dep, DEP_DIM))
    model_parameters['dir_lookup'] = model.add_lookup_parameters(
        (num_directions, DIR_DIM))

    return builder, model, model_parameters
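A note on the common pattern: every example above calls renew_cg() (from dynet / _dynet, imported as dy, dn, dynet, C or G) to start a fresh computation graph before building expressions for a new instance or batch, then builds a loss or prediction, runs a forward pass (value() / npvalue()), optionally backward() plus a trainer update, and only renews the graph again afterwards. The sketch below is a minimal, self-contained illustration of that loop on made-up toy data; it assumes DyNet 2.x, where parameters returned by add_parameters can be used directly as expressions (as in Example #1), and is not taken from any of the projects above.

import random

import dynet as dy

pc = dy.ParameterCollection()
W = pc.add_parameters((1, 2))
b = pc.add_parameters((1,))
trainer = dy.SimpleSGDTrainer(pc)

# Hypothetical toy data: 2-d feature vectors with binary labels.
data = [([random.random(), random.random()], random.choice([0.0, 1.0]))
        for _ in range(100)]

for epoch in range(3):
    for features, label in data:
        dy.renew_cg()                      # fresh computation graph per instance
        x = dy.inputVector(features)
        y_hat = dy.logistic(W * x + b)     # build expressions on the new graph
        loss = dy.binary_log_loss(y_hat, dy.scalarInput(label))
        loss.value()                       # forward pass
        loss.backward()                    # backward pass
        trainer.update()                   # parameter update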