Example #1
    def __init__(self, params, vocab, embeddings, char_embeddings):
        """

        :param params: parameters
        :param vocab: vocabulary
        :param embeddings: pretrained word embeddings
        :param char_embeddings: pretrained character embeddings
        """
        self.params = params
        self.name = 'lstm_cascade'
        self.dim_char = params.dim_char
        self.dim_w = params.dim_w
        self.dim_char_h = params.dim_char_h
        self.dim_ote_h = params.dim_ote_h
        self.dim_ts_h = params.dim_ts_h
        self.input_win = params.input_win
        self.ds_name = params.ds_name
        # tag vocabulary of opinion target extraction and targeted sentiment
        self.ote_tag_vocab = params.ote_tag_vocab
        self.ts_tag_vocab = params.ts_tag_vocab
        self.dim_ote_y = len(self.ote_tag_vocab)
        self.dim_ts_y = len(self.ts_tag_vocab)
        self.n_epoch = params.n_epoch
        self.dropout_rate = params.dropout
        self.tagging_schema = params.tagging_schema
        self.clip_grad = params.clip_grad
        self.use_char = params.use_char
        # name of word embeddings
        self.emb_name = params.emb_name
        self.embeddings = embeddings
        self.vocab = vocab
        # character vocabulary
        self.char_vocab = params.char_vocab
        #self.td_proportions = params.td_proportions
        self.epsilon = params.epsilon
        #self.tc_proportions = params.tc_proportions
        self.pc = dy.ParameterCollection()

        if self.use_char:
            self.char_emb = CharEmb(pc=self.pc,
                                    n_chars=len(self.char_vocab),
                                    dim_char=self.dim_char,
                                    pretrained_embeddings=char_embeddings)
            self.lstm_char = dy.LSTMBuilder(1, self.dim_char, self.dim_char_h, self.pc)
            dim_input = self.input_win * self.dim_w + 2 * self.dim_char_h
        else:
            dim_input = self.input_win * self.dim_w
        # word embedding layer
        self.emb = WDEmb(pc=self.pc, n_words=len(vocab), dim_w=self.dim_w, pretrained_embeddings=embeddings)

        # lstm layers
        self.lstm_ote = dy.LSTMBuilder(1, dim_input, self.dim_ote_h, self.pc)
        self.lstm_ts = dy.LSTMBuilder(1, 2*self.dim_ote_h, self.dim_ts_h, self.pc)

        # fully connected layer
        self.fc_ote = Linear(pc=self.pc, n_in=2*self.dim_ote_h, n_out=self.dim_ote_y)
        self.fc_ts = Linear(pc=self.pc, n_in=2 * self.dim_ts_h, n_out=self.dim_ts_y)

        assert self.tagging_schema == 'BIEOS'
        transition_path = {'B': ['B-POS', 'B-NEG', 'B-NEU'],
                           'I': ['I-POS', 'I-NEG', 'I-NEU'],
                           'E': ['E-POS', 'E-NEG', 'E-NEU'],
                           'S': ['S-POS', 'S-NEG', 'S-NEU'],
                           'O': ['O']}
        self.transition_scores = np.zeros((self.dim_ote_y, self.dim_ts_y))
        for t in transition_path:
            next_tags = transition_path[t]
            n_next_tag = len(next_tags)
            ote_id = self.ote_tag_vocab[t]
            for nt in next_tags:
                ts_id = self.ts_tag_vocab[nt]
                self.transition_scores[ote_id][ts_id] = 1.0 / n_next_tag
        print(self.transition_scores)
        self.transition_scores = np.array(self.transition_scores, dtype='float32').transpose()

        # opinion target-opinion words co-occurrence modeling
        self.stm_lm = Linear(pc=self.pc, n_in=2*self.dim_ote_h, n_out=2*self.dim_ote_h, nonlinear='tanh')
        # fully connected layer for opinion-enhanced indicator prediction task
        self.fc_stm = Linear(pc=self.pc, n_in=2 * self.dim_ote_h, n_out=2)

        # gate for maintaining sentiment consistency
        self.W_gate = self.pc.add_parameters((2*self.dim_ote_h, 2*self.dim_ote_h),
                                             init=dy.UniformInitializer(0.2))

        # determine the optimizer
        if params.optimizer == 'sgd':
            self.optimizer = dy.SimpleSGDTrainer(self.pc, params.sgd_lr)
        elif params.optimizer == 'adam':
            self.optimizer = dy.AdamTrainer(self.pc, 0.001, 0.9, 0.9)
        elif params.optimizer == 'adadelta':
            self.optimizer = dy.AdadeltaTrainer(self.pc)
        elif params.optimizer == 'momentum':
            self.optimizer = dy.MomentumSGDTrainer(self.pc, 0.01, 0.9)
        else:
            raise Exception("Unsupported optimizer type: %s" % params.optimizer)
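
Note: the constructor above stores params.clip_grad, but this excerpt never applies it. In DyNet, gradient clipping is normally switched on at the trainer level via set_clip_threshold (compare Examples 3 and 8 below, which call it explicitly); a minimal sketch of how that could be wired up here, assuming self.optimizer is the trainer selected above:

        # hedged sketch -- not part of the original excerpt
        if self.clip_grad > 0:
            self.optimizer.set_clip_threshold(self.clip_grad)
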
Example #2
    def __init__(self, params, vocab, embeddings):
        """

        :param params: parameters
        :param vocab: vocabulary
        :param embeddings: pretrained word embeddings
        """
        self.params = params
        self.name = 'lstm_crf'
        self.dim_char = params.dim_char
        self.dim_w = params.dim_w
        self.dim_char_h = params.dim_char_h
        self.dim_ote_h = params.dim_ote_h
        self.dim_ts_h = params.dim_ts_h
        self.input_win = params.input_win
        self.ds_name = params.ds_name
        # tag vocabulary of opinion target extraction and targeted sentiment
        self.ote_tag_vocab = params.ote_tag_vocab
        self.ts_tag_vocab = params.ts_tag_vocab
        self.dim_ote_y = len(self.ote_tag_vocab)
        self.dim_ts_y = len(self.ts_tag_vocab)
        self.n_epoch = params.n_epoch
        self.dropout_rate = params.dropout
        self.tagging_schema = params.tagging_schema
        self.clip_grad = params.clip_grad
        self.use_char = params.use_char
        # name of word embeddings
        self.emb_name = params.emb_name
        self.embeddings = embeddings
        self.vocab = vocab
        # character vocabulary
        self.char_vocab = params.char_vocab
        self.pc = dy.ParameterCollection()

        # word embedding layer
        self.emb = WDEmb(pc=self.pc, n_words=len(vocab), dim_w=self.dim_w, pretrained_embeddings=embeddings)

        # input dimension
        dim_input = self.input_win * self.dim_w

        self.lstm_ts = dy.LSTMBuilder(1, dim_input, self.dim_ts_h, self.pc)

        # hidden layer between LSTM and CRF decoding layer
        self.hidden = Linear(pc=self.pc, n_in=2*self.dim_ts_h,
                             n_out=self.dim_ts_h, use_bias=True, nonlinear='tanh')
        # map the word representation to the ts label space
        # in the label space, both the BEG and END tags are considered
        self.fc_ts = Linear(pc=self.pc, n_in=self.dim_ts_h, n_out=self.dim_ts_y)

        # transition matrix, [i, j] is the transition score from tag i to tag j
        self.transitions = self.pc.add_lookup_parameters((self.dim_ts_y + 2, self.dim_ts_y + 2))

        # determine the optimizer
        if params.optimizer == 'sgd':
            self.optimizer = dy.SimpleSGDTrainer(self.pc, params.sgd_lr)
        elif params.optimizer == 'adam':
            self.optimizer = dy.AdamTrainer(self.pc, 0.001, 0.9, 0.9)
        elif params.optimizer == 'adadelta':
            self.optimizer = dy.AdadeltaTrainer(self.pc)
        elif params.optimizer == 'momentum':
            self.optimizer = dy.MomentumSGDTrainer(self.pc, 0.01, 0.9)
        else:
            raise Exception("Unsupported optimizer type: %s" % params.optimizer)
Example #3
    def __init__(self, params, model=None, runtime=False):
        self.UPSAMPLE_PROJ = 200
        self.RNN_SIZE = 448
        self.RNN_LAYERS = 1
        self.OUTPUT_EMB_SIZE = 1
        self.params = params
        if model is None:
            self.model = dy.Model()
        else:
            self.model = model

        self.trainer = dy.AdamTrainer(self.model, alpha=1e-4)
        self.trainer.set_sparse_updates(True)
        self.trainer.set_clip_threshold(5.0)
        # self.trainer = dy.AdamTrainer(self.model)
        # MGCs are extracted at 12.5 ms
        from utils import orthonormal_VanillaLSTMBuilder
        lstm_builder = orthonormal_VanillaLSTMBuilder
        if runtime:
            lstm_builder = dy.VanillaLSTMBuilder

        upsample_count = int(12.5 * self.params.target_sample_rate / 1000)
        # self.upsample_w_s = []
        self.upsample_w_t = []
        # self.upsample_b_s = []
        self.upsample_b_t = []
        for _ in xrange(upsample_count):
            # self.upsample_w_s.append(self.model.add_parameters((self.UPSAMPLE_PROJ, self.params.mgc_order)))
            self.upsample_w_t.append(
                self.model.add_parameters(
                    (self.UPSAMPLE_PROJ, self.params.mgc_order * 2)))
            # self.upsample_b_s.append(self.model.add_parameters((self.UPSAMPLE_PROJ)))
            self.upsample_b_t.append(
                self.model.add_parameters((self.UPSAMPLE_PROJ)))

        self.output_coarse_lookup = self.model.add_lookup_parameters(
            (256, self.OUTPUT_EMB_SIZE))
        self.output_fine_lookup = self.model.add_lookup_parameters(
            (256, self.OUTPUT_EMB_SIZE))
        # self.rnn = orthonormal_VanillaLSTMBuilder(self.RNN_LAYERS, self.OUTPUT_EMB_SIZE + self.UPSAMPLE_PROJ, self.RNN_SIZE, self.model)
        self.rnnCoarse = lstm_builder(
            self.RNN_LAYERS, self.OUTPUT_EMB_SIZE * 2 + self.UPSAMPLE_PROJ,
            self.RNN_SIZE, self.model)
        self.rnnFine = lstm_builder(
            self.RNN_LAYERS, self.OUTPUT_EMB_SIZE * 3 + self.UPSAMPLE_PROJ,
            self.RNN_SIZE, self.model)
        # self.rnnCoarse = dy.GRUBuilder(self.RNN_LAYERS, self.OUTPUT_EMB_SIZE * 2 + self.UPSAMPLE_PROJ,
        #                                self.RNN_SIZE, self.model)
        # self.rnnFine = dy.GRUBuilder(self.RNN_LAYERS, self.OUTPUT_EMB_SIZE * 3 + self.UPSAMPLE_PROJ,
        #                              self.RNN_SIZE, self.model)

        self.mlp_coarse_w = []
        self.mlp_coarse_b = []
        self.mlp_coarse_w.append(
            self.model.add_parameters((self.RNN_SIZE, self.RNN_SIZE)))
        self.mlp_coarse_b.append(self.model.add_parameters((self.RNN_SIZE)))

        self.mlp_fine_w = []
        self.mlp_fine_b = []
        self.mlp_fine_w.append(
            self.model.add_parameters((self.RNN_SIZE, self.RNN_SIZE)))
        self.mlp_fine_b.append(self.model.add_parameters((self.RNN_SIZE)))

        self.softmax_coarse_w = self.model.add_parameters((256, self.RNN_SIZE))
        self.softmax_coarse_b = self.model.add_parameters((256))
        self.softmax_fine_w = self.model.add_parameters((256, self.RNN_SIZE))
        self.softmax_fine_b = self.model.add_parameters((256))
Example #4
    def train(self, traindata, lr=1e-5):
        dy.renew_cg()
        trainer = dy.AdamTrainer(self.model, alpha=lr)

        # Logging
        path = self.fname() + '.log' if len(
            traindata) > 5000 else self.fname() + 'tuning.log'
        f = open(os.path.join(EVALUATION_PATH, path), 'w')

        epoch_timing = []
        early = 0.0
        best = -1
        for epoch in range(self.EPOCH):
            print('\n')
            dy.renew_cg()
            losses = []
            closs = 0
            batch_timing = []
            for i, q1id in enumerate(traindata):
                for q2id in traindata[q1id]:
                    start = time.time()
                    query = traindata[q1id][q2id]['q1_full']
                    question = traindata[q1id][q2id]['q2_full']
                    label = traindata[q1id][q2id]['label']

                    loss = self.get_loss(query, question, label)
                    losses.append(loss)

                    if len(losses) == self.BATCH:
                        loss = dy.esum(losses)
                        _loss = loss.value()
                        closs += _loss
                        loss.backward()
                        trainer.update()
                        dy.renew_cg()

                        # percentage of trainset processed
                        percentage = str(
                            round((float(i + 1) / len(traindata)) * 100,
                                  2)) + '%'
                        # time of epoch processing
                        time_epoch = self.tepoch(epoch_timing)
                        print(
                            "Epoch: {0} \t\t Loss: {1} \t\t Epoch time: {2} \t\t Trainset: {3}"
                            .format(epoch + 1, round(_loss, 2), time_epoch,
                                    percentage),
                            end='       \r')
                        losses = []
                        batch_timing = []
                    end = time.time()
                    t = (end - start)
                    batch_timing.append(t)
                    epoch_timing.append(t)

            log = "Epoch: {0} \t\t Loss: {1} \t\t Best: {2}".format(
                epoch + 1, round(closs / self.BATCH, 2), round(best, 2))
            print('\n' + log)
            f.write(' '.join([log, '\n']))

            log = 'Dev evaluation...'
            print(log)
            f.write(log + '\n')
            map_baseline, map_model, f1score, accuracy = self.test(
                self.devdata)

            results = 'MAP Model: {0} \t MAP baseline: {1} \t F1 score: {2} \t Accuracy: {3}'.format(
                round(map_model, 2), round(map_baseline, 2), round(f1score, 2),
                round(accuracy, 2))
            print(results)
            f.write(results)

            epoch_timing = []
            if map_model > best:
                best = copy.copy(map_model)
                early = 0
                path = self.fname() + '.dy'
                self.model.save(os.path.join(EVALUATION_PATH, path))
            else:
                trainer.learning_rate *= 0.5
                early += 1

            if early == self.EARLY_STOP:
                break
        f.close()
Example #5
input = np.loadtxt('input_sequences')
print "Loaded input"
output = np.loadtxt('output_sequences')
print "Loaded output"

X_train, X_test, y_train, y_test = train_test_split(input, output, test_size=0.1)
print "Split the data into train and test"

num_input = len(input[0])
num_output = num_input
num_hidden = int(sys.argv[1])
m = dy.Model()

dnn_1 = FeedForwardNeuralNet(m, [num_input, [num_hidden, num_hidden], num_output, [dy.rectify, dy.rectify, dy.logistic]])
trainer = dy.AdamTrainer(m)

train = zip(X_train, y_train)
test = zip(X_test, y_test)

for epoch in range(40):
   print "Epoch: ", epoch
   random.shuffle(train)
   train_loss = 0
   count = 0
   for (inp,out) in train:
     count += 1
     dy.renew_cg()
     loss = dnn_1.calculate_loss_classification(dy.inputTensor(inp), dy.inputTensor(out))
     train_loss += loss.value()
     loss.backward()
Example #6
	np.random.seed(666)
	argparser = argparse.ArgumentParser()
	argparser.add_argument('--config_file', default='../configs/default.cfg')
	argparser.add_argument('--model', default='BaseParser')
	args, extra_args = argparser.parse_known_args()
	config = Configurable(args.config_file, extra_args)
	Parser = getattr(models, args.model)

	vocab = Vocab(config.train_file, config.pretrained_embeddings_file, config.min_occur_count)
	cPickle.dump(vocab, open(config.save_vocab_path, 'w'))
	parser = Parser(vocab, config.word_dims, config.pret_dims, config.lemma_dims, config.tag_dims, 
					config.dropout_emb, config.lstm_layers, config.lstm_hiddens, config.dropout_lstm_input, 
					config.dropout_lstm_hidden, config.mlp_rel_size, config.dropout_mlp)
	data_loader = DataLoader(config.train_file, config.num_buckets_train, vocab)
	pc = parser.parameter_collection
	trainer = dy.AdamTrainer(pc, config.learning_rate , config.beta_1, config.beta_2, config.epsilon)
	
	global_step = 0
	def update_parameters():
		trainer.learning_rate =config.learning_rate*config.decay**(global_step / config.decay_steps)
		trainer.update()

	epoch = 0
	best_F1 = 0.
	history = lambda x, y : open(os.path.join(config.save_dir, 'valid_history'),'a').write('%.2f %.2f\n'%(x,y))
	while global_step < config.train_iters:
		print time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), '\nStart training epoch #%d'%(epoch, )
		epoch += 1
		for words, lemmas, tags, arcs, rels in \
				data_loader.get_batches(batch_size = config.train_batch_size, shuffle = True):
			num = int(words.shape[1]/2)
Example #7
    def __init__(self, config, encodings, embeddings, runtime=False):
        # INTERNAL PARAMS ###################################################
        self.config = config
        self.encodings = encodings
        self.word_embeddings = embeddings
        self.config.char_vocabulary_size = len(encodings.characters)
        self.decoder_output_class_count = 3  # O S SX
        self.decoder_output_i2c = {}
        self.decoder_output_i2c[0] = "O"
        self.decoder_output_i2c[1] = "S"
        self.decoder_output_i2c[2] = "SX"
        self.decoder_output_c2i = {}
        self.decoder_output_c2i["O"] = 0
        self.decoder_output_c2i["S"] = 1
        self.decoder_output_c2i["SX"] = 2

        # NETWORK ###########################################################
        self.model = dy.Model()
        self.trainer = dy.AdamTrainer(self.model)
        self.trainer.set_sparse_updates(False)

        # EMBEDDING SPECIAL TOKENS
        self.word_embeddings_special = self.model.add_lookup_parameters(
            (2, self.word_embeddings.word_embeddings_size
             ))  # [0] = UNK, [1] = SENTENCE START

        # ENCODER-CHAR
        self.char_embeddings = self.model.add_lookup_parameters(
            (self.config.char_vocabulary_size,
             self.config.char_embedding_size))
        # self.next_chars_embedding = self.model.add_lookup_parameters(
        #    (self.config.char_vocabulary_size, self.config.next_chars_embedding_size))

        self.char_embeddings_punctuation = self.model.add_lookup_parameters(
            (self.config.char_generic_feature_vocabulary_size,
             self.config.char_generic_feature_embedding_size))
        self.char_embeddings_whitespace = self.model.add_lookup_parameters(
            (self.config.char_generic_feature_vocabulary_size,
             self.config.char_generic_feature_embedding_size))
        self.char_embeddings_uppercase = self.model.add_lookup_parameters(
            (self.config.char_generic_feature_vocabulary_size,
             self.config.char_generic_feature_embedding_size))
        self.encoder_char_input_size = self.config.char_embedding_size + 3 * self.config.char_generic_feature_embedding_size
        if runtime:
            self.encoder_char_lstm1_fw_builder = dy.VanillaLSTMBuilder(
                1, self.encoder_char_input_size,
                self.config.encoder_char_lstm_size, self.model)

            self.encoder_char_lstm2_bw_builder = dy.VanillaLSTMBuilder(
                1, self.config.next_chars_embedding_size +
                3 * self.config.char_generic_feature_embedding_size,
                self.config.encoder_char_lstm_size, self.model)
            self.encoder_word_lstm_builder = dy.VanillaLSTMBuilder(
                1, self.word_embeddings.word_embeddings_size,
                self.config.encoder_word_lstm_size, self.model)
        else:
            from utils import orthonormal_VanillaLSTMBuilder
            self.encoder_char_lstm1_fw_builder = orthonormal_VanillaLSTMBuilder(
                1, self.encoder_char_input_size,
                self.config.encoder_char_lstm_size, self.model)

            self.encoder_char_lstm2_bw_builder = orthonormal_VanillaLSTMBuilder(
                1, self.config.next_chars_embedding_size +
                3 * self.config.char_generic_feature_embedding_size,
                self.config.encoder_char_lstm_size, self.model)
            self.encoder_word_lstm_builder = orthonormal_VanillaLSTMBuilder(
                1, self.word_embeddings.word_embeddings_size,
                self.config.encoder_word_lstm_size, self.model)

        # ENCODER-WORD

        # self.att_w1 = self.model.add_parameters((
        #     self.config.next_chars_embedding_size + self.config.char_generic_feature_embedding_size * 3,
        #     self.config.encoder_char_lstm_size))
        # self.att_w2 = self.model.add_parameters((
        #     self.config.next_chars_embedding_size + self.config.char_generic_feature_embedding_size * 3,
        #     self.config.encoder_char_lstm_size))
        # self.att_v = self.model.add_parameters(
        #     (1, self.config.next_chars_embedding_size + self.config.char_generic_feature_embedding_size * 3))

        # DECODER

        self.holisticWE = self.model.add_lookup_parameters(
            (len(encodings.word2int),
             self.word_embeddings.word_embeddings_size))

        self.decoder_input_size = 2 * self.config.encoder_char_lstm_size + self.config.encoder_word_lstm_size + self.word_embeddings.word_embeddings_size

        self.decoder_hiddenW = self.model.add_parameters(
            (self.config.decoder_hidden_size, self.decoder_input_size))
        self.decoder_hiddenB = self.model.add_parameters(
            (self.config.decoder_hidden_size))
        self.decoder_outputW = self.model.add_parameters(
            (self.decoder_output_class_count, self.config.decoder_hidden_size))
        self.decoder_outputB = self.model.add_parameters(
            (self.decoder_output_class_count))

        self.aux_softmax_char_peek_w = self.model.add_parameters(
            (self.decoder_output_class_count,
             self.config.encoder_char_lstm_size))
        self.aux_softmax_char_peek_b = self.model.add_parameters(
            (self.decoder_output_class_count))

        self.aux_softmax_char_hist_w = self.model.add_parameters(
            (self.decoder_output_class_count,
             self.config.encoder_char_lstm_size))
        self.aux_softmax_char_hist_b = self.model.add_parameters(
            (self.decoder_output_class_count))
        print("done")
Example #8
def char_train(network, train_set, val_set, test_set, test_set2,
               train_set_word, val_set_word, test_set_word, test_set2_word,
               epochs, batch_size, args, tag_to_ix):
    def get_val_set_loss(network, val_set, val_set_word, val_author_vecs,
                         pretrain, num_basis):
        loss = []
        vae_loss = [0]
        l2_loss = [0]
        for i, (input_sentence, output_sentence) in enumerate(val_set):
            if args.use_vae:
                l, a, v, l2 = network.get_full_loss(input_sentence,
                                                    val_set_word[i][0],
                                                    output_sentence,
                                                    val_author_vecs[i],
                                                    pretrain)
                loss.append(l.value())
                vae_loss.append(v.value())
                l2_loss.append(l2.value())
            else:
                loss.append(
                    network.get_loss(input_sentence, val_set_word[i][0],
                                     output_sentence, val_author_vecs[i],
                                     pretrain).value())
            dy.renew_cg()
        return sum(loss) / len(val_set), sum(vae_loss) / len(val_set), sum(
            l2_loss) / len(val_set)

    def get_val_set_acc(network, val_set, val_set_word, val_author_vecs,
                        val_author_ids, pretrain, num_basis):
        evals = []
        if args.use_vae:
            for i, (input_sentence, output_sentence) in enumerate(val_set):
                evals.append(
                    network.full_evaluate_acc(input_sentence,
                                              val_set_word[i][0],
                                              output_sentence,
                                              val_author_vecs[i],
                                              val_author_ids[i], pretrain))
                dy.renew_cg()
        else:
            for i, (input_sentence, output_sentence) in enumerate(val_set):
                evals.append(
                    network.evaluate_acc(input_sentence, val_set_word[i][0],
                                         output_sentence, val_author_vecs[i],
                                         val_author_ids[i], pretrain))
                dy.renew_cg()
        dy.renew_cg()

        correct = [c for c, t, d, w, cc, e in evals]
        total = [t for c, t, d, w, cc, e in evals]
        mean = 0
        confidence = 0
        oov = [d for c, t, d, w, cc, e in evals]
        wrong = [w for c, t, d, w, cc, e in evals]
        correct2 = [cc for c, t, d, w, cc, e in evals]

        auth_correct = [
            c for i, (c, t, d, w, cc, e) in enumerate(evals)
            if val_author_vecs[i] is not None
        ]
        auth_total = [
            t for i, (c, t, d, w, cc, e) in enumerate(evals)
            if val_author_vecs[i] is not None
        ]
        non_auth_correct = [
            c for i, (c, t, d, w, cc, e) in enumerate(evals)
            if val_author_vecs[i] is None
        ]
        non_auth_total = [
            t for i, (c, t, d, w, cc, e) in enumerate(evals)
            if val_author_vecs[i] is None
        ]
        eids = [e for c, t, d, w, cc, e in evals]
        #unique_eid = set(eids)
        len_eid = num_basis
        counts = []
        for i in range(len_eid):
            counts.append(sum([e == i for e in eids]))
        counts2 = []
        for i in range(len_eid):
            counts2.append(
                sum([
                    e == i for j, e in enumerate(eids)
                    if val_author_vecs[j] is not None
                ]))

        if sum(non_auth_total) == 0:
            non_auth_total = [1]

        return 100.0 * sum(correct) / sum(total), mean, confidence, sum(
            oov), sum(wrong), sum(correct2), 100.0 * sum(auth_correct) / sum(
                auth_total), 100.0 * sum(non_auth_correct) / sum(
                    non_auth_total), counts, counts2

    #original_set = train_set
    #train_set = train_set*epochs

    if args.optimizer == 'adadelta':
        trainer = dy.AdadeltaTrainer(network.model)
        trainer.set_clip_threshold(5)
    elif args.optimizer == 'adam':
        trainer = dy.AdamTrainer(network.model, alpha=args.lr)
        trainer.set_clip_threshold(5)
    elif args.optimizer == 'sgd-momentum':
        trainer = dy.MomentumSGDTrainer(network.model, learning_rate=args.lr)
    else:
        logging.critical('This Optimizer is not valid or not allowed')

    losses = []
    iterations = []

    kk = args.pretrain_epochs

    if args.use_all_networks:
        args.network = 'follow'
        train_author_vecs1, dev_author_vecs1, test_author_vecs1, test2_author_vecs1, train_author_ids, dev_author_ids, test_author_ids, test2_author_ids = extract_authorvecs(
            args)

        args.network = 'mention'
        train_author_vecs2, dev_author_vecs2, test_author_vecs2, test2_author_vecs2, _, _, _, _ = extract_authorvecs(
            args)

        args.network = 'retweet'
        train_author_vecs3, dev_author_vecs3, test_author_vecs3, test2_author_vecs3, _, _, _, _ = extract_authorvecs(
            args)

        train_author_vecs = []
        for i, j, k in zip(train_author_vecs1, train_author_vecs2,
                           train_author_vecs3):
            train_author_vecs.append((i, j, k))

        dev_author_vecs = []
        for i, j, k in zip(dev_author_vecs1, dev_author_vecs2,
                           dev_author_vecs3):
            dev_author_vecs.append((i, j, k))

        test_author_vecs = []
        for i, j, k in zip(test_author_vecs1, test_author_vecs2,
                           test_author_vecs3):
            test_author_vecs.append((i, j, k))

        test2_author_vecs = []
        for i, j, k in zip(test2_author_vecs1, test2_author_vecs2,
                           test2_author_vecs3):
            test2_author_vecs.append((i, j, k))

    else:
        train_author_vecs, dev_author_vecs, test_author_vecs, test2_author_vecs, train_author_ids, dev_author_ids, test_author_ids, test2_author_ids = extract_authorvecs(
            args)

    logging.info('obtained all author vectors ' + str(len(train_author_vecs)) +
                 ' ' + str(len(dev_author_vecs)) + ' ' +
                 str(len(test_author_vecs)) + ' ' +
                 str(len(test2_author_vecs)))

    batch_loss_vec = []
    dy.renew_cg()

    is_best = 0
    best_val = 0
    count = 0
    count_train = -1

    #early_stopping = 0

    for epoch in range(epochs):
        #if early_stopping>args.early_epochs:
        #	break

        all_inds = []
        num_train = int(len(train_set) / args.batch_size + 1) * args.batch_size

        #prev_time=time.time()

        for ii in range(num_train):

            count_train += 1
            if count_train == len(train_set):
                count_train = 0

            count += 1
            inputs, outputs = train_set[count_train]
            inputs_word, _ = train_set_word[count_train]
            '''
			data_point = {'inputs':inputs, 'inputs_word':inputs_word, 'outputs':outputs, 'train_author_vecs':train_author_vecs[i]}
			pickle.dump(data_point,open( "data_pickle/"+str(i)+".p", "wb" ))
			data_point = pickle.load( open( "data_pickle/"+str(i)+".p", "rb" ) )
			inputs = data_point['inputs']
			inputs_word = data_point['inputs_word']
			outputs = data_point['outputs']
			train_author_vec = data_point['train_author_vecs']
			'''

            #prev_time2 = time.time()
            #if train_author_vecs[count_train] !=None:

            vae_loss = 0
            if args.use_vae:
                loss, ind, vae_loss, l2_loss = network.get_full_loss(
                    inputs, inputs_word, outputs,
                    train_author_vecs[count_train], epoch < kk, True)
            else:
                loss, ind = network.get_loss(inputs, inputs_word, outputs,
                                             train_author_vecs[count_train],
                                             epoch < kk, True)

            #curr_time2 = time.time()

            #print ('time for one instance: ', curr_time2 - prev_time2)

            all_inds.append(ind)
            #print (loss)
            #a = input()
            batch_loss_vec.append(loss)

            if count % batch_size == 0:

                batch_loss = dy.esum(batch_loss_vec) / batch_size
                batch_loss.forward()
                batch_loss.backward()
                trainer.update()
                batch_loss_vec = []
                dy.renew_cg()
                count = 0
            #logging.info('finished minibatch: %d/%d',ii,num_train)

        #print ('until here-----')
        #curr_time = time.time()
        #print ('time for one epoch training: ', curr_time - prev_time)

        counts = []
        for i in range(args.num_basis):
            a = [v == i for v in all_inds]
            counts.append(sum(a))
        logging.info('distribution of the data points' + str(counts))

        #if ((i+1))%len(original_set) == 0:
        if args.plots:
            val_loss = get_val_set_loss(network, val_set, val_set_word,
                                        dev_author_vecs, epoch < kk,
                                        args.num_basis)
            losses.append(val_loss)
            iterations.append(epoch)
        #dy.renew_cg()

        #if ((i+1))%len(original_set)==0:
        train_loss = 0
        if args.slow:
            train_loss, train_vae_loss, train_l2_loss = get_val_set_loss(
                network, train_set, train_set_word, train_author_vecs,
                epoch < kk, args.num_basis)

        if args.write_errors:
            f = open(args.log_errors_file, 'a')
            f.write('\n--------- epoch no: --------- ')
            f.write(str(epoch) + '\n')
            f.close()
            f = open(args.log_errors_file, 'a')
            f.write('\n--------- oct27.train errors: --------- \n')
            f.close()
        #prev_time = time.time()
        trainacc, train_acc, train_confidence, oov_train, wrong_train, correct_train, auth_acc1, non_auth_acc1, eids1, counts21 = get_val_set_acc(
            network, train_set, train_set_word, train_author_vecs,
            train_author_ids, epoch < kk, args.num_basis)
        #curr_time = time.time()
        #print ('time for acc train: ', curr_time - prev_time)

        if args.write_errors:
            f = open(args.log_errors_file, 'a')
            f.write('\n--------- oct27.dev errors: ---------\n')
            f.close()

        val_loss, val_vae_loss, val_l2_loss = 0, 0, 0
        val_acc, oov_val, wrong_val, correct_val = 0, 0, 0, 0

        if args.slow:
            pass
            #val_loss,val_vae_loss = get_val_set_loss(network, val_set, val_set_word, dev_author_vecs,epoch<kk, args.num_basis)
        #prev_time = time.time()
        valacc, val_acc, val_confidence, oov_val, wrong_val, correct_val, auth_acc2, non_auth_acc2, eids2, counts22 = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        #valacc, val_acc, val_confidence, oov_val, wrong_val, correct_val, auth_acc2, non_auth_acc2, eids2, counts22 = get_val_set_acc(network, val_set, val_set_word, dev_author_vecs, dev_author_ids, epoch<kk, args.num_basis)
        #curr_time = time.time()
        #print ('time for acc val: ', curr_time - prev_time)

        if args.write_errors:
            f = open(args.log_errors_file, 'a')
            f.write('\n---------  oct27.test errors: --------- \n')
            f.close()
        test_loss = 0
        if args.slow:
            test_loss, test_vae_loss, test_l2_loss = get_val_set_loss(
                network, test_set, test_set_word, test_author_vecs, epoch < kk,
                args.num_basis)
        #prev_time = time.time()
        testacc, test_acc, test_confidence, oov_test, wrong_test, correct_test, auth_acc3, non_auth_acc3, eids3, counts23 = get_val_set_acc(
            network, test_set, test_set_word, test_author_vecs,
            test_author_ids, epoch < kk, args.num_basis)
        #curr_time = time.time()
        #print ('time for acc test: ', curr_time - prev_time)

        if args.write_errors:
            f = open(args.log_errors_file, 'a')
            f.write('\n---------  daily547.test errors: --------- \n')
            f.close()
        test_loss2 = 0
        if args.slow:
            test_loss2, test_vae_loss2, test2_l2_loss = get_val_set_loss(
                network, test_set2, test_set2_word, test2_author_vecs,
                epoch < kk, args.num_basis)
        #prev_time = time.time()
        testacc2, test_acc2, test2_confidence, oov_test2, wrong_test2, correct_test2, auth_acc4, non_auth_acc4, eids4, counts24 = get_val_set_acc(
            network, test_set2, test_set2_word, test2_author_vecs,
            test2_author_ids, epoch < kk, args.num_basis)
        #curr_time = time.time()
        #print ('time for acc test2: ', curr_time - prev_time)

        #test_loss2 = get_val_set_loss(network, test_set2, test_set2_word, test_author_vecs, epoch<kk)
        #test_acc2, oov_test2, wrong_test2, correct_test2, auth_acc4, non_auth_acc4, eids4 = get_val_set_acc(network, test_set2, test_set2_word, test_author_vecs,epoch<kk)

        #prev_time = time.time()
        logging.info('epoch %d done', epoch)
        logging.info(
            'train loss: %f, train vae loss: %f, train l2 loss: %f, train acc: %f',
            train_loss, train_vae_loss, train_l2_loss, trainacc)
        logging.info(
            'val loss: %f, val vae loss: %f, val l2 loss: %f, val acc: %f',
            val_loss, val_vae_loss, val_l2_loss, valacc)
        logging.info(
            'test loss: %f, test vae loss: %f, test l2 loss: %f, test acc: %f',
            test_loss, test_vae_loss, test_l2_loss, testacc)
        logging.info(
            'test2 loss: %f, test2 vae loss: %f, test2 l2 loss: %f, test2 acc: %f',
            test_loss2, test_vae_loss2, test2_l2_loss, testacc2)

        logging.info(
            ' oov_train: %d/%d, %d, oov_val: %d/%d, %d, oov_test: %d/%d, %d, oov_test2: %d/%d, %d',
            oov_train, wrong_train, correct_train, oov_val, wrong_val,
            correct_val, oov_test, wrong_test, correct_test, oov_test2,
            wrong_test2, correct_test2)

        logging.info(
            'train: author_acc: %f, non_author_acc: %f, ' + str(eids1) + ' ' +
            str(counts21), auth_acc1, non_auth_acc1)
        logging.info(
            'dev: author_acc: %f, non_author_acc: %f, ' + str(eids2) + ' ' +
            str(counts22), auth_acc2, non_auth_acc2)
        logging.info(
            'test: author_acc: %f, non_author_acc: %f, ' + str(eids3) + ' ' +
            str(counts23), auth_acc3, non_auth_acc3)
        logging.info(
            'test2: author_acc: %f, non_author_acc: %f, ' + str(eids4) + ' ' +
            str(counts24), auth_acc4, non_auth_acc4)

        if args.plots:
            test_acc, test_confidence, confusion_matrix, auth_acc, non_auth_acc, eids = get_val_set_acc2(
                network, test_set, test_set_word, test_author_vecs, epoch < kk,
                args.num_basis)
            df_cm = pd.DataFrame(confusion_matrix,
                                 index=[i for i in tag_to_ix.keys()],
                                 columns=[i for i in tag_to_ix.keys()])
            fig = plt.figure(figsize=(10, 7))
            sn.heatmap(df_cm, annot=True)
            fig.savefig('figs/conf_matrix_' + str(epoch) + '.png')
            #a = input()

        if args.combine_train_dev:
            valacc = testacc
        elif args.combine_train_dev_test:
            valacc = testacc2
        else:
            valacc = valacc

        m = network.model
        if epoch == 0:
            best_acc = valacc
            best_epoch = 0
            #best_val = val_loss
            #if args.combine_train_dev:
            #	best_acc = testacc
            #else:
            #	best_acc = valacc
            if args.save_model:
                m.save(args.save_model)
                logging.info('saving best model')
        else:
            #if args.combine_train_dev:
            #	valacc = testacc
            #
            #if best_acc < valacc:
            #	early_stopping = 0
            #	if args.combine_train_dev:
            #		best_acc = testacc
            #	else:
            #		best_acc = valacc
            if best_acc <= valacc:
                best_acc = valacc
                best_epoch = epoch
                if args.save_model:
                    m.save(args.save_model)
                    logging.info('re-saving best model')
            #else:
            #	early_stopping+=1
        logging.info('best model is at epoch no: %d', best_epoch)

    logging.info('\nbest model details are at epoch no: %d', best_epoch)

    #curr_time = time.time()
    #print ('time for rest junk: ', curr_time - prev_time)
    '''
	if count%batch_size!=0:
		batch_loss = dy.esum(batch_loss_vec)/len(batch_loss_vec)
		batch_loss.forward()
		batch_loss.backward()
		trainer.update()
		batch_loss_vec=[]
		dy.renew_cg()
	'''

    if args.plots:
        fig = plt.figure()
        plt.plot(iterations, losses)
        axes = plt.gca()
        axes.set_xlim([0, epochs])
        axes.set_ylim([0, 10000])

        fig.savefig('figs/loss_plot.png')
Example #9
    def __init__(self, pc, epochs=40):
        self.epochs = epochs
        self.pc = pc
        self.trainer = dy.AdamTrainer(pc, alpha=.005)
        self.BATCH_SIZE = 1
        self.lr = .005
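
The snippet above only constructs the trainer. For context, a minimal, self-contained sketch of how a DyNet AdamTrainer like this one is typically driven; the parameter shapes and toy data are purely illustrative and not taken from the original project:

import dynet as dy

pc = dy.ParameterCollection()
W = pc.add_parameters((1, 3))               # hypothetical weight matrix
b = pc.add_parameters((1,))                 # hypothetical bias
trainer = dy.AdamTrainer(pc, alpha=.005)

for x_vals, y_val in [([1.0, 2.0, 3.0], 1.0)]:         # toy data
    dy.renew_cg()                                       # fresh computation graph per example
    x = dy.inputVector(x_vals)
    # dy.parameter() places the parameters in the graph (recent DyNet versions also accept W * x + b directly)
    y_pred = dy.parameter(W) * x + dy.parameter(b)
    loss = dy.squared_distance(y_pred, dy.inputVector([y_val]))
    loss.value()                                        # forward pass
    loss.backward()                                     # accumulate gradients
    trainer.update()                                    # one Adam step
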
Example #10
    def __init__(self, config, encodings, embeddings, runtime=False):
        self.config = config
        self.word_embeddings = embeddings
        self.encodings = encodings

        self.modelSS = dy.Model()
        self.modelTok = dy.Model()
        self.trainerSS = dy.AdamTrainer(self.modelSS,
                                        alpha=2e-3,
                                        beta_1=0.9,
                                        beta_2=0.9)
        self.trainerTok = dy.AdamTrainer(self.modelTok,
                                         alpha=2e-3,
                                         beta_1=0.9,
                                         beta_2=0.9)

        # sentence split model
        from wrappers import CNN, CNNConvLayer, CNNPoolingLayer
        from utils import orthonormal_VanillaLSTMBuilder
        # character-level-embeddings
        self.SS_char_lookup = self.modelSS.add_lookup_parameters(
            (len(self.encodings.char2int),
             self.config.ss_char_embeddings_size))
        self.SS_char_lookup_casing = self.modelSS.add_lookup_parameters(
            (3, 5))  # lower, upper N/A
        self.SS_char_lookup_special = self.modelSS.add_lookup_parameters(
            (2, self.config.ss_char_embeddings_size + 5))
        # lstm-peek network
        if runtime:
            self.SS_peek_lstm = dy.VanillaLSTMBuilder(
                self.config.ss_peek_lstm_layers,
                self.config.ss_char_embeddings_size + 5,
                self.config.ss_peek_lstm_size, self.modelSS)
        else:
            self.SS_peek_lstm = orthonormal_VanillaLSTMBuilder(
                self.config.ss_peek_lstm_layers,
                self.config.ss_char_embeddings_size + 5,
                self.config.ss_peek_lstm_size, self.modelSS)
        layer_is = self.config.ss_peek_lstm_size
        self.SS_aux_softmax_peek_w = self.modelSS.add_parameters((2, layer_is))
        self.SS_aux_softmax_peek_b = self.modelSS.add_parameters((2))
        if runtime:
            self.SS_lstm = dy.VanillaLSTMBuilder(
                self.config.ss_lstm_layers,
                self.config.ss_char_embeddings_size + 5,
                self.config.ss_lstm_size, self.modelSS)
        else:
            self.SS_lstm = orthonormal_VanillaLSTMBuilder(
                self.config.ss_lstm_layers,
                self.config.ss_char_embeddings_size + 5,
                self.config.ss_lstm_size, self.modelSS)

        self.SS_aux_softmax_prev_w = self.modelSS.add_parameters(
            (2, self.config.ss_lstm_size))
        self.SS_aux_softmax_prev_b = self.modelSS.add_parameters((2))

        # post MLP and softmax
        self.SS_mlp_w = []
        self.SS_mlp_b = []
        layer_is = self.config.ss_lstm_size + self.config.ss_peek_lstm_size
        for layer in self.config.ss_mlp_layers:
            self.SS_mlp_w.append(self.modelSS.add_parameters(
                (layer, layer_is)))
            self.SS_mlp_b.append(self.modelSS.add_parameters((layer)))
            layer_is = layer

        self.SS_mlp_softmax_w = self.modelSS.add_parameters((2, layer_is))
        self.SS_mlp_softmax_b = self.modelSS.add_parameters((2))

        # tokenization model
        self.TOK_char_lookup = self.modelTok.add_lookup_parameters(
            (len(self.encodings.char2int),
             self.config.tok_char_embeddings_size))
        self.TOK_char_lookup_casing = self.modelTok.add_lookup_parameters(
            (3, 5))  # lower, upper N/A
        self.TOK_char_lookup_special = self.modelTok.add_lookup_parameters(
            (2, self.config.tok_char_embeddings_size + 5))
        self.TOK_word_lookup = self.modelTok.add_lookup_parameters(
            (len(self.encodings.word2int),
             self.config.tok_word_embeddings_size))

        self.TOK_word_embeddings_special = self.modelTok.add_lookup_parameters(
            (2, self.word_embeddings.word_embeddings_size))

        self.TOK_word_proj_w = self.modelTok.add_parameters(
            (self.config.tok_word_embeddings_size,
             self.word_embeddings.word_embeddings_size))
        # lstm networks
        if runtime:
            self.TOK_backward_lstm = dy.VanillaLSTMBuilder(
                self.config.tok_char_peek_lstm_layers,
                self.config.tok_char_embeddings_size + 5,
                self.config.tok_char_peek_lstm_size, self.modelTok)
            self.TOK_forward_lstm = dy.VanillaLSTMBuilder(
                self.config.tok_char_lstm_layers,
                self.config.tok_char_embeddings_size + 5,
                self.config.tok_char_lstm_size, self.modelTok)
            self.TOK_word_lstm = dy.VanillaLSTMBuilder(
                self.config.tok_word_lstm_layers,
                self.config.tok_word_embeddings_size,
                self.config.tok_word_lstm_size, self.modelTok)
        else:
            self.TOK_backward_lstm = orthonormal_VanillaLSTMBuilder(
                self.config.tok_char_peek_lstm_layers,
                self.config.tok_char_embeddings_size + 5,
                self.config.tok_char_peek_lstm_size, self.modelTok)
            self.TOK_forward_lstm = orthonormal_VanillaLSTMBuilder(
                self.config.tok_char_lstm_layers,
                self.config.tok_char_embeddings_size + 5,
                self.config.tok_char_lstm_size, self.modelTok)
            self.TOK_word_lstm = orthonormal_VanillaLSTMBuilder(
                self.config.tok_word_lstm_layers,
                self.config.tok_word_embeddings_size,
                self.config.tok_word_lstm_size, self.modelTok)

        self.TOK_mlp_w = []
        self.TOK_mlp_b = []
        layer_input = self.config.tok_word_lstm_size + self.config.tok_char_lstm_size + self.config.tok_char_peek_lstm_size + 2 + self.config.tok_word_embeddings_size
        for layer_size in self.config.tok_mlp_layers:
            self.TOK_mlp_w.append(
                self.modelTok.add_parameters((layer_size, layer_input)))
            self.TOK_mlp_b.append(self.modelTok.add_parameters((layer_size)))
            layer_input = layer_size

        self.TOK_softmax_w = self.modelTok.add_parameters((2, layer_input))
        self.TOK_softmax_b = self.modelTok.add_parameters((2))
        self.TOK_softmax_peek_w = self.modelTok.add_parameters(
            (2, self.config.tok_char_peek_lstm_size))
        self.TOK_softmax_peek_b = self.modelTok.add_parameters((2))
        self.TOK_softmax_prev_w = self.modelTok.add_parameters(
            (2, self.config.tok_char_lstm_size))
        self.TOK_softmax_prev_b = self.modelTok.add_parameters((2))

        self.losses = []
        self.losses_tok = []
Example #11
UNK = w2i["<unk>"]
def read_dataset(filename):
  with open(filename, "r") as f:
    for line in f:
      yield [w2i[x] for x in line.strip().split(" ")]

# Read in the data
train = list(read_dataset("../data/ptb/train.txt"))
w2i = defaultdict(lambda: UNK, w2i)
dev = list(read_dataset("../data/ptb/valid.txt"))
i2w = {v: k for k, v in w2i.items()}
nwords = len(w2i)

# Start DyNet and define trainer
model = dy.Model()
trainer = dy.AdamTrainer(model, alpha=0.001)

# Define the model
W_emb = model.add_lookup_parameters((nwords, EMB_SIZE)) # Word weights at each position
W_h_p = model.add_parameters((HID_SIZE, EMB_SIZE * N))    # Weights of the hidden layer
b_h_p = model.add_parameters((HID_SIZE))                  # Bias of the hidden layer
W_sm_p = model.add_parameters((nwords, HID_SIZE))         # Weights of the softmax
b_sm_p = model.add_parameters((nwords))                   # Softmax bias

# A function to calculate scores for one value
def calc_score_of_histories(words, dropout=0.0):
  # This will change from a list of histories, to a list of words in each history position
  words = np.transpose(words)
  # Lookup the embeddings and concatenate them
  emb = dy.concatenate([dy.lookup_batch(W_emb, x) for x in words])
  # Create the hidden layer
Example #12
    model.enable_dropout()

    uas = correct_ua / num_tokens
    las = correct_la / num_tokens
    print("\nUAS: {0:.4}, LAS: {1:.4}".format(uas, las))

if __name__ == "__main__":

    basename = "../build/en"
    index = read_index(basename)
    train_data = list(map_to_instances(read_conllu("../treebanks/train/en/en.conllu"), index, FIELDS))

    pc = dy.ParameterCollection()
    model = MLPParser(pc, basename="../build/en")
    model.enable_dropout()
    trainer = dy.AdamTrainer(pc)

    print("training sentences: {0}, tokens: {1}".format(len(train_data), sum([len(tree) for tree in train_data])))

    batch_size = 50
    max_steps = 1000

    step = 0
    total_loss = 0
    batch_loss = []
    batch_tokens = 0

    dy.renew_cg()
    for tree in shuffled_stream(train_data):

        batch_loss.append(arc_loss(model, tree))
Example #13
    def __init__(self,
                 lemmatizer_config,
                 encodings,
                 embeddings,
                 runtime=False):
        self.config = lemmatizer_config
        self.encodings = encodings
        self.embeddings = embeddings
        self.losses = []

        self.model = dy.Model()
        self.trainer = dy.AdamTrainer(self.model,
                                      alpha=2e-3,
                                      beta_1=0.9,
                                      beta_2=0.9)

        self.character_network = CharacterNetwork(
            self.config.tag_embeddings_size,
            encodings,
            rnn_size=self.config.char_rnn_size,
            rnn_layers=self.config.char_rnn_layers,
            embeddings_size=self.config.char_embeddings,
            model=self.model,
            runtime=runtime)

        self.upos_lookup = self.model.add_lookup_parameters(
            (len(self.encodings.upos2int), self.config.tag_embeddings_size))
        self.xpos_lookup = self.model.add_lookup_parameters(
            (len(self.encodings.xpos2int), self.config.tag_embeddings_size))
        self.attrs_lookup = self.model.add_lookup_parameters(
            (len(self.encodings.attrs2int), self.config.tag_embeddings_size))
        self.char_lookup = self.model.add_lookup_parameters(
            (len(self.encodings.char2int), self.config.char_embeddings))

        if runtime:
            self.rnn = dy.LSTMBuilder(
                self.config.rnn_layers,
                self.config.char_rnn_size * 2 + self.config.char_embeddings,
                self.config.rnn_size, self.model)
        else:
            from utils import orthonormal_VanillaLSTMBuilder
            self.rnn = orthonormal_VanillaLSTMBuilder(
                self.config.rnn_layers,
                self.config.char_rnn_size * 2 + self.config.char_embeddings,
                self.config.rnn_size, self.model)

        self.att_w1 = self.model.add_parameters(
            (200, self.config.char_rnn_size * 2))
        self.att_w2 = self.model.add_parameters(
            (200, self.config.rnn_size + self.config.tag_embeddings_size))
        self.att_v = self.model.add_parameters((1, 200))

        self.start_lookup = self.model.add_lookup_parameters(
            (1, self.config.char_rnn_size * 2 + self.config.char_embeddings))

        self.softmax_w = self.model.add_parameters(
            (len(self.encodings.char2int) + 1, self.config.rnn_size))
        self.softmax_b = self.model.add_parameters(
            (len(self.encodings.char2int) + 1))
        self.softmax_casing_w = self.model.add_parameters(
            (2, self.config.rnn_size))
        self.softmax_casing_b = self.model.add_parameters((2))
Example #14
    def train(self):

        trainer = dy.AdamTrainer(self.model)

        total_processed = 0
        current_processed = 0
        check_every = len(self.train_trees) / 3

        best_dev_score = -np.inf
        
        start_time = time.time()
        batch_size = 10
        epochs = 1
        for epoch in itertools.count(start=1):
            if epoch > epochs:
                break
            np.random.shuffle(self.train_trees)
            epoch_start_time = time.time()

            for start_index in range(0, len(self.train_trees), batch_size):
                dy.renew_cg()
                batch_losses = []
                for tree in self.train_trees[start_index:start_index + batch_size]:
                    document = [leaf._text for leaf in tree.leaves()]
                    _, loss = self.parse(document, tree)
                    batch_losses.append(loss)
                    total_processed += 1
                    current_processed += 1

                batch_loss = dy.average(batch_losses)
                batch_loss_value = batch_loss.scalar_value()
                batch_loss.backward()
                trainer.update()

                print(
                "epoch {:,} "
                "batch {:,}/{:,} "
                "processed {:,} "
                "batch-loss {:.4f} ".format(
                    epoch,
                    start_index // batch_size + 1,
                    int(np.ceil(len(self.train_trees) / batch_size)),
                    total_processed,
                    batch_loss_value,
                    #format_elapsed(epoch_start_time),
                    #format_elapsed(start_time),
                    )
                )

                if current_processed >= check_every:
                    current_processed -= check_every
                    dev_predicted = []
                    scores = 0
                    for tree in self.dev_trees:
                        dy.renew_cg()
                        sentence = [leaf._text for leaf in tree.leaves()]
                        predicted, score = self.parse(sentence)
                        dev_predicted.append((predicted, score.npvalue()))

                    for i, (predicted, score) in enumerate(dev_predicted):
                        scores += score
                        i_path = os.path.join("predicted_long", self.trees_indices[i])
                        try:
                            os.mkdir("predicted_long")
                        except FileExistsError:
                            pass
                        try:
                            os.mkdir(i_path)
                        except FileExistsError:
                            pass
                        j = 0
                        while os.path.exists(os.path.join(i_path, str(j))):
                            j += 1
                        with open(os.path.join(i_path, str(j)), "w") as f:
                            f.write(myTree.tree_utils.output_tree(predicted))
                        with open(os.path.join(i_path, str(j)+'.score.txt'), 'w') as f:
                            f.write(str(score))
                    logger.info("Score: {}".format(score))
Example #15
    def __init__(self, vocab, pos, xpos, rels, w2i, c2i, ext_words_train,
                 ext_words_devtest, options):

        self.model = dy.ParameterCollection()
        self.pretrained_embs = dy.ParameterCollection()
        self.learning_rate = options.learning_rate
        self.trainer = dy.AdamTrainer(self.model,
                                      alpha=self.learning_rate,
                                      beta_1=0.9,
                                      beta_2=0.9,
                                      eps=1e-12)

        self.dropout = float(options.dropout)
        self.ldims = options.lstm_dims
        self.hidden2 = options.hidden_2
        self.wdims = options.wembedding_dims
        self.cdims = options.cembedding_dims
        self.posdims = options.posembedding_dims
        self.pred_batch_size = options.pred_batch_size
        self.ext_words_train = {
            word: ind + 2
            for word, ind in ext_words_train.items()
        }
        self.ext_words_devtest = {
            word: ind + 2
            for word, ind in ext_words_devtest.items()
        }
        self.wordsCount = vocab
        self.vocab = {word: ind + 2 for word, ind in w2i.items()}
        self.pos = {word: ind + 2 for ind, word in enumerate(pos)}
        self.id2pos = {ind: word for word, ind in self.pos.items()}
        self.xpos = {word: ind + 2 for ind, word in enumerate(xpos)}
        self.id2xpos = {ind: word for word, ind in self.xpos.items()}
        self.c2i = c2i
        self.rels = {word: ind for ind, word in enumerate(rels)}
        self.irels = {ind: word for word, ind in self.rels.items()}
        self.vocab['PAD'] = 1
        self.pos['PAD'] = 1
        self.xpos['PAD'] = 1

        self.external_embedding, self.edim, self.edim_out = None, 0, 0
        if options.external_embedding is not None:

            self.external_embedding = np.load(options.external_embedding)
            self.ext_voc = pickle.load(
                open(options.external_embedding_voc, "rb"))
            self.edim = self.external_embedding.shape[1]
            self.projected_embs = Lin_Projection(self.model, self.edim,
                                                 self.wdims)
            self.elookup_train = self.pretrained_embs.add_lookup_parameters(
                (len(self.ext_words_train) + 2, self.edim))
            for word, i in self.ext_words_train.items():
                self.elookup_train.init_row(
                    i, self.external_embedding[self.ext_voc[word], :])
            self.elookup_train.init_row(0, np.zeros(self.edim))
            self.elookup_train.init_row(1, np.zeros(self.edim))

            self.elookup_devtest = self.pretrained_embs.add_lookup_parameters(
                (len(self.ext_words_devtest) + 2, self.edim))
            for word, i in self.ext_words_devtest.items():
                self.elookup_devtest.init_row(
                    i, self.external_embedding[self.ext_voc[word], :])
            self.elookup_devtest.init_row(0, np.zeros(self.edim))
            self.elookup_devtest.init_row(1, np.zeros(self.edim))

            self.ext_words_train['PAD'] = 1
            self.ext_words_devtest['PAD'] = 1

            print(
                'Loaded external embeddings. External embedding vector dimension:',
                self.edim)

        #LSTMs
        self.fwdLSTM1 = LSTM(self.model,
                             self.wdims + self.posdims,
                             self.ldims,
                             forget_bias=0.0)
        self.bwdLSTM1 = LSTM(self.model,
                             self.wdims + self.posdims,
                             self.ldims,
                             forget_bias=0.0)
        self.fwdLSTM2 = LSTM(self.model,
                             2 * self.ldims,
                             self.ldims,
                             forget_bias=0.0)
        self.bwdLSTM2 = LSTM(self.model,
                             2 * self.ldims,
                             self.ldims,
                             forget_bias=0.0)

        self.attention = AttentionDecoder(self.model,
                                          len(self.rels),
                                          src_ctx_dim=self.ldims * 2,
                                          hidden=self.hidden2,
                                          dropout=self.dropout)

        self.HybridCharembs = HybridCharacterAttention(self.model,
                                                       ldims=400,
                                                       input_size=self.cdims,
                                                       output_size=self.wdims,
                                                       dropout=self.dropout)

        self.wlookup = self.model.add_lookup_parameters(
            (len(vocab) + 2, self.wdims), init=dy.ConstInitializer(0))
        #0 for unknown 1 for [PAD]
        self.poslookup = self.model.add_lookup_parameters(
            (len(self.pos) + 2, self.posdims), init=dy.ConstInitializer(0))
        #0 for unknown 1 for  [PAD]
        self.xposlookup = self.model.add_lookup_parameters(
            (len(self.xpos) + 2, self.posdims), init=dy.ConstInitializer(0))
        #0 for unknown 1 for  [PAD]

        self.clookup = self.model.add_lookup_parameters(
            (len(c2i), self.cdims), init=dy.NormalInitializer())
        self.ROOT = self.model.add_parameters((self.wdims * 2),
                                              init=dy.ConstInitializer(0))
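A side note on the index convention above: ids 0 and 1 are reserved for unknown words and [PAD], which is why every vocabulary entry is shifted by 2. The tiny helper below is a hypothetical sketch of a lookup that respects this convention; it is not part of the original class.

    def _word_index(self, word):
        # Hypothetical helper: unknown words fall back to index 0;
        # self.vocab['PAD'] was set to 1 in __init__ above.
        return self.vocab.get(word, 0)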
    def __init__(self):

        # first initialize a computation graph container (or model).
        self.nnmodel = dynet.Model()

        # assign the algorithm for backpropagation updates.
        self.updater = dynet.AdamTrainer(self.nnmodel)

        num_words, num_tags, num_labels = 4808, 46, 46
        word_embed_dim, pos_embed_dim, label_embed_dim = 100, 32, 32
        hidden_layer1_dim, hidden_layer2_dim = 600, 600
        num_actions = 93

        self.minibatch_size = 1000

        # create embeddings for words and tag features.
        self.word_embedding = self.nnmodel.add_lookup_parameters(
            (num_words, word_embed_dim))

        glove_word_embeddings_dict = {}  # key is the word, value is the list of 100 embedding values
        embed_lines = open("glove.6B.100d.txt", 'r').read().splitlines()
        for line in embed_lines:
            values = line.split()
            word = values.pop(0)
            glove_word_embeddings_dict[word] = values

        vocab_words = open("./data/vocabs.word", 'r').read().splitlines()
        for i, word_line in enumerate(vocab_words):
            word = word_line.split()[0]
            if word in glove_word_embeddings_dict:
                # initialize the i-th embedding row with the pre-trained GloVe vector
                self.word_embedding.init_row(
                    i, np.asarray(glove_word_embeddings_dict[word], dtype=float))

        self.pos_embedding = self.nnmodel.add_lookup_parameters(
            (num_tags, pos_embed_dim))
        self.label_embedding = self.nnmodel.add_lookup_parameters(
            (num_labels, label_embed_dim))

        # assign the transfer function (a leaky ReLU built from dynet.bmax)
        self.transfer = (lambda x: dynet.bmax(.1 * x, x))

        self.input_dim = 20 * (word_embed_dim +
                               pos_embed_dim) + 12 * label_embed_dim

        self.hidden_layer1 = self.nnmodel.add_parameters(
            (hidden_layer1_dim, self.input_dim))
        self.hidden_layer1_bias = self.nnmodel.add_parameters(
            hidden_layer1_dim, init=dynet.ConstInitializer(0.2))

        self.hidden_layer2 = self.nnmodel.add_parameters(
            (hidden_layer2_dim, hidden_layer1_dim))
        self.hidden_layer2_bias = self.nnmodel.add_parameters(
            hidden_layer2_dim, init=dynet.ConstInitializer(0.2))

        # define the output weight.
        self.output_layer = self.nnmodel.add_parameters(
            (num_actions, hidden_layer2_dim))

        # define the bias vector and initialize it as zero.
        self.output_bias = self.nnmodel.add_parameters(
            num_actions, init=dynet.ConstInitializer(0))

        self.dropout_prob = 0.2
        '''
            You can add more arguments, for example actions and model paths.
            You need to load your model here.
            actions: provides indices for actions.
            It has the same order as the data/vocabs.actions file.
        '''
        # if you prefer to have your own index for actions, change this.
        self.actions = [
            'SHIFT', 'LEFT-ARC:prep', 'LEFT-ARC:dobj', 'LEFT-ARC:poss',
            'LEFT-ARC:amod', 'LEFT-ARC:xcomp', 'LEFT-ARC:mark',
            'LEFT-ARC:conj', 'LEFT-ARC:nn', 'LEFT-ARC:rcmod', 'LEFT-ARC:advcl',
            'LEFT-ARC:cc', 'LEFT-ARC:pcomp', 'LEFT-ARC:expl', 'LEFT-ARC:tmod',
            'LEFT-ARC:csubj', 'LEFT-ARC:number', 'LEFT-ARC:iobj',
            'LEFT-ARC:<null>', 'LEFT-ARC:preconj', 'LEFT-ARC:nsubj',
            'LEFT-ARC:appos', 'LEFT-ARC:infmod', 'LEFT-ARC:partmod',
            'LEFT-ARC:ccomp', 'LEFT-ARC:aux', 'LEFT-ARC:auxpass',
            'LEFT-ARC:parataxis', 'LEFT-ARC:det', 'LEFT-ARC:punct',
            'LEFT-ARC:discourse', 'LEFT-ARC:dep', 'LEFT-ARC:cop',
            'LEFT-ARC:pobj', 'LEFT-ARC:num', 'LEFT-ARC:prt',
            'LEFT-ARC:possessive', 'LEFT-ARC:rroot', 'LEFT-ARC:npadvmod',
            'LEFT-ARC:mwe', 'LEFT-ARC:neg', 'LEFT-ARC:predet',
            'LEFT-ARC:nsubjpass', 'LEFT-ARC:quantmod', 'LEFT-ARC:root',
            'LEFT-ARC:acomp', 'LEFT-ARC:advmod', 'RIGHT-ARC:prep',
            'RIGHT-ARC:dobj', 'RIGHT-ARC:poss', 'RIGHT-ARC:amod',
            'RIGHT-ARC:xcomp', 'RIGHT-ARC:mark', 'RIGHT-ARC:conj',
            'RIGHT-ARC:nn', 'RIGHT-ARC:rcmod', 'RIGHT-ARC:advcl',
            'RIGHT-ARC:cc', 'RIGHT-ARC:pcomp', 'RIGHT-ARC:expl',
            'RIGHT-ARC:tmod', 'RIGHT-ARC:csubj', 'RIGHT-ARC:number',
            'RIGHT-ARC:iobj', 'RIGHT-ARC:<null>', 'RIGHT-ARC:preconj',
            'RIGHT-ARC:nsubj', 'RIGHT-ARC:appos', 'RIGHT-ARC:infmod',
            'RIGHT-ARC:partmod', 'RIGHT-ARC:ccomp', 'RIGHT-ARC:aux',
            'RIGHT-ARC:auxpass', 'RIGHT-ARC:parataxis', 'RIGHT-ARC:det',
            'RIGHT-ARC:punct', 'RIGHT-ARC:discourse', 'RIGHT-ARC:dep',
            'RIGHT-ARC:cop', 'RIGHT-ARC:pobj', 'RIGHT-ARC:num',
            'RIGHT-ARC:prt', 'RIGHT-ARC:possessive', 'RIGHT-ARC:rroot',
            'RIGHT-ARC:npadvmod', 'RIGHT-ARC:mwe', 'RIGHT-ARC:neg',
            'RIGHT-ARC:predet', 'RIGHT-ARC:nsubjpass', 'RIGHT-ARC:quantmod',
            'RIGHT-ARC:root', 'RIGHT-ARC:acomp', 'RIGHT-ARC:advmod'
        ]
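Given the parameters above, a forward pass would concatenate the 20 word, 20 POS and 12 label embeddings (matching self.input_dim = 20 * (word_embed_dim + pos_embed_dim) + 12 * label_embed_dim) and push the result through the two hidden layers with the leaky-ReLU transfer. The method below is only a hedged sketch of that wiring, not code from the original parser; recent DyNet versions allow parameters to be used directly in expressions, while older ones would need dynet.parameter(...).

    def score_actions(self, word_ids, pos_ids, label_ids):
        # Hypothetical sketch: expects 20 word ids, 20 POS ids and 12 label ids.
        dynet.renew_cg()
        embs = [self.word_embedding[i] for i in word_ids]
        embs += [self.pos_embedding[i] for i in pos_ids]
        embs += [self.label_embedding[i] for i in label_ids]
        x = dynet.concatenate(embs)
        h1 = self.transfer(self.hidden_layer1 * x + self.hidden_layer1_bias)
        h2 = self.transfer(self.hidden_layer2 * h1 + self.hidden_layer2_bias)
        return self.output_layer * h2 + self.output_bias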
Example No. 17
0
    def __init__(self,
                 word_size,
                 tag_size,
                 rel_size,
                 input_dim,
                 hidden_dim,
                 pdrop_embs,
                 pdrop_lstm,
                 pdrop_mlp,
                 layers,
                 mlp_dim,
                 arc_dim,
                 biaffine_bias_x_arc,
                 biaffine_bias_y_arc,
                 biaffine_bias_x_rel,
                 biaffine_bias_y_rel,
                 embs_word=None):

        self._global_step = 0
        self._early_stop_count = 0
        self._update = False
        self._best_score = 0.
        self._best_score_las = 0.

        self._punct_id = 0

        self._masks_w = []
        self._masks_t = []

        self._vocab_size_w = word_size
        self._vocab_size_t = tag_size
        self._vocab_size_r = rel_size

        self._mlp_dim = mlp_dim
        self._arc_dim = arc_dim
        self._rel_dim = mlp_dim - arc_dim
        self.biaffine_bias_x_arc = biaffine_bias_x_arc
        self.biaffine_bias_y_arc = biaffine_bias_y_arc
        self.biaffine_bias_x_rel = biaffine_bias_x_rel
        self.biaffine_bias_y_rel = biaffine_bias_y_rel

        self._pc = dy.ParameterCollection()

        if config.adam:
            self._trainer = dy.AdamTrainer(self._pc, config.learning_rate,
                                           config.beta_1, config.beta_2,
                                           config.epsilon)
        else:
            # self._trainer = dy.AdadeltaTrainer(self._pc)
            self._trainer = dy.SimpleSGDTrainer(self._pc, config.learning_rate)
            self._trainer.set_clip_threshold(config.clip_threshold)

        # self._trainer.set_clip_threshold(1.0)

        self.params = dict()
        if embs_word is None:
            self.lp_w = self._pc.add_lookup_parameters(
                (word_size, input_dim), init=dy.ConstInitializer(0.))
        else:
            self.lp_w = self._pc.lookup_parameters_from_numpy(embs_word)
        self.lp_t = self._pc.add_lookup_parameters(
            (tag_size, input_dim), init=dy.ConstInitializer(0.))
        self.emb_root = self._pc.add_lookup_parameters(
            (2, input_dim), init=dy.ConstInitializer(0.))

        # if config.isTest:
        #     self.l2r_lstm = dy.VanillaLSTMBuilder(layers, input_dim * 2, hidden_dim, self._pc)
        #     self.r2l_lstm = dy.VanillaLSTMBuilder(layers, input_dim * 2, hidden_dim, self._pc)
        # else:
        #     self.l2r_lstm = utils.orthonormal_VanillaLSTMBuilder(layers, input_dim * 2, hidden_dim, self._pc)
        #     self.r2l_lstm = utils.orthonormal_VanillaLSTMBuilder(layers, input_dim * 2, hidden_dim, self._pc)
        self._pdrop_embs = pdrop_embs
        self._pdrop_lstm = pdrop_lstm
        self._pdrop_mlp = pdrop_mlp

        self.LSTM_builders = []

        # f = utils.orthonormal_VanillaLSTMBuilder(layers, input_dim * 2, hidden_dim, self._pc)
        # b = utils.orthonormal_VanillaLSTMBuilder(layers, input_dim * 2, hidden_dim, self._pc)
        #
        # self.LSTM_builders = [f, b]

        f = utils.orthonormal_VanillaLSTMBuilder(1, input_dim * 2, hidden_dim,
                                                 self._pc)
        b = utils.orthonormal_VanillaLSTMBuilder(1, input_dim * 2, hidden_dim,
                                                 self._pc)

        self.LSTM_builders.append((f, b))
        for i in range(layers - 1):
            f = utils.orthonormal_VanillaLSTMBuilder(1, 2 * hidden_dim,
                                                     hidden_dim, self._pc)
            b = utils.orthonormal_VanillaLSTMBuilder(1, 2 * hidden_dim,
                                                     hidden_dim, self._pc)
            self.LSTM_builders.append((f, b))

        if config.biaffine:
            W = utils.orthonormal_initializer(mlp_dim, 2 * hidden_dim)
            self.mlp_dep = self._pc.parameters_from_numpy(W)
            self.mlp_head = self._pc.parameters_from_numpy(W)
            self.mlp_dep_bias = self._pc.add_parameters(
                (mlp_dim, ), init=dy.ConstInitializer(0.))
            self.mlp_head_bias = self._pc.add_parameters(
                (mlp_dim, ), init=dy.ConstInitializer(0.))
        else:
            W = utils.orthonormal_initializer(mlp_dim * 2, 2 * hidden_dim)
            self.mlp = self._pc.parameters_from_numpy(W)
            self.mlp_bias = self._pc.add_parameters(
                (mlp_dim * 2, ), init=dy.ConstInitializer(0.))

        # self.mlp_arc_size = mlp_arc_size
        # self.mlp_rel_size = mlp_rel_size
        # self.dropout_mlp = dropout_mlp
        if config.biaffine:
            self.W_arc = self._pc.add_parameters(
                (self._arc_dim, self._arc_dim + 1),
                init=dy.ConstInitializer(0.))
            self.W_rel = self._pc.add_parameters(
                (self._vocab_size_r * (self._rel_dim + 1), self._rel_dim + 1),
                init=dy.ConstInitializer(0.))
        else:
            self.V_r_arc = self._pc.add_parameters((self._arc_dim))
            self.V_i_arc = self._pc.add_parameters((self._arc_dim))
            self.bias_arc = self._pc.add_parameters((self._arc_dim * 2))
            self.V_r_rel = self._pc.add_parameters(
                (self._rel_dim * self._vocab_size_r))
            self.V_i_rel = self._pc.add_parameters(
                (self._rel_dim * self._vocab_size_r))
            self.bias_rel = self._pc.add_parameters(
                (self._rel_dim * self._vocab_size_r * 2))

        return
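For readers puzzled by the (arc_dim, arc_dim + 1) shape of W_arc: in a Dozat-and-Manning-style biaffine scorer the dependent representations get an extra bias column before being multiplied through W_arc against the head representations. The numpy fragment below is a standalone, hedged illustration of that score and is not the scoring code of this model.

import numpy as np

def biaffine_arc_scores(H_dep, H_head, W_arc):
    # Illustration only. H_dep, H_head: (n, arc_dim) matrices of dependent / head
    # MLP outputs; W_arc: (arc_dim, arc_dim + 1), as declared above.
    n = H_dep.shape[0]
    H_dep = np.concatenate([H_dep, np.ones((n, 1))], axis=1)  # append the bias column
    # scores[i, j] = [h_dep_i ; 1]^T W_arc^T h_head_j
    return H_dep @ W_arc.T @ H_head.T  # (n, n) arc score matrix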
Example No. 18
0
    def __init__(self, args, vocabLengthSource, vocabLengthActionRule,
                 vocabLengthNodes, vocabLengthTarget):

        self.flag_copy = True

        self.vocabLengthSource = vocabLengthSource
        self.vocabLengthActionRule = vocabLengthActionRule
        self.vocabLengthNodes = vocabLengthNodes
        self.vocabLengthTarget = vocabLengthTarget

        # parameters for the model
        self.numLayer = args.numLayer
        self.embeddingSourceSize = args.embeddingSourceSize
        self.embeddingApplySize = args.embeddingApplySize
        self.embeddingGenSize = args.embeddingGenSize
        self.embeddingNodeSize = args.embeddingNodeSize
        self.hiddenSize = args.hiddenSize
        self.attSize = args.attSize
        self.pointerSize = args.pointerSize
        self.dropout = args.dropout
        self.embeddingRuletypeSize = 2
        self.learningRate = args.learningRate

        self.model = dy.ParameterCollection()
        self.trainer = dy.AdamTrainer(self.model, alpha=self.learningRate)

        # source lookup
        self.sourceLookup = self.model.add_lookup_parameters(
            (self.vocabLengthSource, self.embeddingSourceSize))

        # action embedding matrix
        self.actionRuleLookup = self.model.add_lookup_parameters(
            (self.vocabLengthActionRule, self.embeddingApplySize))

        # for node type lookup
        self.nodeTypeLookup = self.model.add_lookup_parameters(
            (self.vocabLengthNodes, self.embeddingNodeSize))

        # for gen token lookup
        self.gentokenLookup = self.model.add_lookup_parameters(
            (self.vocabLengthTarget, self.embeddingGenSize))

        # adding parameters to the AST neural network
        self.attentionSource = self.model.add_parameters(
            (self.attSize, self.hiddenSize * 2))
        self.attentionTarget = self.model.add_parameters(
            (self.attSize, self.numLayer * self.hiddenSize * 2))
        self.attentionParameter = self.model.add_parameters((1, self.attSize))

        self.w_selection_gen_softmax = self.model.add_parameters(
            (2, self.hiddenSize))

        self.w_out_rule = self.model.add_parameters(
            (self.embeddingApplySize,
             self.hiddenSize))  # should change when the number of hidden layers increases
        self.b_out_rule = self.model.add_parameters((self.embeddingApplySize))

        self.w_out_vocab = self.model.add_parameters(
            (self.embeddingApplySize, self.hiddenSize +
             self.hiddenSize * 2))  # should change when the number of hidden layers increases
        self.b_out_vocab = self.model.add_parameters((self.embeddingApplySize))

        self.w_pointer_hidden = self.model.add_parameters(
            (self.pointerSize,
             2 * self.hiddenSize + 2 * self.hiddenSize + self.hiddenSize))
        self.b_pointer_hidden = self.model.add_parameters((self.pointerSize))
        self.w_pointer_out = self.model.add_parameters((1, self.pointerSize))
        self.b_pointer_out = self.model.add_parameters((1))
        # initializing the encoder and decoder
        self.forward_encoder = dy.LSTMBuilder(self.numLayer,
                                              self.embeddingSourceSize,
                                              self.hiddenSize, self.model)
        self.backward_encoder = dy.LSTMBuilder(self.numLayer,
                                               self.embeddingSourceSize,
                                               self.hiddenSize, self.model)

        # check this
        # embedding size + (previous action embedding + context vector + node type embedding + parent feeding)
        # parent feeding - hidden states of parent action + embedding of parent action
        self.inputDecoderSize = self.embeddingApplySize + self.hiddenSize * 2 + self.hiddenSize + self.embeddingApplySize + self.embeddingNodeSize
        self.decoder = dy.VanillaLSTMBuilder(self.numLayer,
                                             self.inputDecoderSize,
                                             self.hiddenSize, self.model)
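The attentionSource / attentionTarget / attentionParameter shapes above point to additive (Bahdanau-style) attention over the bidirectional encoder states. The method below is a hedged sketch of how such scores are usually computed, assuming each source annotation has size 2 * hiddenSize and the decoder state has size numLayer * 2 * hiddenSize; it is not taken from the original model.

    def attention_weights(self, src_vectors, decoder_state):
        # Hypothetical sketch of additive attention; recent DyNet lets parameters
        # be used directly in expressions (older versions need dy.parameter(...)).
        tgt_part = self.attentionTarget * decoder_state
        scores = [self.attentionParameter *
                  dy.tanh(self.attentionSource * h_src + tgt_part)
                  for h_src in src_vectors]
        return dy.softmax(dy.concatenate(scores))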
Example No. 19
0
EPOCHS = 500
BATCH_SIZE = 50
HIDDEN_LAYER = 1
NUM_OUT = 100
VOCAB_SIZE = 13
start_time = time.time()

model = dn.Model()
input_lookup = model.add_lookup_parameters((VOCAB_SIZE, EMBEDDINGS_SIZE))
lstm = dn.LSTMBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE, STATE_SIZE, model)
output_w_1 = model.add_parameters((VOCAB_SIZE, STATE_SIZE))
output_b_1 = model.add_parameters((VOCAB_SIZE))

output_w_2 = model.add_parameters((NUM_OUT, VOCAB_SIZE))
output_b_2 = model.add_parameters((NUM_OUT))
trainer = dn.AdamTrainer(model)
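EMBEDDINGS_SIZE, LSTM_NUM_OF_LAYERS and STATE_SIZE are defined earlier in the original file and are not shown in this excerpt. As a hedged sketch only, one plausible way the parameters above combine for a single input sequence is shown below; the function name and the intermediate tanh are assumptions, not code from the original script.

def predict_scores(char_ids):
    # Hypothetical helper: run the ids through the LSTM, then the two linear layers.
    dn.renew_cg()
    outputs = lstm.initial_state().transduce([input_lookup[c] for c in char_ids])
    hidden = dn.tanh(output_w_1 * outputs[-1] + output_b_1)  # (VOCAB_SIZE,)
    return output_w_2 * hidden + output_b_2                  # (NUM_OUT,)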


def passed_time(previous_time):
    return round(time.time() - previous_time, 3)


def read_to_data(name_file):
    words_tags = []
    max_length = 0
    words = open(name_file, "r").read().split('\n')
    for word_tag in words:
        if word_tag != "":
            word, tag = word_tag.split('/')
            words_tags.append((word, tag))
            if len(word) > max_length:
Example No. 20
0
    def __init__(self, vocab, pos, rels, w2i, options):
        self.model = dn.Model()
        random.seed(1)
        self.trainer = dn.AdamTrainer(self.model)

        self.activation = self.activations[options.activation]
        self.decoder = self.decoders[options.decoder](options)
        self.test_decoder = self.decoders[options.test_decoder](options) \
            if options.test_decoder is not None \
            else self.decoder
        self.cost_augment = cost_augments[options.cost_augment]

        self.labelsFlag = options.labelsFlag
        self.options = options

        self.ldims = options.lstm_dims
        self.wdims = options.wembedding_dims
        self.pdims = options.pembedding_dims
        self.rdims = options.rembedding_dims
        self.layers = options.lstm_layers
        self.wordsCount = vocab
        self.vocab = {word: ind + 3 for word, ind in w2i.iteritems()}
        self.pos = {word: ind + 3 for ind, word in enumerate(pos)}
        self.rels = {word: ind
                     for ind, word in enumerate(rels)}  # type: dict[str, int]
        self.irels = rels

        if options.external_embedding is not None:
            self.extrnd, self.elookup, self.edim = nn.get_external_embedding(
                self.model, options.external_embedding)
            logger.info('Loaded external embedding. Vector dimension %d',
                        self.edim)
        else:
            self.extrnd, self.elookup, self.edim = None, None, 0

        dims = self.wdims + self.pdims + self.edim
        self.rnn = nn.BiLSTM(self.model,
                             [dims] + [self.ldims * 2] * options.lstm_layers)

        self.hidden_units = options.hidden_units
        self.hidden2_units = options.hidden2_units

        self.vocab['*PAD*'] = 1
        self.pos['*PAD*'] = 1

        self.vocab['*INITIAL*'] = 2
        self.pos['*INITIAL*'] = 2

        self.wlookup = self.model.add_lookup_parameters(
            (len(vocab) + 3, self.wdims))
        self.plookup = self.model.add_lookup_parameters(
            (len(pos) + 3, self.pdims))
        self.rlookup = self.model.add_lookup_parameters(
            (len(rels), self.rdims))

        if self.hidden2_units > 0:
            dense_dims = [self.hidden_units, self.hidden2_units, 1]
            use_bias = [True, False]
        else:
            dense_dims = [self.hidden_units, 1]
            # use_bias = [dn.NormalInitializer(0, 0)]
            use_bias = [False]

        self.head_dense_layer = DenseLayers(
            self.model, [self.ldims * 2, self.hidden_units], self.activation)
        self.dep_dense_layer = DenseLayers(self.model,
                                           [self.ldims * 2, self.hidden_units],
                                           self.activation)

        self.fusion_layer = nn.Biaffine(self.model, self.hidden_units,
                                        self.activation)

        if self.labelsFlag:
            self.relation_binear_layer = BiLinear(self.model, self.ldims * 2,
                                                  self.hidden_units)
            relation_dense_dims = list(dense_dims)
            relation_dense_dims[-1] = len(self.irels)

            self.relation_dense_layer = DenseLayers(self.model,
                                                    relation_dense_dims,
                                                    self.activation)
Example No. 21
0
        meta.n_out = len(meta.tdmaps)
        meta.rmaps = {v: k for k, v in meta.tdmaps.items()}

        meta.w2i = {}
        for w in wvm.vocab:
            meta.w2i[w] = wvm.vocab[w].index

    if args.save_model:
        pickle.dump(meta, open('%s.meta' % args.save_model, 'wb'))
    if args.load_model:
        ontoparser = SubsumptionLearning(model=args.load_model)
    else:
        ontoparser = SubsumptionLearning(meta=meta)
        trainers = {
            'momsgd': dy.MomentumSGDTrainer(ontoparser.model, edecay=0.25),
            'adam': dy.AdamTrainer(ontoparser.model, edecay=0.25),
            'simsgd': dy.SimpleSGDTrainer(ontoparser.model, edecay=0.25),
            'adagrad': dy.AdagradTrainer(ontoparser.model, edecay=0.25),
            'adadelta': dy.AdadeltaTrainer(ontoparser.model, edecay=0.25)
        }
        trainer = trainers[args.trainer]
        nntraining(train_sents)

    if args.dev:
        accuracy = Test(inputGenDev)
        sys.stdout.write("Accuracy: {}%\n".format(accuracy))

    if args.isDaemon and args.daemonPort:
        sys.stderr.write('Listening at port %d\n' % args.daemonPort)
        host = "0.0.0.0"  #Listen on all interfaces
        port = args.daemonPort  #Port number
import copy
import dynet as dy
import numpy as np
import os

from common import BEGIN_CHAR, STOP_CHAR, UNK_CHAR, SRC_FOLDER, RESULTS_FOLDER, DATA_FOLDER, check_path, write_pred_file, write_param_file, write_eval_file
from vocab_builder import build_vocabulary, Vocab
from norm_soft import SoftDataSet

MAX_PRED_SEQ_LEN = 50  # option
OPTIMIZERS = {
    'ADAM':
    lambda m: dy.AdamTrainer(
        m,
        lam=0.0,
        alpha=0.0001,  #common
        beta_1=0.9,
        beta_2=0.999,
        eps=1e-8),
    'SGD':
    dy.SimpleSGDTrainer,
    'ADADELTA':
    dy.AdadeltaTrainer
}
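A minimal usage sketch for the OPTIMIZERS table follows (the name m is just a stand-in for whatever ParameterCollection the model builds): the 'ADAM' entry fixes its hyperparameters inside the lambda, while 'SGD' and 'ADADELTA' map to the plain trainer constructors with their defaults.

# Usage sketch (hypothetical, not part of the original module):
m = dy.ParameterCollection()
trainer = OPTIMIZERS['ADAM'](m)  # 'SGD' / 'ADADELTA' work the same way, e.g. OPTIMIZERS['SGD'](m)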

### IO handling and evaluation


def load_data(filename, input_format, lowercase=False):
    """ Load data from file
        
def main():
    # Read in data
    wids_en=defaultdict(lambda: len(wids_en))
    wids_de=defaultdict(lambda: len(wids_de))

    train_sentences_en=readData.read_corpus(wids_en,mode="train",update_dict=True,min_frequency=MIN_EN_FREQUENCY,language="en")
    train_sentences_de=readData.read_corpus(wids_de,mode="train",update_dict=True,min_frequency=MIN_DE_FREQUENCY,language="de")

    enDictionaryFile="Models/"+"en-dict_"+str(MIN_EN_FREQUENCY)+".txt" 
    deDictionaryFile="Models/"+"de-dict_"+str(MIN_DE_FREQUENCY)+".txt"

    dicFile=open(enDictionaryFile,"w")
    print len(wids_en)
    for key in wids_en:
        dicFile.write(key+" "+str(wids_en[key])+"\n")
    dicFile.close()
    print "Writing EN"

    dicFile=open(deDictionaryFile,"w")
    print len(wids_de)
    for key in wids_de:
        dicFile.write(key+" "+str(wids_de[key])+"\n")
    dicFile.close()
    print "Writing DE"

    reverse_wids_en=reverseDictionary(wids_en)
    reverse_wids_de=reverseDictionary(wids_de)

    valid_sentences_en=readData.read_corpus(wids_en,mode="valid",update_dict=False,min_frequency=MIN_EN_FREQUENCY,language="en")
    valid_sentences_de=readData.read_corpus(wids_de,mode="valid",update_dict=False,min_frequency=MIN_DE_FREQUENCY,language="de")

    train_sentences=zip(train_sentences_de,train_sentences_en)
    valid_sentences=zip(valid_sentences_de,valid_sentences_en)

    for train_sentence in train_sentences[:10]:
        print "German:",[reverse_wids_de[x] for x in train_sentence[0]]
        print "English:",[reverse_wids_en[x] for x in train_sentence[1]]


    train_sentences=train_sentences[:MAX_TRAIN_SENTENCES]
    valid_sentences=valid_sentences

    print "Number of Training Sentences:",len(train_sentences)
    print "Number of Validation Sentences:",len(valid_sentences)


    VOCAB_SIZE_EN=len(wids_en)
    VOCAB_SIZE_DE=len(wids_de)

    random.shuffle(train_sentences)
    random.shuffle(valid_sentences)

    #Prepare batches
    lengthMap={}
    for x in train_sentences:
        if len(x[0]) not in lengthMap:
            lengthMap[len(x[0])]=[]
        lengthMap[len(x[0])].append(x)

    print "Number of Different Lengths:",len(lengthMap)

    train_batches=[]

    for megaBatch in lengthMap.values():
        index=0
        while index<len(megaBatch):
            if index%BATCH_SIZE==0:
                batch=megaBatch[index:min(index+BATCH_SIZE,len(megaBatch))]
                train_batches.append(batch)
                index+=BATCH_SIZE

    print [len(batch) for batch in train_batches]
    print sum([len(batch) for batch in train_batches])

    #Free some memory.Dump useless references
    train_sentences=None
    train_sentences_en=None
    train_sentences_de=None

    #Specify model
    model=dy.Model()

    encoder=dy.LSTMBuilder(LAYER_DEPTH,EMB_SIZE,HIDDEN_SIZE,model)
    revcoder=dy.LSTMBuilder(LAYER_DEPTH,EMB_SIZE,HIDDEN_SIZE,model)
    decoder=dy.LSTMBuilder(LAYER_DEPTH,EMB_SIZE+HIDDEN_SIZE,HIDDEN_SIZE,model)

    encoder_params={}
    encoder_params["lookup"]=model.add_lookup_parameters((VOCAB_SIZE_DE,EMB_SIZE))

    decoder_params={}
    decoder_params["lookup"]=model.add_lookup_parameters((VOCAB_SIZE_EN,EMB_SIZE))
    decoder_params["R"]=model.add_parameters((VOCAB_SIZE_EN,2*HIDDEN_SIZE))
    decoder_params["bias"]=model.add_parameters((VOCAB_SIZE_EN))

    trainer=dy.AdamTrainer(model)

    totalSentences=0
    sentencesCovered=totalSentences/3200

    startTime=datetime.datetime.now()
    print "Start Time",startTime
    for epochId in xrange(NUM_EPOCHS):    
        random.shuffle(train_batches)
        for batchId,batch in enumerate(train_batches):
            if len(batch)>1:
                totalSentences+=len(batch)
                if totalSentences/3200>sentencesCovered:
                    sentencesCovered=totalSentences/3200
                    print "Sentences covered:",totalSentences,"Current Time",datetime.datetime.now()
                sentence_de=[sentence[0] for sentence in batch]
                sentence_en=[sentence[1] for sentence in batch]
                loss,words=do_one_batch(model,encoder,revcoder,decoder,encoder_params,decoder_params,sentence_de,sentence_en)
                loss.value()
                loss.backward()
                trainer.update()
            else:
                totalSentences+=1
                #print "Sentences covered:",totalSentences
                sentence=batch[0]
                sentence_de=sentence[0]
                sentence_en=sentence[1]
                loss,words=do_one_example(model,encoder,revcoder,decoder,encoder_params,decoder_params,sentence_de,sentence_en)
                loss.value()
                loss.backward()
                trainer.update()
            #if totalSentences%1000<20:
            #    print "Total Sentences Covered:",totalSentences

        
        perplexity=0.0
        totalLoss=0.0
        totalWords=0.0
        for valid_sentence in valid_sentences:
            valid_sentence_de=valid_sentence[0]
            valid_sentence_en=valid_sentence[1]
            validLoss,words=do_one_example(model,encoder,revcoder,decoder,encoder_params,decoder_params,valid_sentence_de,valid_sentence_en)
            totalLoss+=float(validLoss.value())
            totalWords+=words
        print totalLoss
        print totalWords
        perplexity=math.exp(totalLoss/totalWords)
        print "Validation perplexity after epoch:",epochId,"Perplexity:",perplexity,"Time:",datetime.datetime.now()             
        
        trainer.update_epoch(1.0)
        
        #Save Model
        modelFile="Models/"+"barebones_enc_dec_batched"+"_"+str(datetime.datetime.now())+"_"+str(EMB_SIZE)+"_"+str(LAYER_DEPTH)+"_"+str(HIDDEN_SIZE)+"_"+str(MIN_EN_FREQUENCY)+"_"+str(MIN_DE_FREQUENCY)
        model.save(modelFile,[encoder,revcoder,decoder,encoder_params["lookup"],decoder_params["lookup"],decoder_params["R"],decoder_params["bias"]])

    return wids_de,wids_en,modelFile
Example No. 24
0
    with open(filename, "r") as f:
        for line in f:
            tag, words = line.lower().strip().split(" ||| ")
            yield ([w2i[x] for x in words.split(" ")], t2i[tag])


# Read in the data
train = list(read_dataset("../data/classes/train.txt"))
w2i = defaultdict(lambda: UNK, w2i)
dev = list(read_dataset("../data/classes/test.txt"))
nwords = len(w2i)
ntags = len(t2i)

# Start DyNet and define trainer
model = dy.Model()
trainer = dy.AdamTrainer(model)

# Define the model
EMB_SIZE = 64
W_emb = model.add_lookup_parameters(
    (nwords, 1, 1, EMB_SIZE))  # Word embeddings
WIN_SIZE = 3
FILTER_SIZE = 64
W_cnn = model.add_parameters(
    (1, WIN_SIZE, EMB_SIZE, FILTER_SIZE))  # cnn weights
b_cnn = model.add_parameters((FILTER_SIZE))  # cnn bias

W_sm = model.add_parameters((ntags, FILTER_SIZE))  # Softmax weights
b_sm = model.add_parameters((ntags))  # Softmax bias
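W_emb is declared with shape (nwords, 1, 1, EMB_SIZE) so the looked-up vectors can be stacked into the 2-D input that dy.conv2d_bias expects. Below is a hedged sketch of the usual forward pass for this kind of CNN text classifier (convolution, max-over-time pooling, then the softmax layer); calc_scores is an assumed name, and recent DyNet is assumed, where parameters can be used directly in expressions.

def calc_scores(words):
    # Hypothetical sketch of the CNN forward pass.
    dy.renew_cg()
    emb = dy.concatenate([dy.lookup(W_emb, w) for w in words], d=1)         # 1 x len x EMB_SIZE
    conv = dy.conv2d_bias(emb, W_cnn, b_cnn, stride=(1, 1), is_valid=False)
    pooled = dy.reshape(dy.max_dim(conv, d=1), (FILTER_SIZE,))              # max over time
    return W_sm * dy.rectify(pooled) + b_sm                                 # (ntags,) scores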

Example No. 25
0
lm_words = []

for sent in lm_train:
    for w in sent:
        lm_words.append(w)

lm_words.append("_UNK_")

lm_w2i = defaultdict(count(0).next)
for word in lm_words:
    lm_w2i[word]
lm_i2w = {i: w for w, i in lm_w2i.iteritems()}

lm_nwords = len(lm_w2i)
lm_model = dy.Model()
lm_trainer = dy.AdamTrainer(lm_model)
lm_WORDS_LOOKUP = lm_model.add_lookup_parameters((lm_nwords, 64))
lm_RNN = dy.LSTMBuilder(1, 64, 128, lm_model)
lm_pW = lm_model.add_parameters((lm_nwords, 128))
lm_pb = lm_model.add_parameters(lm_nwords)


def calc_lm_loss(sent):
    dy.renew_cg()
    W = dy.parameter(lm_pW)
    b = dy.parameter(lm_pb)
    f_init = lm_RNN.initial_state()

    wids = []
    for w in sent:
        if w in lm_words: