Esempio n. 1
0
# )
# from baseline.dy.optz import *
# from baseline.dy.dynety import *


import argparse
if __name__ == "__main__":
	# Training entry point: read the configuration, build and save the
	# vocabulary, then construct the parser model, the data loader and the
	# Adam trainer.
	np.random.seed(666)  # fixed seed for NumPy's global RNG
	argparser = argparse.ArgumentParser()
	argparser.add_argument('--config_file', default='./config.cfg')
	argparser.add_argument('--model', default='BaseParser')
	# Options argparse does not recognise are not an error here: they are
	# collected in extra_args and handed to Configurable.
	args, extra_args = argparser.parse_known_args()
	config = Configurable(args.config_file, extra_args)
	# The model class is looked up by name on the `models` module.
	Parser = getattr(models, args.model)

	vocab = Vocab(config.train_file, config.pretrained_embeddings_file, config.min_occur_count)
	# Pickles are byte streams: open the target in binary mode, and use a
	# context manager so the file is flushed and closed deterministically
	# instead of relying on garbage collection of an anonymous file object.
	with open(config.save_vocab_path, 'wb') as vocab_file:
		cPickle.dump(vocab, vocab_file)
	parser = Parser(
		vocab,
		# embedding sizes and embedding dropout
		config.word_dims, config.pret_dims, config.lemma_dims, config.flag_dims, config.tag_dims, config.dropout_emb,
		config.encoder_type, config.use_si_dropout,
		# LSTM / MLP settings
		config.lstm_layers, config.lstm_hiddens, config.dropout_lstm_input, config.dropout_lstm_hidden, config.mlp_rel_size, config.dropout_mlp,
		# transformer settings
		config.transformer_layers, config.transformer_heads, config.transformer_hiddens, config.transformer_ffn, config.transformer_dropout, config.transformer_maxlen, config.transformer_max_timescale,
		# language-model settings
		config.use_lm, config.lm_path, config.lm_dims, config.lm_hidden_size, config.lm_sentences,
		config.use_pos, config.use_lemma,
		config.unified)
	data_loader = DataLoader(config.train_file, config.num_buckets_train, vocab)
	pc = parser.parameter_collection
	trainer = dy.AdamTrainer(pc, config.learning_rate, config.beta_1, config.beta_2, config.epsilon)
	# Disabled alternative to the plain Adam trainer above:
	# optimizer = OptimizerManager(parser, lr_scheduler_type=config.lr_scheduler_type, optim=config.optim, warmup_steps=config.warmup_steps, 
	# 							eta=config.eta, patience=config.patience, clip=config.clip,
	# 							lr = config.learning_rate, beta1=config.beta_1, beta2 = config.beta_2, epsilon=config.epsilon)
	
Esempio n. 2
0
import sys
sys.path.append('../')
from lib import Vocab

import argparse
def decode_line(vocab, line):
    """Turn one line of ``|``-separated id sentences back into words.

    Args:
        vocab: object exposing ``i2s(index)``; it is called once per token
            with the token converted to ``int``.
        line: text made of sentences separated by ``|``, each sentence
            being whitespace-separated integer ids. Surrounding
            whitespace (including a trailing newline) is ignored.

    Returns:
        The decoded sentences, words joined by single spaces and
        sentences joined by ``|``, without a trailing newline.

    Raises:
        ValueError: if a token is not a valid integer literal.
    """
    return '|'.join(
        ' '.join(vocab.i2s(int(word)) for word in sent.split())
        for sent in line.split('|'))


if __name__ == "__main__":
    # Decode every line of --fname with the vocabulary at --vocab and
    # print the result to stdout.
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--vocab', default='../data/movie_25000')
    # Without a file name the script previously crashed inside open();
    # let argparse report the missing option instead.
    argparser.add_argument('--fname', type=str, required=True)
    args, extra_args = argparser.parse_known_args()
    vocab = Vocab(args.vocab)

    with open(args.fname) as f:
        # Iterate the file object directly so the input is streamed rather
        # than loaded into memory by readlines().
        for line in f:
            # Single-argument print(...) behaves the same as a statement
            # (Python 2) and as a function call (Python 3).
            print(decode_line(vocab, line))
Esempio n. 3
0
import os

from lib import Vocab, DataLoader

# Smoke check for the data pipeline: build a vocabulary and a loader from the
# training file, then print the shapes of the first four elements of a single
# shuffled batch.
root_path = '/home/clementine/projects/树库转换/ctb51_processed/dep_zhang_clark_conlu/'
training = os.path.join(root_path, 'train.txt.3.pa.gs.tab.conllu')
vocab = Vocab(training, pret_file=None, min_occur_count=2)
ctb_loader = DataLoader(training, 40, vocab)

batches = ctb_loader.get_batches(40, shuffle=True)
for example in batches:
    print(example[0].shape, example[1].shape, example[2].shape,
          example[3].shape)
    # Only the first batch is inspected.
    break
Esempio n. 4
0
                        n_best=n_best,
                        max_length=50,
                        gpu=True,
                        global_scorer=scorer)

    with open(out_file, 'w') as f:
        for batch in data_set:
            ret = worker.translate_batch(batch)
            preds = ret['predictions']
            for pred in preds:
                sent = ' '.join(str(x) for x in pred[0])
                f.write(sent + '\n')


import argparse
def build_argparser():
    """Create the command-line parser for the translation script.

    ``--beam_size`` and ``--n_best`` are parsed as ``int``. Their defaults
    are the integer 1, but without an explicit ``type`` any value given on
    the command line would arrive as a string and no longer match the
    default's type.

    Returns:
        argparse.ArgumentParser: parser with the ``--model``, ``--fname``,
        ``--beam_size``, ``--n_best``, ``--out_file`` and ``--config_file``
        options.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--model', type=str)
    argparser.add_argument('--fname', type=str)
    argparser.add_argument('--beam_size', type=int, default=1)
    argparser.add_argument('--n_best', type=int, default=1)
    argparser.add_argument('--out_file', default="test_out")
    argparser.add_argument('--config_file', default='default.cfg')
    return argparser


if __name__ == "__main__":
    # Options argparse does not recognise are collected in extra_args and
    # handed to Configurable.
    args, extra_args = build_argparser().parse_known_args()
    opt = Configurable(args.config_file, extra_args, for_test=True)
    model = ADVModel(opt)
    model.load_checkpoint(args.model)
    vocab = Vocab(opt.tgt_vocab)
    translate(model, args.fname, vocab, args.beam_size, args.n_best,
              args.out_file)
Esempio n. 5
0
from lib import Vocab, DataLoader
from test import test
from config import Configurable

import argparse
if __name__ == "__main__":
    # Training entry point: read the configuration, build and save the
    # vocabulary, then construct the parser model, the data loader and the
    # Adam trainer.
    # change 1 line
    # The seed is itself drawn at random from [0, 1000).
    np.random.seed(np.random.randint(0, 1000))
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--config_file', default='../configs/default.cfg')
    argparser.add_argument('--model', default='BaseParser')
    # Options argparse does not recognise are collected in extra_args and
    # handed to Configurable.
    args, extra_args = argparser.parse_known_args()
    config = Configurable(args.config_file, extra_args)
    # The model class is looked up by name on the `models` module.
    Parser = getattr(models, args.model)

    # No pretrained-embedding file is passed here (second argument is None).
    vocab = Vocab(config.train_file, None, config.min_occur_count)
    # Pickles are byte streams: open the target in binary mode, and use a
    # context manager so the file is flushed and closed deterministically
    # instead of relying on garbage collection of an anonymous file object.
    with open(config.save_vocab_path, 'wb') as vocab_file:
        cPickle.dump(vocab, vocab_file)
    parser = Parser(vocab, config.word_dims, config.tag_dims,
                    config.dropout_emb, config.lstm_layers,
                    config.lstm_hiddens, config.dropout_lstm_input,
                    config.dropout_lstm_hidden, config.mlp_arc_size,
                    config.mlp_rel_size, config.dropout_mlp)
    data_loader = DataLoader(config.train_file, config.num_buckets_train,
                             vocab)
    pc = parser.parameter_collection
    trainer = dy.AdamTrainer(pc, config.learning_rate, config.beta_1,
                             config.beta_2, config.epsilon)

    global_step = 0

    def update_parameters():