# Training entry point for the graph-based parser.
# from baseline.dy.optz import *
# from baseline.dy.dynety import *
import argparse
import cPickle
import numpy as np
import dynet as dy
import models
from lib import Vocab, DataLoader
from config import Configurable

if __name__ == "__main__":
    np.random.seed(666)
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--config_file', default='./config.cfg')
    argparser.add_argument('--model', default='BaseParser')
    args, extra_args = argparser.parse_known_args()
    config = Configurable(args.config_file, extra_args)
    Parser = getattr(models, args.model)

    vocab = Vocab(config.train_file, config.pretrained_embeddings_file, config.min_occur_count)
    with open(config.save_vocab_path, 'wb') as f:
        cPickle.dump(vocab, f)

    parser = Parser(vocab, config.word_dims, config.pret_dims, config.lemma_dims,
                    config.flag_dims, config.tag_dims, config.dropout_emb,
                    config.encoder_type, config.use_si_dropout, config.lstm_layers,
                    config.lstm_hiddens, config.dropout_lstm_input, config.dropout_lstm_hidden,
                    config.mlp_rel_size, config.dropout_mlp, config.transformer_layers,
                    config.transformer_heads, config.transformer_hiddens, config.transformer_ffn,
                    config.transformer_dropout, config.transformer_maxlen,
                    config.transformer_max_timescale, config.use_lm, config.lm_path,
                    config.lm_dims, config.lm_hidden_size, config.lm_sentences,
                    config.use_pos, config.use_lemma, config.unified)
    data_loader = DataLoader(config.train_file, config.num_buckets_train, vocab)

    pc = parser.parameter_collection
    trainer = dy.AdamTrainer(pc, config.learning_rate, config.beta_1, config.beta_2, config.epsilon)
    # optimizer = OptimizerManager(parser, lr_scheduler_type=config.lr_scheduler_type,
    #                              optim=config.optim, warmup_steps=config.warmup_steps,
    #                              eta=config.eta, patience=config.patience, clip=config.clip,
    #                              lr=config.learning_rate, beta1=config.beta_1,
    #                              beta2=config.beta_2, epsilon=config.epsilon)
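    # Minimal training-loop sketch (not in the original file). `parser.run(...)`
    # returning a DyNet loss expression and `config.train_batch_size` are assumed
    # names, following the common DyNet biaffine-parser recipe; adjust to the real API.
    global_step = 0
    for batch in data_loader.get_batches(config.train_batch_size, shuffle=True):
        dy.renew_cg()                     # fresh computation graph per batch
        loss = parser.run(*batch)         # assumed forward pass -> loss expression
        loss_value = loss.scalar_value()  # run the forward pass, get the batch loss
        loss.backward()                   # backpropagate through the graph
        trainer.update()                  # apply one Adam step
        global_step += 1
        print('step %d: loss %.4f' % (global_step, loss_value))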
import sys
sys.path.append('../')
import argparse
from lib import Vocab

if __name__ == "__main__":
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--vocab', default='../data/movie_25000')
    argparser.add_argument('--fname', type=str)
    args, extra_args = argparser.parse_known_args()
    vocab = Vocab(args.vocab)
    # Each input line holds sentences separated by '|'; each sentence is a
    # space-delimited sequence of integer word ids. Map every id back to its
    # surface form and print the decoded line.
    with open(args.fname) as f:
        for line in f:
            sents = []
            for sent in line.split('|'):
                sent = ' '.join([vocab.i2s(int(word)) for word in sent.split()])
                sents.append(sent)
            new_line = '|'.join(sents)
            print(new_line)
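# Example invocation (the script name `decode_ids.py` is hypothetical; the
# input file holds '|'-separated, space-delimited word-id sentences):
#   python decode_ids.py --vocab ../data/movie_25000 --fname indexed.txt > decoded.txt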
import os
from lib import Vocab, DataLoader

# Smoke test for the CTB 5.1 data pipeline. The directory name '树库转换'
# means "treebank conversion"; the path is kept verbatim.
root_path = '/home/clementine/projects/树库转换/ctb51_processed/dep_zhang_clark_conlu/'
training = os.path.join(root_path, 'train.txt.3.pa.gs.tab.conllu')

vocab = Vocab(training, pret_file=None, min_occur_count=2)
ctb_loader = DataLoader(training, 40, vocab)
for i, example in enumerate(ctb_loader.get_batches(40, shuffle=True)):
    print(example[0].shape, example[1].shape, example[2].shape, example[3].shape)
    if i == 0:
        break
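# The four arrays per batch are presumably word ids, tag ids, head indices,
# and relation ids (an assumption based on the usual dependency-parsing batch
# layout; this file only inspects their shapes).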
import argparse
from config import Configurable
from lib import Vocab
from models import ADVModel  # assumed module path; the import is missing in the source


def translate(model, fname, vocab, beam_size, n_best, out_file):
    # The construction of `data_set`, `scorer`, and the beam-search `worker`
    # is truncated in the source; only the tail of that call survives:
    #     ..., n_best=n_best, max_length=50, gpu=True, global_scorer=scorer)
    with open(out_file, 'w') as f:
        for batch in data_set:
            ret = worker.translate_batch(batch)
            preds = ret['predictions']
            for pred in preds:
                # write the 1-best hypothesis of each example, one per line
                sent = ' '.join(str(x) for x in pred[0])
                f.write(sent + '\n')


if __name__ == "__main__":
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--model', type=str)
    argparser.add_argument('--fname', type=str)
    argparser.add_argument('--beam_size', type=int, default=1)
    argparser.add_argument('--n_best', type=int, default=1)
    argparser.add_argument('--out_file', default="test_out")
    argparser.add_argument('--config_file', default='default.cfg')
    args, extra_args = argparser.parse_known_args()
    opt = Configurable(args.config_file, extra_args, for_test=True)
    model = ADVModel(opt)
    model.load_checkpoint(args.model)
    vocab = Vocab(opt.tgt_vocab)
    translate(model, args.fname, vocab, args.beam_size, args.n_best, args.out_file)
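# Example invocation (the script name `translate.py` is hypothetical):
#   python translate.py --model checkpoint.pt --fname test_ids.txt \
#       --beam_size 5 --n_best 1 --out_file test_out --config_file default.cfg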
import argparse
import cPickle
import numpy as np
import dynet as dy
import models
from lib import Vocab, DataLoader
from test import test
from config import Configurable

if __name__ == "__main__":
    # change 1 line: seed randomly instead of with a fixed value
    np.random.seed(np.random.randint(0, 1000))
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--config_file', default='../configs/default.cfg')
    argparser.add_argument('--model', default='BaseParser')
    args, extra_args = argparser.parse_known_args()
    config = Configurable(args.config_file, extra_args)
    Parser = getattr(models, args.model)

    vocab = Vocab(config.train_file, None, config.min_occur_count)
    with open(config.save_vocab_path, 'wb') as f:
        cPickle.dump(vocab, f)

    parser = Parser(vocab, config.word_dims, config.tag_dims, config.dropout_emb,
                    config.lstm_layers, config.lstm_hiddens, config.dropout_lstm_input,
                    config.dropout_lstm_hidden, config.mlp_arc_size, config.mlp_rel_size,
                    config.dropout_mlp)
    data_loader = DataLoader(config.train_file, config.num_buckets_train, vocab)
    pc = parser.parameter_collection
    trainer = dy.AdamTrainer(pc, config.learning_rate, config.beta_1, config.beta_2, config.epsilon)

    global_step = 0

    def update_parameters():
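        # Body truncated in the source. A minimal sketch of what this helper
        # usually does in DyNet biaffine-parser training (the config fields
        # `decay` and `decay_steps` are assumptions, not confirmed here):
        # decay the learning rate exponentially with the global step, then
        # apply one optimizer update.
        trainer.learning_rate = config.learning_rate * config.decay ** (global_step / config.decay_steps)
        trainer.update()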