def main(args, logger):
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    device = torch.device('cuda' if args.cuda else 'cpu')

    dat = TaggingDataset(args.data, args.batch_size, device)
    dat.log(logger)
    logger.log(str(args))

    model = BiLSTMTagger(len(dat.word2x), len(dat.tag2y), len(dat.char2c),
                         args.wdim, args.cdim, args.hdim, args.dropout,
                         args.layers, args.nochar, args.loss,
                         args.init).to(device)
    model.apply(get_init_weights(args.init))
    optim = torch.optim.Adam(model.parameters(), lr=args.lr)

    # Track the best model on validation data for early stopping.
    best_model = copy.deepcopy(model)
    best_perf = float('-inf')
    bad_epochs = 0

    try:
        for ep in range(1, args.epochs + 1):
            random.shuffle(dat.batches_train)
            output = model.do_epoch(ep, dat.batches_train, args.clip, optim,
                                    logger=logger,
                                    check_interval=args.check_interval)
            if math.isnan(output['loss']):
                break

            with torch.no_grad():
                eval_result = model.evaluate(dat.batches_val, dat.tag2y)
            # Report accuracy unless the tag set contains 'O' (NER-style data),
            # in which case report the aggregate F1 score.
            perf = eval_result['acc'] if 'O' not in dat.tag2y else \
                eval_result['f1_<all>']

            logger.log('Epoch {:3d} | '.format(ep) +
                       ' '.join(['{:s} {:8.3f} | '.format(key, output[key])
                                 for key in output]) +
                       ' val perf {:8.3f}'.format(perf), newline=False)

            if perf > best_perf:
                best_perf = perf
                bad_epochs = 0
                logger.log('\t*Updating best model*')
                best_model.load_state_dict(model.state_dict())
            else:
                bad_epochs += 1
                logger.log('\tBad epoch %d' % bad_epochs)

            if bad_epochs >= args.max_bad_epochs:
                break

    except KeyboardInterrupt:
        logger.log('-' * 89)
        logger.log('Exiting from training early')

    return best_model, best_perf

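# Sketch (not from the original code): a hypothetical invocation of main() above.
# Only the attribute names are taken from what main() actually reads; every value,
# the construction of args via argparse.Namespace, and the logger argument are
# illustrative assumptions, not defaults from the original project.
def example_main_invocation(logger):
    import argparse
    example_args = argparse.Namespace(
        seed=42, cuda=False, data='data/', batch_size=16,
        wdim=100, cdim=25, hdim=200, dropout=0.5, layers=2,
        nochar=False, loss='ce', init=0.1, lr=0.001,
        epochs=30, clip=5.0, check_interval=100, max_bad_epochs=7)
    return main(example_args, logger)
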
def main(training_file, training_dir, load_model, skip_train):
    logging.debug('Initializing random seed to 0.')
    random.seed(0)
    np.random.seed(0)

    if load_model:
        tagger = Tagger.load(load_model)
        data = TaggingDataset.load_from_file(training_file,
                                             vocab=tagger.vocab,
                                             tags=tagger.tags)
    else:
        assert not skip_train, 'Cannot --skip_train without a saved model.'
        logging.debug('Loading dataset from: %s' % training_file)
        data = TaggingDataset.load_from_file(training_file)

        logging.debug('Initializing model.')
        tagger = Tagger(data.vocab, data.tags)

    if not skip_train:
        train_data, dev_data = data.split(0.7)

        batches_train = train_data.prepare_batches(n_seqs_per_batch=10)
        batches_dev = dev_data.prepare_batches(n_seqs_per_batch=100)

        train_mgr = TrainingManager(
            avg_n_losses=len(batches_train),
            training_dir=training_dir,
            tagger_taste_fn=lambda: taste_tagger(tagger, batches_train),
            tagger_dev_eval_fn=lambda: eval_tagger(tagger, batches_dev),
            tagger_save_fn=lambda fname: tagger.save(fname))

        logging.debug('Starting training.')
        while train_mgr.should_continue():
            mb_x, mb_y = random.choice(batches_train)
            mb_loss = tagger.learn(mb_x, mb_y)
            train_mgr.tick(mb_loss=mb_loss)

    evaluate_tagger_and_writeout(tagger)

def run_tests(args):
    device = torch.device('cuda' if args.cuda else 'cpu')
    dat = TaggingDataset(args.data, 8, device)

    package = torch.load(args.model) if args.cuda else \
        torch.load(args.model, map_location=torch.device('cpu'))
    opt = package['opt']
    model = BiLSTMTagger(len(dat.word2x), len(dat.tag2y), len(dat.char2c),
                         opt.wdim, opt.cdim, opt.hdim, opt.dropout,
                         opt.layers, opt.nochar, opt.loss,
                         opt.init).to(device)
    model.load_state_dict(package['sd'])

    with torch.no_grad():
        val_result = model.evaluate(dat.batches_val, dat.tag2y)
        test_result = model.evaluate(dat.batches_test, dat.tag2y)

    return val_result, test_result

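# Sketch (assumption, not from the original code): run_tests above expects a
# checkpoint dict with an 'opt' argument namespace and a 'sd' state dict. A save
# helper that matches that format could look like the following; the function
# name and the path argument are hypothetical.
def save_checkpoint(model, args, path):
    # Bundle hyperparameters and weights in the layout run_tests loads.
    torch.save({'opt': args, 'sd': model.state_dict()}, path)
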
def evaluate_tagger_and_writeout(tagger):
    stdin = conllu.reader()
    stdout = conllu.writer()
    for sentence in stdin:
        x = []
        for word in sentence:
            x.append(tagger.vocab.get(TaggingDataset.word_obj_to_str(word),
                                      tagger.vocab['#OOV']))
        x = np.array([x], dtype='int32')

        y_hat = tagger.predict(x)[0]
        y_hat_str = [tagger.tags.rev(tag_id) for tag_id in y_hat]

        for word, utag in zip(sentence, y_hat_str):
            word.upos = utag

        stdout.write_sentence(sentence)

def main(args):
    logging.debug('Initializing random seed to 0.')
    random.seed(0)
    np.random.seed(0)
    tf.set_random_seed(0)

    logging.debug('Loading training dataset from: %s' % args.training_file)
    train_data = TaggingDataset.load_from_file(args.training_file)
    dev_data = TaggingDataset.load_from_file(None, vocab=train_data.vocab,
                                             alphabet=train_data.alphabet,
                                             tags=train_data.tags)

    logging.debug('Initializing model.')
    tagger = Tagger(train_data.vocab, train_data.tags, train_data.alphabet,
                    word_embedding_size=args.word_embedding_size,
                    char_embedding_size=args.char_embedding_size,
                    num_chars=args.max_word_length,
                    num_steps=args.max_sentence_length,
                    optimizer_desc=args.optimizer,
                    generate_lemmas=args.generate_lemmas,
                    l2=args.l2,
                    dropout_prob_values=[float(x) for x in args.dropout.split(",")],
                    experiment_name=args.exp_name,
                    supply_form_characters_to_lemma=args.supply_form_characters_to_lemma,
                    threads=args.threads,
                    use_attention=args.use_attention,
                    scheduled_sampling=args.scheduled_sampling)

    batches_train = train_data.prepare_batches(
        args.batch_size, args.max_sentence_length, args.max_word_length)
    batches_dev = dev_data.prepare_batches(
        2100, args.max_sentence_length, args.max_word_length)

    train_mgr = TrainingManager(
        len(batches_train), args.eval_interval,
        training_dir=args.training_dir,
        tagger_taste_fn=lambda: taste_tagger(tagger, batches_train),
        tagger_dev_eval_fn=lambda: eval_tagger(tagger, batches_dev),
        tagger_save_fn=lambda fname: tagger.save(fname))

    # Ctrl+\ (SIGQUIT) forces an evaluation at the next training tick.
    import signal
    force_eval = {"value": False}

    def handle_sigquit(signal, frame):
        logging.debug("Ctrl+\\ received, evaluation will be forced.")
        force_eval["value"] = True

    signal.signal(signal.SIGQUIT, handle_sigquit)

    logging.debug('Starting training.')
    try:
        permuted_batches = []
        while train_mgr.should_continue(max_epochs=args.max_epochs):
            if not permuted_batches:
                permuted_batches = batches_train[:]
                random.shuffle(permuted_batches)
            words, chars, tags, lengths, lemma_chars, chars_lengths = \
                permuted_batches.pop()

            # With probability oov_sampling_p, map words that occur only once
            # in the training vocabulary to index 0 to simulate OOVs.
            oov_mask = np.vectorize(
                lambda x: train_data.vocab.count(x) == 1
                and np.random.uniform() < args.oov_sampling_p)(words)
            words = np.where(oov_mask, np.zeros(words.shape), words)

            mb_loss = tagger.learn(words, chars, tags, lengths, lemma_chars,
                                   chars_lengths)
            train_mgr.tick(mb_loss=mb_loss, force_eval=force_eval["value"])
            force_eval["value"] = False
    except KeyboardInterrupt:
        logging.debug("Ctrl+C received, stopping training.")

    run_tagger_and_writeout(tagger, dev_data)