Example #1
0
def main(args, logger):
    random.seed(args.seed)
    torch.manual_seed(args.seed)

    device = torch.device('cuda' if args.cuda else 'cpu')
    dat = TaggingDataset(args.data, args.batch_size, device)
    dat.log(logger)
    logger.log(str(args))

    model = BiLSTMTagger(len(dat.word2x), len(dat.tag2y), len(dat.char2c),
                         args.wdim, args.cdim, args.hdim, args.dropout,
                         args.layers, args.nochar, args.loss,
                         args.init).to(device)
    model.apply(get_init_weights(args.init))

    optim = torch.optim.Adam(model.parameters(), lr=args.lr)
    best_model = copy.deepcopy(model)
    best_perf = float('-inf')
    bad_epochs = 0

    try:
        for ep in range(1, args.epochs+1):
            random.shuffle(dat.batches_train)
            output = model.do_epoch(ep, dat.batches_train, args.clip, optim,
                                    logger=logger,
                                    check_interval=args.check_interval)

            if math.isnan(output['loss']):
                break

            with torch.no_grad():
                eval_result = model.evaluate(dat.batches_val, dat.tag2y)

            # Use span F1 when the tagset contains 'O' (chunking/NER-style
            # tags); otherwise use plain tagging accuracy.
            perf = eval_result['acc'] if 'O' not in dat.tag2y else \
                   eval_result['f1_<all>']

            logger.log('Epoch {:3d} | '.format(ep) +
                       ' '.join(['{:s} {:8.3f} | '.format(key, output[key])
                                 for key in output]) +
                       ' val perf {:8.3f}'.format(perf), newline=False)

            if perf > best_perf:
                best_perf = perf
                bad_epochs = 0
                logger.log('\t*Updating best model*')
                best_model.load_state_dict(model.state_dict())
            else:
                bad_epochs += 1
                logger.log('\tBad epoch %d' % bad_epochs)

            if bad_epochs >= args.max_bad_epochs:
                break

    except KeyboardInterrupt:
        logger.log('-'*89)
        logger.log('Exiting from training early')

    return best_model, best_perf
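Note: get_init_weights is referenced in Example #1 but not shown here. A
minimal sketch of what it could look like, assuming init is interpreted as
the half-width of a uniform initialization range (hypothetical helper, not
necessarily the project's actual implementation):

def get_init_weights(init_range):
    # Returns a callable for model.apply(...): it visits each submodule and
    # fills that module's own parameters with values drawn uniformly from
    # [-init_range, init_range].
    def init_weights(module):
        if init_range > 0.0:
            for param in module.parameters(recurse=False):
                torch.nn.init.uniform_(param, -init_range, init_range)
    return init_weights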
Example #2
0
def main(training_file, training_dir, load_model, skip_train):
    logging.debug('Initializing random seed to 0.')
    random.seed(0)
    np.random.seed(0)

    if load_model:
        tagger = Tagger.load(load_model)
        data = TaggingDataset.load_from_file(training_file,
                                             vocab=tagger.vocab,
                                             tags=tagger.tags)
    else:
        assert not skip_train, 'Cannot --skip_train without a saved model.'
        logging.debug('Loading dataset from: %s' % training_file)
        data = TaggingDataset.load_from_file(training_file)
        logging.debug('Initializing model.')
        tagger = Tagger(data.vocab, data.tags)

    if not skip_train:
        train_data, dev_data = data.split(0.7)

        batches_train = train_data.prepare_batches(n_seqs_per_batch=10)
        batches_dev = dev_data.prepare_batches(n_seqs_per_batch=100)

        train_mgr = TrainingManager(
            avg_n_losses=len(batches_train),
            training_dir=training_dir,
            tagger_taste_fn=lambda: taste_tagger(tagger, batches_train),
            tagger_dev_eval_fn=lambda: eval_tagger(tagger, batches_dev),
            tagger_save_fn=lambda fname: tagger.save(fname))

        logging.debug('Starting training.')
        while train_mgr.should_continue():
            mb_x, mb_y = random.choice(batches_train)
            mb_loss = tagger.learn(mb_x, mb_y)

            train_mgr.tick(mb_loss=mb_loss)

    evaluate_tagger_and_writeout(tagger)
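For context, a minimal command-line wrapper that could drive the main()
function above. The flag names are assumptions inferred from the parameter
names and the assert message, not the project's actual CLI:

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('training_file')
    parser.add_argument('training_dir')
    parser.add_argument('--load_model', default=None,
                        help='path to a previously saved Tagger')
    parser.add_argument('--skip_train', action='store_true')
    cli_args = parser.parse_args()
    main(cli_args.training_file, cli_args.training_dir,
         cli_args.load_model, cli_args.skip_train)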
Example #3
0
def main(training_file, training_dir, load_model, skip_train):
    logging.debug('Initializing random seed to 0.')
    random.seed(0)
    np.random.seed(0)

    if load_model:
        tagger = Tagger.load(load_model)
        data = TaggingDataset.load_from_file(training_file,
                                             vocab=tagger.vocab,
                                             tags=tagger.tags)
    else:
        assert not skip_train, 'Cannot --skip_train without a saved model.'
        logging.debug('Loading dataset from: %s' % training_file)
        data = TaggingDataset.load_from_file(training_file)
        logging.debug('Initializing model.')
        tagger = Tagger(data.vocab, data.tags)

    if not skip_train:
        train_data, dev_data = data.split(0.7)

        batches_train = train_data.prepare_batches(n_seqs_per_batch=10)
        batches_dev = dev_data.prepare_batches(n_seqs_per_batch=100)

        train_mgr = TrainingManager(
            avg_n_losses=len(batches_train),
            training_dir=training_dir,
            tagger_taste_fn=lambda: taste_tagger(tagger, batches_train),
            tagger_dev_eval_fn=lambda: eval_tagger(tagger, batches_dev),
            tagger_save_fn=lambda fname: tagger.save(fname)
        )

        logging.debug('Starting training.')
        while train_mgr.should_continue():
            mb_x, mb_y = random.choice(batches_train)
            mb_loss = tagger.learn(mb_x, mb_y)

            train_mgr.tick(mb_loss=mb_loss)

    evaluate_tagger_and_writeout(tagger)
Example #4
0
def run_tests(args):
    device = torch.device('cuda' if args.cuda else 'cpu')
    dat = TaggingDataset(args.data, 8, device)  # fixed evaluation batch size

    package = torch.load(args.model) if args.cuda else \
              torch.load(args.model, map_location=torch.device('cpu'))
    opt = package['opt']
    model = BiLSTMTagger(len(dat.word2x), len(dat.tag2y), len(dat.char2c),
                         opt.wdim, opt.cdim, opt.hdim, opt.dropout,
                         opt.layers, opt.nochar, opt.loss, opt.init).to(device)
    model.load_state_dict(package['sd'])

    with torch.no_grad():
        val_result = model.evaluate(dat.batches_val, dat.tag2y)
        test_result = model.evaluate(dat.batches_test, dat.tag2y)

    return val_result, test_result
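run_tests expects a checkpoint dictionary with an 'opt' entry (the
training-time argument namespace) and an 'sd' entry (the model state dict).
A sketch of the matching save call, assuming it lives in the training script
(save_checkpoint is a hypothetical helper, not shown in these examples):

def save_checkpoint(path, model, opt):
    # Store the hyperparameters next to the weights so run_tests can rebuild
    # an identical BiLSTMTagger before loading the state dict.
    torch.save({'opt': opt, 'sd': model.state_dict()}, path)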
Example #5
0
def evaluate_tagger_and_writeout(tagger):
    stdin = conllu.reader()
    stdout = conllu.writer()
    for sentence in stdin:
        x = []
        for word in sentence:
            x.append(tagger.vocab.get(TaggingDataset.word_obj_to_str(word),
                                      tagger.vocab['#OOV']))

        x = np.array([x], dtype='int32')

        y_hat = tagger.predict(x)[0]
        y_hat_str = [tagger.tags.rev(tag_id) for tag_id in y_hat]

        for word, utag in zip(sentence, y_hat_str):
            word.upos = utag

        stdout.write_sentence(sentence)
Example #6
0
def evaluate_tagger_and_writeout(tagger):
    stdin = conllu.reader()
    stdout = conllu.writer()
    for sentence in stdin:
        x = []
        for word in sentence:
            x.append(
                tagger.vocab.get(TaggingDataset.word_obj_to_str(word),
                                 tagger.vocab['#OOV']))

        x = np.array([x], dtype='int32')

        y_hat = tagger.predict(x)[0]
        y_hat_str = [tagger.tags.rev(tag_id) for tag_id in y_hat]

        for word, utag in zip(sentence, y_hat_str):
            word.upos = utag

        stdout.write_sentence(sentence)
Example #7
0
def main(args):
    logging.debug('Initializing random seed to 0.')
    random.seed(0)
    np.random.seed(0)
    tf.set_random_seed(0)

    logging.debug('Loading training dataset from: %s' % args.training_file)
    train_data = TaggingDataset.load_from_file(args.training_file)
    dev_data = TaggingDataset.load_from_file(None, vocab=train_data.vocab,
                                             alphabet=train_data.alphabet, tags=train_data.tags)
    logging.debug('Initializing model.')
    tagger = Tagger(train_data.vocab, train_data.tags, train_data.alphabet,
                    word_embedding_size=args.word_embedding_size,
                    char_embedding_size=args.char_embedding_size,
                    num_chars=args.max_word_length,
                    num_steps=args.max_sentence_length,
                    optimizer_desc=args.optimizer,
                    generate_lemmas=args.generate_lemmas,
                    l2=args.l2,
                    dropout_prob_values=[float(x) for x in args.dropout.split(",")],
                    experiment_name=args.exp_name,
                    supply_form_characters_to_lemma=args.supply_form_characters_to_lemma,
                    threads=args.threads,
                    use_attention=args.use_attention,
                    scheduled_sampling=args.scheduled_sampling)

    batches_train = train_data.prepare_batches(
        args.batch_size, args.max_sentence_length, args.max_word_length)
    batches_dev = dev_data.prepare_batches(
        2100, args.max_sentence_length, args.max_word_length)

    train_mgr = TrainingManager(
        len(batches_train), args.eval_interval,
        training_dir=args.training_dir,
        tagger_taste_fn=lambda: taste_tagger(tagger, batches_train),
        tagger_dev_eval_fn=lambda: eval_tagger(tagger, batches_dev),
        tagger_save_fn=lambda fname: tagger.save(fname)
    )

    import signal
    force_eval = {"value": False}
    def handle_sigquit(signum, frame):
        # SIGQUIT (Ctrl+\) forces an evaluation at the next training tick.
        logging.debug("Ctrl+\\ received, evaluation will be forced.")
        force_eval["value"] = True
    signal.signal(signal.SIGQUIT, handle_sigquit)

    logging.debug('Starting training.')
    try:
        permuted_batches = []
        while train_mgr.should_continue(max_epochs=args.max_epochs):
            if not permuted_batches:
                permuted_batches = batches_train[:]
                random.shuffle(permuted_batches)
            words, chars, tags, lengths, lemma_chars, chars_lengths = permuted_batches.pop()
            # With probability args.oov_sampling_p, replace words that occur
            # only once in the training vocabulary with index 0.
            oov_mask = np.vectorize(
                lambda x: train_data.vocab.count(x) == 1
                and np.random.uniform() < args.oov_sampling_p)(words)
            words = np.where(oov_mask, np.zeros(words.shape), words)
            mb_loss = tagger.learn(words, chars, tags, lengths, lemma_chars, chars_lengths)

            train_mgr.tick(mb_loss=mb_loss, force_eval=force_eval["value"])
            force_eval["value"] = False
    except KeyboardInterrupt:
        logging.debug("Ctrl+C recieved, stopping training.")

    run_tagger_and_writeout(tagger, dev_data)
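The np.vectorize call in Example #7 decides, token by token, whether a
singleton word should be masked as out-of-vocabulary. An equivalent but more
explicit formulation of the same mask (purely illustrative; it yields the
same distribution of masks but consumes random numbers differently):

def sample_oov_mask(words, vocab, oov_sampling_p):
    # True where the word occurs exactly once in the training vocabulary and
    # an independent coin flip with probability oov_sampling_p succeeds.
    singleton = np.vectorize(lambda w: vocab.count(w) == 1)(words)
    coin = np.random.uniform(size=words.shape) < oov_sampling_p
    return singleton & coin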