def train(self,
              train_file,
              dev_file,
              test_file,
              save_dir,
              pretrained_embeddings=None,
              min_occur_count=2,
              lstm_layers=3,
              word_dims=100,
              tag_dims=100,
              dropout_emb=0.33,
              lstm_hiddens=400,
              dropout_lstm_input=0.33,
              dropout_lstm_hidden=0.33,
              mlp_arc_size=500,
              mlp_rel_size=100,
              dropout_mlp=0.33,
              learning_rate=2e-3,
              decay=.75,
              decay_steps=5000,
              beta_1=.9,
              beta_2=.9,
              epsilon=1e-12,
              num_buckets_train=40,
              num_buckets_valid=10,
              num_buckets_test=10,
              train_iters=50000,
              train_batch_size=5000,
              test_batch_size=5000,
              validate_every=100,
              save_after=5000,
              debug=False):
        """Train a deep biaffine dependency parser.

        Parameters
        ----------
        train_file : str
            path to training set
        dev_file : str
            path to dev set
        test_file : str
            path to test set
        save_dir : str
            a directory for saving model and related meta-data
        pretrained_embeddings : tuple
            (embedding_name, source), used for gluonnlp.embedding.create(embedding_name, source)
        min_occur_count : int
            frequency threshold below which words are treated as rare and replaced with UNK
        lstm_layers : int
            number of LSTM layers
        word_dims : int
            dimension of word embedding
        tag_dims : int
            dimension of tag embedding
        dropout_emb : float
            word dropout
        lstm_hiddens : int
            size of lstm hidden states
        dropout_lstm_input : float
            dropout on x in variational RNN
        dropout_lstm_hidden : float
            dropout on h in variational RNN
        mlp_arc_size : int
            output size of MLP for arc feature extraction
        mlp_rel_size : int
            output size of MLP for rel feature extraction
        dropout_mlp : float
            dropout on the output of LSTM
        learning_rate : float
            learning rate
        decay : float
            see ExponentialScheduler
        decay_steps : int
            see ExponentialScheduler
        beta_1 : float
            exponential decay rate for the first moment estimates of the Adam optimizer
        beta_2 : float
            exponential decay rate for the second moment estimates of the Adam optimizer
        epsilon : float
            epsilon term of the Adam optimizer
        num_buckets_train : int
            number of buckets for training data set
        num_buckets_valid : int
            number of buckets for dev data set
        num_buckets_test : int
            number of buckets for testing data set
        train_iters : int
            training iterations
        train_batch_size : int
            training batch size
        test_batch_size : int
            test batch size
        validate_every : int
            run validation on the dev set every this many training batches
        save_after : int
            skip saving the model during the first `save_after` training steps
        debug : bool
            debug mode

        Returns
        -------
        DepParser
            parser itself
        """
        logger = init_logger(save_dir)
        config = _Config(train_file, dev_file, test_file, save_dir,
                         pretrained_embeddings, min_occur_count, lstm_layers,
                         word_dims, tag_dims, dropout_emb, lstm_hiddens,
                         dropout_lstm_input, dropout_lstm_hidden, mlp_arc_size,
                         mlp_rel_size, dropout_mlp, learning_rate, decay,
                         decay_steps, beta_1, beta_2, epsilon,
                         num_buckets_train, num_buckets_valid,
                         num_buckets_test, train_iters, train_batch_size,
                         debug)
        config.save()
        self._vocab = vocab = ParserVocabulary(train_file,
                                               pretrained_embeddings,
                                               min_occur_count)
        vocab.save(config.save_vocab_path)
        vocab.log_info(logger)

        with mx.Context(mxnet_prefer_gpu()):
            self._parser = parser = BiaffineParser(
                vocab, word_dims, tag_dims, dropout_emb, lstm_layers,
                lstm_hiddens, dropout_lstm_input, dropout_lstm_hidden,
                mlp_arc_size, mlp_rel_size, dropout_mlp, debug)
            parser.initialize()
            scheduler = ExponentialScheduler(learning_rate, decay, decay_steps)
            optimizer = mx.optimizer.Adam(learning_rate,
                                          beta_1,
                                          beta_2,
                                          epsilon,
                                          lr_scheduler=scheduler)
            trainer = gluon.Trainer(parser.collect_params(),
                                    optimizer=optimizer)
            data_loader = DataLoader(train_file, num_buckets_train, vocab)
            global_step = 0
            best_UAS = 0.
            batch_id = 0
            epoch = 1
            total_epoch = math.ceil(train_iters / validate_every)
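            # an "epoch" here is one validation interval of validate_every batches,
            # not a full pass over the training data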
            logger.info('Epoch %d out of %d', epoch, total_epoch)
            bar = Progbar(target=min(validate_every, data_loader.samples))
            while global_step < train_iters:
                for words, tags, arcs, rels in data_loader.get_batches(
                        batch_size=train_batch_size, shuffle=True):
                    with autograd.record():
                        arc_accuracy, _, _, loss = parser.forward(
                            words, tags, arcs, rels)
                        loss_value = loss.asscalar()
                    loss.backward()
                    trainer.step(train_batch_size)
                    batch_id += 1
                    try:
                        bar.update(batch_id,
                                   exact=[('UAS', arc_accuracy, 2),
                                          ('loss', loss_value)])
                    except OverflowError:
                        pass  # the loss can be 0 or infinity, which crashes the progress bar

                    global_step += 1
                    if global_step % validate_every == 0:
                        bar = Progbar(target=min(validate_every, train_iters -
                                                 global_step))
                        batch_id = 0
                        UAS, LAS, speed = evaluate_official_script(
                            parser, vocab, num_buckets_valid, test_batch_size,
                            dev_file, os.path.join(save_dir, 'valid_tmp'))
                        logger.info('Dev: UAS %.2f%% LAS %.2f%% %d sents/s',
                                    UAS, LAS, speed)
                        epoch += 1
                        if global_step < train_iters:
                            logger.info('Epoch %d out of %d', epoch,
                                        total_epoch)
                        if global_step > save_after and UAS > best_UAS:
                            logger.info('- new best score!')
                            best_UAS = UAS
                            parser.save(config.save_model_path)

        # When validate_every exceeds train_iters, validation never runs and no
        # model is saved inside the loop, so make sure the final parameters end up on disk
        if not os.path.isfile(config.save_model_path):
            parser.save(config.save_model_path)

        return self
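
A minimal usage sketch for the train() method above (the DepParser class name comes from the docstring's return type; the no-argument constructor and all file paths are placeholder assumptions):

parser = DepParser()  # assumes a no-argument constructor
parser.train(train_file='data/ptb/train.conllx',    # placeholder paths
             dev_file='data/ptb/dev.conllx',
             test_file='data/ptb/test.conllx',
             save_dir='model/ptb',
             pretrained_embeddings=('glove', 'glove.6B.100d'))
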
Example #2
def evaluate_official_script(parser,
                             vocab,
                             num_buckets_test,
                             test_batch_size,
                             test_file,
                             output_file,
                             debug=False):
    """Evaluate parser on a data set.

    Parameters
    ----------
    parser : BiaffineParser
        biaffine parser
    vocab : ParserVocabulary
        vocabulary built from data set
    num_buckets_test : int
        number of buckets (sentences are grouped into this many clusters)
    test_batch_size : int
        batch size
    test_file : str
        gold test file
    output_file : str
        output result to this file
    debug : bool
        evaluate only the first 1000 sentences, for debugging

    Returns
    -------
    tuple
        UAS, LAS, speed
    """
    if output_file is None:
        output_file = tempfile.NamedTemporaryFile().name
    data_loader = DataLoader(test_file, num_buckets_test, vocab)
    record = data_loader.idx_sequence
    results = [None] * len(record)
    idx = 0
    seconds = time.time()
    for words, tags, arcs, rels in data_loader.get_batches(
            batch_size=test_batch_size, shuffle=False):
        outputs = parser.forward(words, tags)
        for output in outputs:
            sent_idx = record[idx]
            results[sent_idx] = output
            idx += 1
    assert idx == len(results), 'parser swallowed some sentences'
    seconds = time.time() - seconds
    speed = len(record) / seconds

    arcs = reduce(lambda x, y: x + y, [list(result[0]) for result in results])
    rels = reduce(lambda x, y: x + y, [list(result[1]) for result in results])
    idx = 0
    with open(test_file) as f:
        if debug:
            f = f.readlines()[:1000]
        with open(output_file, 'w') as fo:
            for line in f:
                info = line.strip().split()
                if info:
                    arc_offset = 5
                    rel_offset = 6
                    if len(info) == 10:  # conll or conllx
                        arc_offset = 6
                        rel_offset = 7
                    # assert len(info) == 10, 'Illegal line: %s' % line
                    info[arc_offset] = str(arcs[idx])
                    info[rel_offset] = vocab.id2rel(rels[idx])
                    fo.write('\t'.join(info) + '\n')
                    idx += 1
                else:
                    fo.write('\n')

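    # Run the eval.pl evaluation script located next to this module and parse
    # UAS/LAS from the last lines of its report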
    os.system('perl %s -q -b -g %s -s %s -o tmp' %
              (os.path.join(os.path.dirname(os.path.realpath(__file__)),
                            'eval.pl'), test_file, output_file))
    os.system('tail -n 3 tmp > score_tmp')
    LAS, UAS = [
        float(line.strip().split()[-2])
        for line in open('score_tmp').readlines()[:2]
    ]
    # print('UAS %.2f, LAS %.2f' % (UAS, LAS))
    os.system('rm tmp score_tmp')
    os.remove(output_file)
    return UAS, LAS, speed
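
A usage sketch for the function above (assumes `parser` and `vocab` come from a finished training run; the file paths are placeholders):

UAS, LAS, speed = evaluate_official_script(
    parser, vocab,
    num_buckets_test=10,
    test_batch_size=5000,
    test_file='data/ptb/test.conllx',
    output_file='results/test_output.conllx')  # note: the function removes output_file when it finishes
print('UAS %.2f%% LAS %.2f%% (%d sents/s)' % (UAS, LAS, speed))
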
Example #4
def evaluate_official_script(parser,
                             vocab,
                             num_buckets_test,
                             test_batch_size,
                             test_file,
                             output_file,
                             debug=False):
    """Evaluate parser on a data set

    Parameters
    ----------
    parser : BiaffineParser
        biaffine parser
    vocab : ParserVocabulary
        vocabulary built from data set
    num_buckets_test : int
        number of buckets (sentences are grouped into this many clusters)
    test_batch_size : int
        batch size
    test_file : str
        gold test file
    output_file : str
        output result to this file
    debug : bool
        evaluate only the first 1000 sentences, for debugging

    Returns
    -------
    tuple
        UAS, LAS, speed
    """
    data_loader = DataLoader(test_file, num_buckets_test, vocab)
    record = data_loader.idx_sequence
    results = [None] * len(record)
    idx = 0
    seconds = time.time()
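    # uc = unlabeled correct arcs, lc = labeled correct arcs, total = scored tokens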
    uc, lc, total = 0, 0, 0
    for words, tags, arcs, rels in data_loader.get_batches(
            batch_size=test_batch_size, shuffle=False):
        outputs = parser.forward(words, tags)
        for output, gold_arc, gold_rel in zip(outputs, arcs.transpose([1, 0]),
                                              rels.transpose([1, 0])):
            pred_arc = output[0]
            pred_rel = output[1]
            length = pred_arc.shape[0]
            gold_arc = gold_arc[1:length + 1]
            gold_rel = gold_rel[1:length + 1]

            arc_mask = np.equal(pred_arc, gold_arc)
            uc += np.sum(arc_mask)
            total += length

            lc += np.sum(np.equal(pred_rel, gold_rel) * arc_mask)
            sent_idx = record[idx]
            results[sent_idx] = output
            idx += 1
    seconds = time.time() - seconds
    speed = len(record) / seconds
    UAS = uc / total * 100
    LAS = lc / total * 100
    if output_file:
        arcs = reduce(lambda x, y: x + y,
                      [list(result[0]) for result in results])
        rels = reduce(lambda x, y: x + y,
                      [list(result[1]) for result in results])
        idx = 0
        with open(test_file) as f:
            if debug:
                f = f.readlines()[:1000]
            with open(output_file, 'w') as fo:
                for line in f:
                    info = line.strip().split()
                    if info:
                        arc_offset = 5
                        rel_offset = 6
                        if len(info) == 10:  # conll or conllx
                            arc_offset = 6
                            rel_offset = 7
                        # assert len(info) == 10, 'Illegal line: %s' % line
                        info[arc_offset] = str(arcs[idx])
                        info[rel_offset] = vocab.id2rel(rels[idx])
                        fo.write('\t'.join(info) + '\n')
                        idx += 1
                    else:
                        fo.write('\n')
    return UAS, LAS, speed
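
A toy illustration of the UAS/LAS arithmetic used in the loop above (the numbers are made up; in the real code pred_arc/pred_rel come from parser.forward and the gold columns from the DataLoader):

import numpy as np

pred_arc = np.array([2, 0, 2, 3])  # predicted head index for each of 4 tokens
gold_arc = np.array([2, 0, 4, 3])  # gold heads; the third token is attached wrongly
pred_rel = np.array([1, 0, 3, 2])  # predicted relation ids
gold_rel = np.array([1, 0, 3, 2])  # gold relation ids

arc_mask = np.equal(pred_arc, gold_arc)                  # [True, True, False, True]
UAS = np.sum(arc_mask) / len(gold_arc) * 100             # 75.0
LAS = np.sum(np.equal(pred_rel, gold_rel) * arc_mask) / len(gold_arc) * 100  # 75.0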