Example #1
def train(n_epoch=20, batch_size=100, n_units=1000, device=-1):
    # get_mnist() returns lists of (image, label) pairs; unzip them into
    # contiguous arrays so mini-batches can be sliced directly.
    train, test = chainer.datasets.get_mnist()
    train_x, train_y = [np.array(cols) for cols in zip(*train)]
    test_x, test_y = [np.array(cols) for cols in zip(*test)]

    model = MLP(n_units, 10)
    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu(device)
    # setup() returns the optimizer itself (Chainer >= 4), so the call chains.
    optimizer = chainer.optimizers.Adam().setup(model)

    trainer = training.Trainer(
        optimizer,
        model,
        loss_func=chainer.functions.softmax_cross_entropy,
        accuracy_func=chainer.functions.accuracy)
    trainer.configure(
        hooks={
            # Toggle Chainer's global train mode so that dropout, batch
            # normalization, etc. behave correctly in each phase.
            training.EPOCH_TRAIN_BEGIN: lambda _: set_chainer_train(True),
            training.EPOCH_VALIDATE_BEGIN: lambda _: set_chainer_train(False)
        },
        # Move each mini-batch to the selected device before the forward pass.
        converter=lambda x: chainer.dataset.convert.to_device(device, x))
    trainer.add_listener(
        training.listeners.ProgressBar(lambda n: tqdm(total=n)), priority=200)
    trainer.fit((train_x, train_y), (test_x, test_y), n_epoch, batch_size)
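
Example #1 references an MLP chain and a set_chainer_train helper that are defined elsewhere in the project. A minimal sketch of what they might look like, assuming the standard two-hidden-layer Chainer MLP for MNIST and the chainer.config.train flag (both definitions are guesses, not the original code):

import chainer
import chainer.functions as F
import chainer.links as L


class MLP(chainer.Chain):
    """Plain feed-forward net: 784 -> n_units -> n_units -> n_out."""

    def __init__(self, n_units, n_out):
        super(MLP, self).__init__()
        with self.init_scope():
            self.l1 = L.Linear(None, n_units)  # input size inferred on first call
            self.l2 = L.Linear(None, n_units)
            self.l3 = L.Linear(None, n_out)

    def __call__(self, x):
        h = F.relu(self.l1(x))
        h = F.relu(self.l2(h))
        return self.l3(h)


def set_chainer_train(train):
    # Dropout, batch normalization, etc. consult this global flag.
    chainer.config.train = train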
Example #2
def train(train_file,
          test_file,
          word_embed_file=None,
          word_embed_size=100,
          char_embed_size=30,
          n_epoch=20,
          batch_size=10,
          lr=0.01,
          gpu=-1,
          seed=None):
    if seed is not None:
        import random
        import numpy
        random.seed(seed)
        numpy.random.seed(seed)
        if gpu >= 0:
            try:
                # cupy is only required (and may only be installed) when
                # running on a GPU.
                import cupy
                cupy.cuda.runtime.setDevice(gpu)
                cupy.random.seed(seed)
            except Exception as e:
                logging.error(str(e))
        logging.info("random seed: {}".format(seed))
    framework_utils.set_debug(App.debug)

    # Load files
    logging.info('initialize DataLoader with word_embed_file={}, '
                 'word_embed_size={}, and char_embed_size={}'.format(
                     word_embed_file, word_embed_size, char_embed_size))
    loader = DataLoader(word_embed_size, char_embed_size, word_embed_file)
    logging.info('load train dataset from {}'.format(train_file))
    train_dataset = loader.load(train_file, train=True)
    if test_file:
        logging.info('load test dataset from {}'.format(test_file))
        test_dataset = loader.load(test_file, train=False)
    else:
        test_dataset = None

    model = Model(word_embedding=loader.get_embeddings('word'),
                  char_embedding=loader.get_embeddings('char'),
                  n_labels=len(loader.tag_map))
    logging.info('initialized model: {}'.format(model))
    if gpu >= 0:
        chainer.cuda.get_device_from_id(gpu).use()
        model.to_gpu()

    optimizer = chainer.optimizers.Adam(lr)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(5.0))

    def compute_loss(ys, ts):
        # Flatten the per-sentence outputs and gold labels into one batch;
        # ignore_label=-1 excludes padded positions from the loss.
        ys, ts = F.concat(ys, axis=0), F.concat(ts, axis=0)
        if gpu >= 0:
            ts.to_gpu()
        return F.softmax_cross_entropy(ys, ts, ignore_label=-1)

    def compute_accuracy(ys, ts):
        # Same flattening and masking as in compute_loss.
        ys, ts = F.concat(ys, axis=0), F.concat(ts, axis=0)
        if gpu >= 0:
            ts.to_gpu()
        return F.accuracy(ys, ts, ignore_label=-1)

    trainer = training.Trainer(optimizer,
                               model,
                               loss_func=compute_loss,
                               accuracy_func=compute_accuracy)
    trainer.configure(framework_utils.config)

    trainer.fit(train_dataset,
                None,  # labels are bundled with each sample in train_dataset
                batch_size=batch_size,
                epochs=n_epoch,
                validation_data=test_dataset,
                verbose=App.verbose)
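
compute_loss and compute_accuracy above flatten the per-sentence outputs and rely on ignore_label=-1 so that padded positions count toward neither the loss nor the accuracy. A toy check of that convention with plain NumPy inputs:

import numpy as np
import chainer.functions as F

# Three "tokens"; the last one is padding (label -1).
ys = np.array([[2.0, 0.1, 0.1],
               [0.1, 2.0, 0.1],
               [0.1, 0.1, 2.0]], dtype=np.float32)
ts = np.array([0, 1, -1], dtype=np.int32)

loss = F.softmax_cross_entropy(ys, ts, ignore_label=-1)
acc = F.accuracy(ys, ts, ignore_label=-1)
# Both values average over the two non-padding rows only.
print(loss.data, acc.data)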
Example #3
def train(train_file,
          test_file=None,
          format='tree',
          embed_file=None,
          n_epoch=20,
          batch_size=20,
          lr=0.001,
          limit=-1,
          l2_lambda=0.0,
          grad_clip=5.0,
          encoder_input=('char', 'postag'),
          model_config=None,
          device=-1,
          save_dir=None,
          seed=None,
          cache_dir='',
          refresh_cache=False,
          bert_model=0,
          bert_dir=''):
    if seed is not None:
        utils.set_random_seed(seed, device)
    logger = logging.getLogger()
    # logger.configure(filename='log.txt', logdir=save_dir)
    assert isinstance(logger, logging.AppLogger)
    if model_config is None:
        model_config = {}
    model_config['bert_model'] = bert_model
    model_config['bert_dir'] = bert_dir

    if save_dir is not None:
        os.makedirs(save_dir, exist_ok=True)

    read_genia = format == 'genia'
    loader = dataset.DataLoader.build(
        postag_embed_size=model_config.get('postag_embed_size', 50),
        char_embed_size=model_config.get('char_embed_size', 10),
        word_embed_file=embed_file,
        filter_coord=(not read_genia),
        refresh_cache=refresh_cache,
        format=format,
        cache_options=dict(dir=cache_dir, mkdir=True, logger=logger),
        extra_ids=(git.hash(), ))

    use_external_postags = not read_genia
    cont_embed_file_ext = _get_cont_embed_file_ext(encoder_input)
    use_cont_embed = cont_embed_file_ext is not None

    train_dataset = loader.load_with_external_resources(
        train_file,
        train=True,
        bucketing=False,
        size=None if limit < 0 else limit,
        refresh_cache=refresh_cache,
        use_external_postags=use_external_postags,
        use_contextualized_embed=use_cont_embed,
        contextualized_embed_file_ext=cont_embed_file_ext)
    logging.info('{} samples loaded for training'.format(len(train_dataset)))
    test_dataset = None
    if test_file is not None:
        test_dataset = loader.load_with_external_resources(
            test_file,
            train=False,
            bucketing=False,
            size=None if limit < 0 else limit // 10,
            refresh_cache=refresh_cache,
            use_external_postags=use_external_postags,
            use_contextualized_embed=use_cont_embed,
            contextualized_embed_file_ext=cont_embed_file_ext)
        logging.info('{} samples loaded for validation'.format(
            len(test_dataset)))

    builder = models.CoordSolverBuilder(loader,
                                        inputs=encoder_input,
                                        **model_config)
    logger.info("{}".format(builder))
    model = builder.build()
    logger.trace("Model: {}".format(model))
    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu(device)

    if bert_model == 1:
        optimizer = chainer.optimizers.AdamW(alpha=lr)
        optimizer.setup(model)
        # optimizer.add_hook(chainer.optimizer.GradientClipping(1.))
    else:
        optimizer = chainer.optimizers.AdamW(alpha=lr,
                                             beta1=0.9,
                                             beta2=0.999,
                                             eps=1e-08)
        optimizer.setup(model)
        if l2_lambda > 0.0:
            optimizer.add_hook(chainer.optimizer.WeightDecay(l2_lambda))
        if grad_clip > 0.0:
            optimizer.add_hook(chainer.optimizer.GradientClipping(grad_clip))

    def _report(y, t):
        values = {}
        model.compute_accuracy(y, t)
        for k, v in model.result.items():
            if 'loss' in k:
                values[k] = float(chainer.cuda.to_cpu(v.data))
            elif 'accuracy' in k:
                values[k] = v
        training.report(values)

    trainer = training.Trainer(optimizer, model, loss_func=model.compute_loss)
    trainer.configure(utils.training_config)
    trainer.add_listener(
        training.listeners.ProgressBar(lambda n: tqdm(total=n)), priority=200)
    trainer.add_hook(training.BATCH_END,
                     lambda data: _report(data['ys'], data['ts']))
    if test_dataset:
        parser = parsers.build_parser(loader, model)
        evaluator = eval_module.Evaluator(parser,
                                          logger=logging,
                                          report_details=False)
        trainer.add_listener(evaluator)

    if bert_model == 2:
        # Hard-coded schedule: presumably 20000 samples x 5 epochs / batch
        # size 20 training steps, with 10000 / 20 warmup steps.
        num_train_steps = 20000 * 5 / 20
        num_warmup_steps = 10000 / 20
        learning_rate = 2e-5
        # learning rate (eta) scheduling in Adam
        lr_decay_init = learning_rate * \
            (num_train_steps - num_warmup_steps) / num_train_steps
        trainer.add_hook(
            training.BATCH_END,
            extensions.LinearShift(  # decay
                'eta', (lr_decay_init, 0.),
                (num_warmup_steps, num_train_steps),
                optimizer=optimizer))
        trainer.add_hook(
            training.BATCH_END,
            extensions.WarmupShift(  # warmup
                'eta',
                0.,
                num_warmup_steps,
                learning_rate,
                optimizer=optimizer))

    if save_dir is not None:
        accessid = logger.accessid
        date = logger.accesstime.strftime('%Y%m%d')
        # metric = 'whole' if isinstance(model, models.Teranishi17) else 'inner'
        metric = 'exact'
        trainer.add_listener(
            utils.Saver(
                model,
                basename="{}-{}".format(date, accessid),
                context=dict(App.context, builder=builder),
                directory=save_dir,
                logger=logger,
                save_best=True,
                # Note: scoring the best model uses the evaluator above, so a
                # test_file must be given whenever save_dir is set.
                evaluate=(lambda _: evaluator.get_overall_score(metric))))

    trainer.fit(train_dataset, test_dataset, n_epoch, batch_size)
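
The bert_model == 2 branch schedules Adam's eta with two hooks: WarmupShift ramps it linearly from 0 to learning_rate over the first num_warmup_steps batches, then LinearShift decays it from lr_decay_init down to 0 by num_train_steps. A back-of-the-envelope tracer (a hypothetical helper, not part of the project) with the hard-coded values above:

def effective_eta(step, num_train_steps=5000, num_warmup_steps=500,
                  learning_rate=2e-5):
    # Same lr_decay_init as in the snippet above.
    lr_decay_init = (learning_rate *
                     (num_train_steps - num_warmup_steps) / num_train_steps)
    if step < num_warmup_steps:
        # Warmup phase: linear ramp from 0 to learning_rate.
        return learning_rate * step / num_warmup_steps
    # Decay phase: linear interpolation from lr_decay_init down to 0.
    frac = (step - num_warmup_steps) / (num_train_steps - num_warmup_steps)
    return lr_decay_init * (1.0 - min(frac, 1.0))

for step in (0, 250, 500, 2750, 5000):
    print(step, effective_eta(step))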
Example #4
def train(train_file,
          test_file=None,
          embed_file=None,
          n_epoch=20,
          batch_size=5000,
          lr=2e-3,
          model_config=None,
          device=-1,
          save_dir=None,
          seed=None,
          cache_dir='',
          refresh_cache=False):
    if seed is not None:
        utils.set_random_seed(seed, device)
    logger = logging.getLogger()
    assert isinstance(logger, logging.AppLogger)
    if model_config is None:
        model_config = {}

    loader = dataset.DataLoader.build(input_file=train_file,
                                      word_embed_file=embed_file,
                                      refresh_cache=refresh_cache,
                                      extra_ids=(git.hash(), ),
                                      cache_options=dict(dir=cache_dir,
                                                         mkdir=True,
                                                         logger=logger))
    train_dataset = loader.load(train_file,
                                train=True,
                                bucketing=True,
                                refresh_cache=refresh_cache)
    test_dataset = None
    if test_file is not None:
        test_dataset = loader.load(test_file,
                                   train=False,
                                   bucketing=True,
                                   refresh_cache=refresh_cache)

    model = _build_parser(loader, **model_config)
    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu(device)
    optimizer = chainer.optimizers.Adam(alpha=lr,
                                        beta1=0.9,
                                        beta2=0.9,
                                        eps=1e-12)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(5.0))
    optimizer.add_hook(
        optimizers.ExponentialDecayAnnealing(initial_lr=lr,
                                             decay_rate=0.75,
                                             decay_step=5000,
                                             lr_key='alpha'))

    def _report(y, t):
        arc_accuracy, rel_accuracy = model.compute_accuracy(y, t)
        training.report({
            'arc_accuracy': arc_accuracy,
            'rel_accuracy': rel_accuracy
        })

    trainer = training.Trainer(optimizer, model, loss_func=model.compute_loss)
    trainer.configure(utils.training_config)
    trainer.add_listener(
        training.listeners.ProgressBar(lambda n: tqdm(total=n)), priority=200)
    trainer.add_hook(training.BATCH_END,
                     lambda data: _report(data['ys'], data['ts']))
    if test_dataset:
        evaluator = Evaluator(model, loader.rel_map, test_file, logger)
        trainer.add_listener(evaluator, priority=128)
        if save_dir is not None:
            accessid = logger.accessid
            date = logger.accesstime.strftime('%Y%m%d')
            trainer.add_listener(
                utils.Saver(model,
                            basename="{}-{}".format(date, accessid),
                            context=dict(App.context, loader=loader),
                            directory=save_dir,
                            logger=logger,
                            save_best=True,
                            evaluate=(lambda _: evaluator._parsed['UAS'])))
    trainer.fit(train_dataset, test_dataset, n_epoch, batch_size)
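
The optimizer settings in Example #4 (Adam with beta2=0.9, eps=1e-12, lr=2e-3, annealed by a factor of 0.75 every 5000 steps) match the deep biaffine parser of Dozat & Manning (2017). Assuming ExponentialDecayAnnealing implements alpha(t) = lr * decay_rate ** (t / decay_step), the step size shrinks roughly like this:

lr, decay_rate, decay_step = 2e-3, 0.75, 5000
for t in (0, 5000, 25000, 50000):
    # 2.0e-3 at t=0, 1.5e-3 at 5k, ~4.7e-4 at 25k, ~1.1e-4 at 50k
    print(t, lr * decay_rate ** (t / decay_step))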