def train(n_epoch=20, batch_size=100, n_units=1000, device=-1):
    # Load MNIST and split the (image, label) pairs into separate arrays.
    train, test = chainer.datasets.get_mnist()
    train_x, train_y = [np.array(cols) for cols in zip(*train)]
    test_x, test_y = [np.array(cols) for cols in zip(*test)]

    model = MLP(n_units, 10)
    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu(device)

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    trainer = training.Trainer(
        optimizer, model,
        loss_func=chainer.functions.softmax_cross_entropy,
        accuracy_func=chainer.functions.accuracy)
    trainer.configure(
        # Toggle Chainer's train mode around training and validation epochs.
        hooks={
            training.EPOCH_TRAIN_BEGIN: lambda _: set_chainer_train(True),
            training.EPOCH_VALIDATE_BEGIN: lambda _: set_chainer_train(False),
        },
        converter=lambda x: chainer.dataset.convert.to_device(device, x))
    trainer.add_listener(
        training.listeners.ProgressBar(lambda n: tqdm(total=n)), priority=200)
    trainer.fit((train_x, train_y), (test_x, test_y), n_epoch, batch_size)
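
# Hypothetical usage sketch (not part of the original code); the argument
# values below are placeholders chosen for illustration:
#
#   train(n_epoch=5, batch_size=128, n_units=1000, device=-1)  # CPU run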
def train(train_file, test_file, word_embed_file=None, word_embed_size=100,
          char_embed_size=30, n_epoch=20, batch_size=10, lr=0.01, gpu=-1,
          seed=None):
    # Fix random seeds (NumPy, and CuPy when a GPU is used) for reproducibility.
    if seed is not None:
        import random
        import numpy
        random.seed(seed)
        numpy.random.seed(seed)
        if gpu >= 0:
            try:
                import cupy
                cupy.cuda.runtime.setDevice(gpu)
                cupy.random.seed(seed)
            except Exception as e:
                logging.error(str(e))
        logging.info("random seed: {}".format(seed))
    framework_utils.set_debug(App.debug)

    # Load files
    logging.info('initialize DataLoader with word_embed_file={}, '
                 'word_embed_size={}, and char_embed_size={}'.format(
                     word_embed_file, word_embed_size, char_embed_size))
    loader = DataLoader(word_embed_size, char_embed_size, word_embed_file)
    logging.info('load train dataset from {}'.format(train_file))
    train_dataset = loader.load(train_file, train=True)
    if test_file:
        logging.info('load test dataset from {}'.format(test_file))
        test_dataset = loader.load(test_file, train=False)
    else:
        test_dataset = None

    # Build the model and move it to the GPU if requested.
    model = Model(word_embedding=loader.get_embeddings('word'),
                  char_embedding=loader.get_embeddings('char'),
                  n_labels=len(loader.tag_map))
    logging.info('initialized model: {}'.format(model))
    if gpu >= 0:
        chainer.cuda.get_device_from_id(gpu).use()
        model.to_gpu()

    optimizer = chainer.optimizers.Adam(lr)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(5.0))

    # Flatten per-sentence outputs/targets before computing loss and accuracy;
    # padded positions are marked with -1 and ignored.
    def compute_loss(ys, ts):
        ys, ts = F.concat(ys, axis=0), F.concat(ts, axis=0)
        if gpu >= 0:
            ts.to_gpu()
        return F.softmax_cross_entropy(ys, ts, ignore_label=-1)

    def compute_accuracy(ys, ts):
        ys, ts = F.concat(ys, axis=0), F.concat(ts, axis=0)
        if gpu >= 0:
            ts.to_gpu()
        return F.accuracy(ys, ts, ignore_label=-1)

    trainer = training.Trainer(optimizer, model,
                               loss_func=compute_loss,
                               accuracy_func=compute_accuracy)
    trainer.configure(framework_utils.config)
    trainer.fit(train_dataset, None,
                batch_size=batch_size,
                epochs=n_epoch,
                validation_data=test_dataset,
                verbose=App.verbose)
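
# Hypothetical usage sketch (not part of the original code); the file paths
# below are placeholders for tagged training/validation data:
#
#   train('data/train.txt', 'data/dev.txt',
#         word_embed_file=None, n_epoch=20, batch_size=10, gpu=-1, seed=1)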
def train(train_file,
          test_file=None,
          format='tree',
          embed_file=None,
          n_epoch=20,
          batch_size=20,
          lr=0.001,
          limit=-1,
          l2_lambda=0.0,
          grad_clip=5.0,
          encoder_input=('char', 'postag'),
          model_config=None,
          device=-1,
          save_dir=None,
          seed=None,
          cache_dir='',
          refresh_cache=False,
          bert_model=0,
          bert_dir=''):
    if seed is not None:
        utils.set_random_seed(seed, device)
    logger = logging.getLogger()
    # logger.configure(filename='log.txt', logdir=save_dir)
    assert isinstance(logger, logging.AppLogger)
    if model_config is None:
        model_config = {}
    model_config['bert_model'] = bert_model
    model_config['bert_dir'] = bert_dir
    os.makedirs(save_dir, exist_ok=True)

    # Load datasets, optionally with external POS tags and contextualized
    # (pre-computed) embeddings.
    read_genia = format == 'genia'
    loader = dataset.DataLoader.build(
        postag_embed_size=model_config.get('postag_embed_size', 50),
        char_embed_size=model_config.get('char_embed_size', 10),
        word_embed_file=embed_file,
        filter_coord=(not read_genia),
        refresh_cache=refresh_cache,
        format=format,
        cache_options=dict(dir=cache_dir, mkdir=True, logger=logger),
        extra_ids=(git.hash(),))
    use_external_postags = not read_genia
    cont_embed_file_ext = _get_cont_embed_file_ext(encoder_input)
    use_cont_embed = cont_embed_file_ext is not None

    train_dataset = loader.load_with_external_resources(
        train_file,
        train=True,
        bucketing=False,
        size=None if limit < 0 else limit,
        refresh_cache=refresh_cache,
        use_external_postags=use_external_postags,
        use_contextualized_embed=use_cont_embed,
        contextualized_embed_file_ext=cont_embed_file_ext)
    logging.info('{} samples loaded for training'.format(len(train_dataset)))
    test_dataset = None
    if test_file is not None:
        test_dataset = loader.load_with_external_resources(
            test_file,
            train=False,
            bucketing=False,
            size=None if limit < 0 else limit // 10,
            refresh_cache=refresh_cache,
            use_external_postags=use_external_postags,
            use_contextualized_embed=use_cont_embed,
            contextualized_embed_file_ext=cont_embed_file_ext)
        logging.info('{} samples loaded for validation'.format(
            len(test_dataset)))

    builder = models.CoordSolverBuilder(loader,
                                        inputs=encoder_input,
                                        **model_config)
    logger.info("{}".format(builder))
    model = builder.build()
    logger.trace("Model: {}".format(model))
    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu(device)

    if bert_model == 1:
        optimizer = chainer.optimizers.AdamW(alpha=lr)
        optimizer.setup(model)
        # optimizer.add_hook(chainer.optimizer.GradientClipping(1.))
    else:
        optimizer = chainer.optimizers.AdamW(alpha=lr,
                                             beta1=0.9,
                                             beta2=0.999,
                                             eps=1e-08)
        optimizer.setup(model)
        if l2_lambda > 0.0:
            optimizer.add_hook(chainer.optimizer.WeightDecay(l2_lambda))
        if grad_clip > 0.0:
            optimizer.add_hook(chainer.optimizer.GradientClipping(grad_clip))

    # Collect loss/accuracy values from the model and report them per batch.
    def _report(y, t):
        values = {}
        model.compute_accuracy(y, t)
        for k, v in model.result.items():
            if 'loss' in k:
                values[k] = float(chainer.cuda.to_cpu(v.data))
            elif 'accuracy' in k:
                values[k] = v
        training.report(values)

    trainer = training.Trainer(optimizer, model, loss_func=model.compute_loss)
    trainer.configure(utils.training_config)
    trainer.add_listener(
        training.listeners.ProgressBar(lambda n: tqdm(total=n)), priority=200)
    trainer.add_hook(training.BATCH_END,
                     lambda data: _report(data['ys'], data['ts']))
    if test_dataset:
        parser = parsers.build_parser(loader, model)
        evaluator = eval_module.Evaluator(parser,
                                          logger=logging,
                                          report_details=False)
        trainer.add_listener(evaluator)

    if bert_model == 2:
        num_train_steps = 20000 * 5 / 20
        num_warmup_steps = 10000 / 20
        learning_rate = 2e-5
        # learning rate (eta) scheduling in Adam: warmup followed by linear decay
        lr_decay_init = learning_rate * \
            (num_train_steps - num_warmup_steps) / num_train_steps
        trainer.add_hook(
            training.BATCH_END,
            extensions.LinearShift(  # decay
                'eta', (lr_decay_init, 0.),
                (num_warmup_steps, num_train_steps),
                optimizer=optimizer))
        trainer.add_hook(
            training.BATCH_END,
            extensions.WarmupShift(  # warmup
                'eta', 0., num_warmup_steps, learning_rate,
                optimizer=optimizer))

    if save_dir is not None:
        accessid = logging.getLogger().accessid
        date = logging.getLogger().accesstime.strftime('%Y%m%d')
        # metric = 'whole' if isinstance(model, models.Teranishi17) else 'inner'
        metric = 'exact'
        trainer.add_listener(
            utils.Saver(
                model,
                basename="{}-{}".format(date, accessid),
                context=dict(App.context, builder=builder),
                directory=save_dir,
                logger=logger,
                save_best=True,
                evaluate=(lambda _: evaluator.get_overall_score(metric))))

    trainer.fit(train_dataset, test_dataset, n_epoch, batch_size)
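
# Hypothetical usage sketch (not part of the original code); paths, device id
# and save directory are placeholders, and bert_model=0 keeps the default
# non-BERT configuration:
#
#   train('data/train.trees', test_file='data/dev.trees', format='tree',
#         n_epoch=20, batch_size=20, device=0, save_dir='./results', seed=1)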
def train(train_file,
          test_file=None,
          embed_file=None,
          n_epoch=20,
          batch_size=5000,
          lr=2e-3,
          model_config=None,
          device=-1,
          save_dir=None,
          seed=None,
          cache_dir='',
          refresh_cache=False):
    if seed is not None:
        utils.set_random_seed(seed, device)
    logger = logging.getLogger()
    assert isinstance(logger, logging.AppLogger)
    if model_config is None:
        model_config = {}

    loader = dataset.DataLoader.build(
        input_file=train_file,
        word_embed_file=embed_file,
        refresh_cache=refresh_cache,
        extra_ids=(git.hash(),),
        cache_options=dict(dir=cache_dir, mkdir=True, logger=logger))
    train_dataset = loader.load(train_file,
                                train=True,
                                bucketing=True,
                                refresh_cache=refresh_cache)
    test_dataset = None
    if test_file is not None:
        test_dataset = loader.load(test_file,
                                   train=False,
                                   bucketing=True,
                                   refresh_cache=refresh_cache)

    model = _build_parser(loader, **model_config)
    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu(device)

    # Adam with gradient clipping and exponential learning-rate annealing.
    optimizer = chainer.optimizers.Adam(alpha=lr,
                                        beta1=0.9,
                                        beta2=0.9,
                                        eps=1e-12)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(5.0))
    optimizer.add_hook(
        optimizers.ExponentialDecayAnnealing(initial_lr=lr,
                                             decay_rate=0.75,
                                             decay_step=5000,
                                             lr_key='alpha'))

    # Report arc/relation accuracies after every batch.
    def _report(y, t):
        arc_accuracy, rel_accuracy = model.compute_accuracy(y, t)
        training.report({
            'arc_accuracy': arc_accuracy,
            'rel_accuracy': rel_accuracy
        })

    trainer = training.Trainer(optimizer, model, loss_func=model.compute_loss)
    trainer.configure(utils.training_config)
    trainer.add_listener(
        training.listeners.ProgressBar(lambda n: tqdm(total=n)), priority=200)
    trainer.add_hook(training.BATCH_END,
                     lambda data: _report(data['ys'], data['ts']))
    if test_dataset:
        evaluator = Evaluator(model, loader.rel_map, test_file, logger)
        trainer.add_listener(evaluator, priority=128)

    if save_dir is not None:
        accessid = logger.accessid
        date = logger.accesstime.strftime('%Y%m%d')
        trainer.add_listener(
            utils.Saver(model,
                        basename="{}-{}".format(date, accessid),
                        context=dict(App.context, loader=loader),
                        directory=save_dir,
                        logger=logger,
                        save_best=True,
                        evaluate=(lambda _: evaluator._parsed['UAS'])))

    trainer.fit(train_dataset, test_dataset, n_epoch, batch_size)
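
# Hypothetical usage sketch (not part of the original code); the treebank
# paths and save directory are placeholders:
#
#   train('data/train.conll', test_file='data/dev.conll', embed_file=None,
#         n_epoch=20, batch_size=5000, device=-1, save_dir='./results', seed=1)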