def test_alias(self):
    """`patients` must behave as an alias of `patience`, and supplying the
    value twice (via both spellings, or positionally plus the alias) must
    raise TypeError."""
    # By keyword: the alias alone sets the canonical attribute.
    trigger = triggers.EarlyStoppingTrigger(patients=10)
    assert trigger.patience == 10
    # By positional: (check_trigger, monitor, patience).
    trigger = triggers.EarlyStoppingTrigger((1, 'epoch'), 'main/loss', 10)
    assert trigger.patience == 10
    # Duplicated, by keyword
    with pytest.raises(TypeError):
        triggers.EarlyStoppingTrigger(patience=10, patients=3)
    # Duplicated, by positional
    with pytest.raises(TypeError):
        triggers.EarlyStoppingTrigger(
            (1, 'epoch'), 'main/loss', 10, patients=3)
def test_early_stopping_trigger_with_loss(self):
    """Early stopping on a loss-like metric ('main/loss', lower is better):
    the trigger must fire once the loss has not improved for `patience`
    consecutive epoch checks."""
    monitor_key = 'main/loss'
    raw_values = [100, 80, 30, 10, 20, 24, 30, 35]
    observations = numpy.asarray([
        chainer.Variable(numpy.asarray(value, dtype=numpy.float32))
        for value in raw_values
    ])
    stop_trigger = util.get_trigger(
        triggers.EarlyStoppingTrigger(
            monitor=monitor_key, patience=3, check_trigger=(1, 'epoch')))
    # Best value (10) occurs at epoch 4; after 3 non-improving epochs the
    # trigger fires from epoch 7 onward.
    fire_pattern = [False] * 6 + [True] * 2
    _test_trigger(self, stop_trigger, monitor_key, observations, fire_pattern)
def test_early_stopping_trigger_with_accuracy(self):
    """Early stopping on an accuracy-like metric ('main/accuracy', higher is
    better): the trigger must fire once accuracy has not improved for
    `patience` consecutive epoch checks."""
    monitor_key = 'main/accuracy'
    raw_values = [0.5, 0.5, 0.6, 0.7, 0.6, 0.4, 0.3, 0.2]
    observations = numpy.asarray([
        chainer.Variable(numpy.asarray(value, dtype=numpy.float32))
        for value in raw_values
    ])
    stop_trigger = util.get_trigger(
        triggers.EarlyStoppingTrigger(
            monitor=monitor_key, patience=3,
            check_trigger=(1, 'epoch'), verbose=False))
    # Best value (0.7) occurs at epoch 4; after 3 non-improving epochs the
    # trigger fires from epoch 7 onward.
    fire_pattern = [False] * 6 + [True] * 2
    _test_trigger(self, stop_trigger, monitor_key, observations, fire_pattern)
def TrainUNet(X, Y, model_=None, optimizer_=None, epoch=40, alpha=0.001,
              gpu_id=0, loop=1, earlystop=True):
    """Train a U-Net regressor on paired patterns.

    Args:
        X, Y: input / target patterns; must have equal length.
        model_: optional pre-built network to wrap in ``Regressor`` and
            continue training from. When ``None`` a fresh ``UNet`` is used.
        optimizer_: optional already-configured optimizer to reuse.
        epoch: maximum number of training epochs.
        alpha: Adam learning rate (only used when ``optimizer_`` is None).
        gpu_id: GPU device id; a negative value keeps the model on CPU.
        loop: index used to tag output artifacts (plots, saved models).
        earlystop: when True, stop early on stagnating validation loss.

    Returns:
        Tuple ``(trained U-Net link, optimizer)``.
    """
    assert len(X) == len(Y)
    d_time = datetime.datetime.now().strftime("%m-%d-%H-%M-%S")

    # 1. Model load.
    if model_ is not None:
        model = Regressor(model_)
        print("## model loaded.")
    else:
        model = Regressor(UNet())
    model.compute_accuracy = False
    if gpu_id >= 0:
        model.to_gpu(gpu_id)

    # 2. Optimizer load. A fresh optimizer needs setup(); a passed-in one is
    # assumed to be set up on this model already — TODO confirm at call sites.
    if optimizer_ is not None:
        opt = optimizer_
        print("## optimizer loaded.")
    else:
        opt = optimizers.Adam(alpha=alpha)
        opt.setup(model)

    # 3. Data split: 80% train / 20% validation, fixed seed for reproducibility.
    dataset = Unet_DataSet(X, Y)
    print("# number of patterns", len(dataset))
    train, valid = \
        split_dataset_random(dataset, int(len(dataset) * 0.8), seed=0)

    # 4. Iterators.
    train_iter = SerialIterator(train, batch_size=C.BATCH_SIZE)
    test_iter = SerialIterator(valid, batch_size=C.BATCH_SIZE,
                               repeat=False, shuffle=False)

    # 5. Enable training mode and backprop globally.
    chainer.config.train = True
    chainer.config.enable_backprop = True

    # 6. UnetUpdater
    updater = UnetUpdater(train_iter, opt, model, device=gpu_id)

    # 7. Early stopping on validation loss. `patience` is the canonical
    # keyword; `patients` is only a deprecated alias.
    if earlystop:
        stop_trigger = triggers.EarlyStoppingTrigger(
            monitor='validation/main/loss',
            max_trigger=(epoch, 'epoch'),
            patience=5)
    else:
        stop_trigger = (epoch, 'epoch')

    # 8. Trainer
    trainer = training.Trainer(updater, stop_trigger, out=C.PATH_TRAINRESULT)

    # 8.1. Validation evaluator; SaveRestore keeps the best-loss snapshot.
    trainer.extend(UnetEvaluator(test_iter, model, device=gpu_id))
    trainer.extend(SaveRestore(),
                   trigger=triggers.MinValueTrigger('validation/main/loss'))

    # 8.2. Extensions LogReport
    trainer.extend(extensions.LogReport())

    # 8.4. Print Report
    trainer.extend(extensions.observe_lr())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'elapsed_time', 'lr'
        ]))

    # 8.5. Extension Graph
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              x_key='epoch',
                              file_name='loop-' + str(loop) + '-loss'
                                        + d_time + '.png'))

    # 8.6. Progress Bar
    trainer.extend(extensions.ProgressBar())

    # 9. Run, then persist the trained U-Net for this loop.
    trainer.run()
    chainer.serializers.save_npz(C.PATH_TRAINRESULT / ('loop' + str(loop)),
                                 model.unet)
    return model.unet, opt
def train_phase(generator, train, valid, args):
    """Adversarially train `generator` (with a freshly built discriminator)
    on `train`, validating on `valid`.

    Args:
        generator: generator link to train (wrapped in ``Regressor``).
        train, valid: dataset splits.
        args: parsed CLI namespace (gpu list, batchsize, lr, beta, decay,
            alpha, iteration, frequency, pinfall, out, resume, ...).
    """
    print('# samples:')
    print('-- train:', len(train))
    print('-- valid:', len(valid))

    # setup dataset iterators
    train_batchsize = min(args.batchsize * len(args.gpu), len(train))
    valid_batchsize = args.batchsize
    train_iter = chainer.iterators.MultiprocessIterator(train, train_batchsize)
    # NOTE(review): shuffle=True with repeat=False on the validation iterator
    # looks unusual — confirm it is intentional.
    valid_iter = chainer.iterators.SerialIterator(
        valid, valid_batchsize, repeat=False, shuffle=True)

    # setup a model
    model = Regressor(generator,
                      activation=F.tanh,
                      lossfun=F.mean_absolute_error,
                      accfun=F.mean_absolute_error)
    discriminator = build_discriminator()
    discriminator.save_args(os.path.join(args.out, 'discriminator.json'))

    if args.gpu[0] >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu[0]).use()
        if len(args.gpu) == 1:
            model.to_gpu()
            discriminator.to_gpu()

    # setup optimizers (one for the generator, one for the discriminator)
    optimizer_G = chainer.optimizers.Adam(alpha=args.lr, beta1=args.beta,
                                          beta2=0.999, eps=1e-08,
                                          amsgrad=False)
    optimizer_G.setup(model)
    optimizer_D = chainer.optimizers.Adam(alpha=args.lr, beta1=args.beta,
                                          beta2=0.999, eps=1e-08,
                                          amsgrad=False)
    optimizer_D.setup(discriminator)
    if args.decay > 0:
        optimizer_G.add_hook(chainer.optimizer_hooks.WeightDecay(args.decay))
        optimizer_D.add_hook(chainer.optimizer_hooks.WeightDecay(args.decay))

    # setup a trainer (single GPU only; the parallel path is unimplemented)
    if len(args.gpu) == 1:
        updater = DCGANUpdater(
            iterator=train_iter,
            optimizer={
                'gen': optimizer_G,
                'dis': optimizer_D,
            },
            alpha=args.alpha,
            device=args.gpu[0],
        )
    else:
        devices = {'main': args.gpu[0]}
        for idx, g in enumerate(args.gpu[1:]):
            devices['slave_%d' % idx] = g
        raise NotImplementedError('The parallel updater is not supported..')

    # -1 means "derive a frequency from the iteration budget".
    frequency = max(args.iteration // 80, 1) \
        if args.frequency == -1 else max(1, args.frequency)

    # Early stopping on validation loss. `patience` is the canonical keyword
    # (`patients` is a deprecated alias); np.inf effectively disables it.
    stop_trigger = triggers.EarlyStoppingTrigger(
        monitor='validation/main/loss',
        max_trigger=(args.iteration, 'iteration'),
        check_trigger=(frequency, 'iteration'),
        patience=np.inf if args.pinfall == -1 else max(1, args.pinfall))

    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    # shift lr linearly to zero over the second half of training
    trainer.extend(
        extensions.LinearShift('alpha', (args.lr, 0.0),
                               (args.iteration // 2, args.iteration),
                               optimizer=optimizer_G))
    trainer.extend(
        extensions.LinearShift('alpha', (args.lr, 0.0),
                               (args.iteration // 2, args.iteration),
                               optimizer=optimizer_D))

    # setup a visualizer
    transforms = {'x': lambda x: x, 'y': lambda x: x, 't': lambda x: x}
    clims = {'x': (-1., 1.), 'y': (-1., 1.), 't': (-1., 1.)}
    visualizer = ImageVisualizer(transforms=transforms,
                                 cmaps=None, clims=clims)

    # setup a validator
    valid_file = os.path.join('validation', 'iter_{.updater.iteration:08}.png')
    trainer.extend(Validator(valid_iter, model, valid_file,
                             visualizer=visualizer, n_vis=20,
                             device=args.gpu[0]),
                   trigger=(frequency, 'iteration'))

    trainer.extend(
        extensions.dump_graph('loss_gen', filename='generative_loss.dot'))
    trainer.extend(
        extensions.dump_graph('loss_cond', filename='conditional_loss.dot'))
    trainer.extend(
        extensions.dump_graph('loss_dis', filename='discriminative_loss.dot'))

    trainer.extend(extensions.snapshot(
        filename='snapshot_iter_{.updater.iteration:08}.npz'),
        trigger=(frequency, 'iteration'))
    trainer.extend(extensions.snapshot_object(
        generator, 'generator_iter_{.updater.iteration:08}.npz'),
        trigger=(frequency, 'iteration'))
    trainer.extend(extensions.snapshot_object(
        discriminator, 'discriminator_iter_{.updater.iteration:08}.npz'),
        trigger=(frequency, 'iteration'))

    log_keys = [
        'loss_gen',
        'loss_cond',
        'loss_dis',
        'validation/main/accuracy'
    ]
    trainer.extend(LogReport(keys=log_keys, trigger=(100, 'iteration')))

    # setup log ploter
    if extensions.PlotReport.available():
        for plot_key in ['loss', 'accuracy']:
            plot_keys = [
                key for key in log_keys
                if key.split('/')[-1].startswith(plot_key)
            ]
            trainer.extend(
                extensions.PlotReport(plot_keys, 'iteration',
                                      file_name=plot_key + '.png',
                                      trigger=(frequency, 'iteration')))

    trainer.extend(
        PrintReport(['iteration'] + log_keys + ['elapsed_time'], n_step=1))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # train
    trainer.run()
# Regression setup: MSE loss, accuracy reporting disabled.
model = L.Classifier(net, lossfun=F.mean_squared_error)
model.compute_accuracy = False  # regression
gpu_id = 0
cuda.get_device(gpu_id).use()
model.to_gpu(gpu_id)

max_epoch = 16
print('max_epoch: {}'.format(max_epoch))

# Timestamped output directory so repeated runs don't collide.
now = datetime.datetime.now()
now = now.strftime('%Y%m%d%H%M')

optimizer = optimizers.Adam().setup(model)
updater = training.StandardUpdater(train_iter, optimizer, device=0)

# Early stopping on validation loss ('val' is the Evaluator name below).
# `patience` is the canonical keyword; `patients` is a deprecated alias.
trigger = triggers.EarlyStoppingTrigger(monitor='val/main/loss',
                                        check_trigger=(1, 'epoch'),
                                        patience=5,
                                        max_trigger=(max_epoch, 'epoch'))
trainer = training.Trainer(updater, trigger, out='result/{}'.format(now))
trainer.extend(extensions.LogReport(trigger=(1, 'epoch'), log_name='log'))
trainer.extend(extensions.Evaluator(valid_iter, model, device=0), name='val')
trainer.extend(
    extensions.PlotReport(['main/loss', 'val/main/loss'], x_key='epoch',
                          file_name='loss.png'))
trainer.extend(
    extensions.PrintReport(
        ['epoch', 'main/loss', 'val/main/loss', 'elapsed_time']))
trainer.run()
def train(args):
    """Train a context-aware recommendation model described by `args`.

    Loads vocabularies and aspect/opinion data from ``args.indir``, builds
    the model and optimizer from CLI options, and runs a Chainer trainer
    with early stopping on validation loss, saving the best model to
    ``args.out``.

    Raises:
        ValueError: if ``args.optimizer`` is neither "rmsprop" nor "adam".
    """
    if not os.path.exists(args.out):
        os.makedirs(args.out)
    if args.gpu >= 0:
        cuda.check_cuda_available()
        cuda.get_device(args.gpu).use()
    if args.random_seed:
        set_random_seed(args.random_seed, (args.gpu,))

    # Load dictionaries and trim the vocabulary by frequency.
    user2index = load_dict(os.path.join(args.indir, USER_DICT_FILENAME))
    item2index = load_dict(os.path.join(args.indir, ITEM_DICT_FILENAME))
    (trimmed_word2count, word2index, aspect2index,
     opinion2index) = read_and_trim_vocab(args.indir, args.trimfreq)
    aspect_opinions = get_aspect_opinions(
        os.path.join(args.indir, TRAIN_FILENAME))

    # Persist the resolved configuration next to the model.
    export_params(
        args,
        user2index,
        item2index,
        trimmed_word2count,
        word2index,
        aspect2index,
        opinion2index,
        aspect_opinions,
    )

    src_aspect_score = SOURCE_ASPECT_SCORE.get(args.context,
                                               "aspect_score_efm")
    data_loader = DataLoader(
        args.indir,
        user2index,
        item2index,
        trimmed_word2count,
        word2index,
        aspect2index,
        opinion2index,
        aspect_opinions,
        src_aspect_score,
    )
    train_iter, val_iter = get_dataset_iterator(
        args.context, data_loader, args.batchsize
    )
    model = get_context_model(args, data_loader)

    if args.optimizer == "rmsprop":
        optimizer = O.RMSprop(lr=args.learning_rate, alpha=args.alpha)
    elif args.optimizer == "adam":
        optimizer = O.Adam(amsgrad=args.amsgrad)
    else:
        # Fail fast with a clear message instead of a NameError below.
        raise ValueError("unsupported optimizer: {}".format(args.optimizer))
    optimizer.setup(model)
    if args.grad_clip:
        optimizer.add_hook(GradientClipping(args.grad_clip))
    if args.gpu >= 0:
        model.to_gpu(args.gpu)

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=convert, device=args.gpu
    )
    # Early stopping on validation loss. `patience` is the canonical keyword
    # (`patients` is a deprecated alias); the CLI flag keeps its old name.
    early_stop = triggers.EarlyStoppingTrigger(
        monitor="validation/main/loss",
        patience=args.patients,
        max_trigger=(args.epoch, "epoch"),
    )
    trainer = training.Trainer(updater, stop_trigger=early_stop, out=args.out)
    trainer.extend(
        extensions.Evaluator(val_iter, model, converter=convert,
                             device=args.gpu)
    )
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport(
            ["epoch", "main/loss", "validation/main/loss", "lr",
             "elapsed_time"]
        )
    )
    trainer.extend(
        extensions.PlotReport(
            ["main/loss", "validation/main/loss"],
            x_key="epoch", file_name="loss.png"
        )
    )
    trainer.extend(extensions.ProgressBar())
    # Keep only the best model (minimum validation loss).
    trainer.extend(
        extensions.snapshot_object(model, MODEL_FILENAME),
        trigger=triggers.MinValueTrigger("validation/main/loss"),
    )
    trainer.extend(extensions.observe_lr())

    # Optional stepwise lr schedule (RMSprop only): lr decays by `lr_reduce`
    # every `stepsize` epochs, floored at `min_learning_rate`.
    if args.optimizer in ["rmsprop"]:
        if args.schedule_lr:
            epoch_list = np.array(
                [i for i in range(1, int(args.epoch / args.stepsize) + 1)]
            ).astype(np.int32)
            value_list = args.learning_rate * args.lr_reduce ** epoch_list
            value_list[value_list < args.min_learning_rate] = \
                args.min_learning_rate
            epoch_list *= args.stepsize
            epoch_list += args.begin_step
            trainer.extend(
                schedule_optimizer_value(epoch_list.tolist(),
                                         value_list.tolist())
            )

    trainer.run()
batchsize, repeat=False, shuffle=False)

# network setup
net = mymodel.create()

# trainer setup
optimizer = chainer.optimizers.Adam().setup(net)
updater = chainer.training.StandardUpdater(train_iter, optimizer,
                                           device=gpu_id)

# early stopping
# https://qiita.com/klis/items/7865d9e8e757f16bc39c
# NOTE(review): no `patience` given, so the trigger's default patience
# applies — confirm that is intended.
stop_trigger = triggers.EarlyStoppingTrigger(monitor='val/main/loss',
                                             max_trigger=(max_epoch, 'epoch'))
trainer = chainer.training.Trainer(updater, stop_trigger)
trainer.extend(extensions.Evaluator(valid_iter, net, device=gpu_id),
               name='val')
trainer.extend(extensions.LogReport())
trainer.extend(
    extensions.PrintReport([
        'epoch', 'main/loss', 'main/accuracy', 'val/main/loss',
        'val/main/accuracy', 'elapsed_time'
    ]))
# NOTE(review): without an explicit trigger this shift applies at the
# extension's default priority/interval — presumably every iteration;
# confirm the intended decay cadence for Adam's alpha.
trainer.extend(extensions.ExponentialShift("alpha", 0.9999))

## Let's train!
trainer.run()
def train_model(self, datasets):
    """Train ``self.model`` on `datasets` and return the trained model.

    CLI flags control batch size, learning rate, epochs, device, output
    directory, snapshot resume, and an optional early-stopping monitor.

    Args:
        datasets: dataset splittable by ``split_dataset``.

    Returns:
        The trained model (``self.model``).
    """
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=10,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--early-stopping', type=str,
                        help='Metric to watch for early stopping')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))

    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        self.model.to_gpu()

    optimizer = chainer.optimizers.Adam(args.learnrate)
    optimizer.setup(self.model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(5e-4))

    # NOTE(review): 80 is a split *index* (first 80 samples become `train`),
    # not a percentage — confirm against the dataset size.
    train, test = split_dataset(datasets, 80)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    stop_trigger = (args.epoch, 'epoch')
    # Early stopping option
    if args.early_stopping:
        stop_trigger = triggers.EarlyStoppingTrigger(
            monitor=args.early_stopping, verbose=True,
            max_trigger=(args.epoch, 'epoch'))

    # Set up a trainer
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu,
        loss_func=mean_squared_error)
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(
        extensions.Evaluator(test_iter, self.model, device=args.gpu))

    # Reduce the learning rate by half every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5),
                   trigger=(25, 'epoch'))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot at each epoch.
    # NOTE(review): 'snaphot' is a typo but kept: existing tooling may
    # already expect this filename pattern.
    trainer.extend(
        extensions.snapshot(filename='snaphot_epoch_{.updater.epoch}'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()

    return self.model
def main():
    """Train a molecule-pair classifier (binary, sigmoid cross-entropy)
    with early stopping, evaluators for accuracy / ROC-AUC / PRC-AUC / F1,
    and learning-rate scheduling, then pickle the trained classifier."""
    # Parse the arguments. Several flags arrive as the strings 'True'/'False'.
    args = parse_arguments()
    augment = args.augment != 'False'
    multi_gpu = args.multi_gpu != 'False'
    if args.label:
        labels = args.label
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        raise ValueError('No target label was specified.')

    # Dataset preparation. Postprocessing is required for the regression task.
    def postprocess_label(label_list):
        label_arr = np.asarray(label_list, dtype=np.int32)
        return label_arr

    # Apply a preprocessor to the dataset.
    logging.info('Preprocess train dataset and test dataset...')
    preprocessor = preprocess_method_dict[args.method]()
    parser = CSVFileParserForPair(preprocessor,
                                  postprocess_label=postprocess_label,
                                  labels=labels,
                                  smiles_cols=['smiles_1', 'smiles_2'])
    train = parser.parse(args.train_datafile)['dataset']
    valid = parser.parse(args.valid_datafile)['dataset']
    if augment:
        logging.info('Utilizing data augmentation in train set')
        train = augment_dataset(train)

    num_train = train.get_datasets()[0].shape[0]
    num_valid = valid.get_datasets()[0].shape[0]
    logging.info('Train/test split: {}/{}'.format(num_train, num_valid))

    if len(args.net_hidden_dims):
        net_hidden_dims = tuple(int(dim)
                                for dim in args.net_hidden_dims.split(','))
    else:
        net_hidden_dims = ()

    fp_attention = bool(args.fp_attention)
    update_attention = bool(args.update_attention)
    weight_tying = args.weight_tying != 'False'
    attention_tying = args.attention_tying != 'False'
    fp_batch_normalization = args.fp_bn == 'True'
    layer_aggregator = None if args.layer_aggregator == '' \
        else args.layer_aggregator
    context = args.context != 'False'
    output_activation = functions.relu \
        if args.output_activation == 'relu' else None

    # NOTE: `weight_typing` (sic) is the keyword set_up_predictor expects.
    predictor = set_up_predictor(
        method=args.method,
        fp_hidden_dim=args.fp_hidden_dim,
        fp_out_dim=args.fp_out_dim,
        conv_layers=args.conv_layers,
        concat_hidden=args.concat_hidden,
        layer_aggregator=layer_aggregator,
        fp_dropout_rate=args.fp_dropout_rate,
        fp_batch_normalization=fp_batch_normalization,
        net_hidden_dims=net_hidden_dims,
        class_num=class_num,
        sim_method=args.sim_method,
        fp_attention=fp_attention,
        weight_typing=weight_tying,
        attention_tying=attention_tying,
        update_attention=update_attention,
        context=context,
        context_layers=args.context_layers,
        context_dropout=args.context_dropout,
        message_function=args.message_function,
        readout_function=args.readout_function,
        num_timesteps=args.num_timesteps,
        num_output_hidden_layers=args.num_output_hidden_layers,
        output_hidden_dim=args.output_hidden_dim,
        output_activation=output_activation,
        symmetric=args.symmetric)

    train_iter = SerialIterator(train, args.batchsize)
    test_iter = SerialIterator(valid, args.batchsize,
                               repeat=False, shuffle=False)

    metrics_fun = {'accuracy': F.binary_accuracy}
    classifier = Classifier(predictor, lossfun=F.sigmoid_cross_entropy,
                            metrics_fun=metrics_fun, device=args.gpu)

    # Set up the optimizer.
    optimizer = optimizers.Adam(alpha=args.learning_rate,
                                weight_decay_rate=args.weight_decay_rate)
    optimizer.setup(classifier)

    # add regularization
    if args.max_norm > 0:
        optimizer.add_hook(
            chainer.optimizer.GradientClipping(threshold=args.max_norm))
    if args.l2_rate > 0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.l2_rate))
    if args.l1_rate > 0:
        optimizer.add_hook(chainer.optimizer.Lasso(rate=args.l1_rate))

    # Set up the updater.
    if multi_gpu:
        logging.info('Using multiple GPUs')
        updater = training.ParallelUpdater(train_iter, optimizer,
                                           devices={'main': 0, 'second': 1},
                                           converter=concat_mols)
    else:
        logging.info('Using single GPU')
        updater = training.StandardUpdater(train_iter, optimizer,
                                           device=args.gpu,
                                           converter=concat_mols)

    # Set up the trainer.
    logging.info('Training...')
    # Early stopping on validation loss. `patience` is the canonical keyword
    # (`patients` is a deprecated alias).
    early_stop = triggers.EarlyStoppingTrigger(
        monitor='validation/main/loss',
        patience=30,
        max_trigger=(500, 'epoch'))
    out = 'output' + '/' + args.out
    trainer = training.Trainer(updater, stop_trigger=early_stop, out=out)

    trainer.extend(E.Evaluator(test_iter, classifier, device=args.gpu,
                               converter=concat_mols))

    # Non-repeating iterator over the train set for evaluation-only passes.
    train_eval_iter = SerialIterator(train, args.batchsize,
                                     repeat=False, shuffle=False)
    trainer.extend(AccuracyEvaluator(
        train_eval_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='train_acc',
        pos_labels=1, ignore_labels=-1, raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(AccuracyEvaluator(
        test_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='val_acc',
        pos_labels=1, ignore_labels=-1))

    trainer.extend(ROCAUCEvaluator(
        train_eval_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='train_roc',
        pos_labels=1, ignore_labels=-1, raise_value_error=False))
    trainer.extend(ROCAUCEvaluator(
        test_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='val_roc',
        pos_labels=1, ignore_labels=-1))

    trainer.extend(PRCAUCEvaluator(
        train_eval_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='train_prc',
        pos_labels=1, ignore_labels=-1, raise_value_error=False))
    trainer.extend(PRCAUCEvaluator(
        test_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='val_prc',
        pos_labels=1, ignore_labels=-1))

    trainer.extend(F1Evaluator(
        train_eval_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='train_f',
        pos_labels=1, ignore_labels=-1, raise_value_error=False))
    trainer.extend(F1Evaluator(
        test_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='val_f',
        pos_labels=1, ignore_labels=-1))

    # apply shift strategy to learning rate on a fixed epoch schedule
    if args.exp_shift_strategy == 1:
        trainer.extend(
            E.ExponentialShift('alpha', args.exp_shift_rate),
            trigger=triggers.ManualScheduleTrigger(
                [10, 20, 30, 40, 50, 60], 'epoch'))
    elif args.exp_shift_strategy == 2:
        trainer.extend(
            E.ExponentialShift('alpha', args.exp_shift_rate),
            trigger=triggers.ManualScheduleTrigger(
                [5, 10, 15, 20, 25, 30], 'epoch'))
    elif args.exp_shift_strategy == 3:
        trainer.extend(
            E.ExponentialShift('alpha', args.exp_shift_rate),
            trigger=triggers.ManualScheduleTrigger(
                [5, 10, 15, 20, 25, 30, 40, 50, 60, 70], 'epoch'))
    else:
        raise ValueError('No such strategy to adapt learning rate')

    # observation of learning rate
    trainer.extend(E.observe_lr(), trigger=(1, 'iteration'))

    entries = [
        'epoch',
        'main/loss', 'train_acc/main/accuracy', 'train_roc/main/roc_auc',
        'train_prc/main/prc_auc', 'train_f/main/f1',
        'validation/main/loss', 'val_acc/main/accuracy',
        'val_roc/main/roc_auc', 'val_prc/main/prc_auc', 'val_f/main/f1',
        'lr', 'elapsed_time']
    trainer.extend(E.PrintReport(entries=entries))

    # snapshot every 2 epochs (changed from 10 to 2 on Mar. 1 2019)
    trainer.extend(E.snapshot(), trigger=(2, 'epoch'))
    trainer.extend(E.LogReport())
    trainer.extend(E.ProgressBar())
    trainer.extend(E.PlotReport(['main/loss', 'validation/main/loss'],
                                'epoch', file_name='loss.png'))
    trainer.extend(E.PlotReport(['train_acc/main/accuracy',
                                 'val_acc/main/accuracy'],
                                'epoch', file_name='accuracy.png'))

    if args.resume:
        resume_path = os.path.join(out, args.resume)
        logging.info(
            'Resume training according to snapshot in {}'.format(resume_path))
        chainer.serializers.load_npz(resume_path, trainer)

    trainer.run()

    # Save the classifier's parameters.
    model_path = os.path.join(out, args.model_filename)
    logging.info('Saving the trained models to {}...'.format(model_path))
    classifier.save_pickle(model_path, protocol=args.protocol)
def main():
    """Two-phase training of a Reinforced Mnemonic Reader.

    Phase 1 trains with MLE loss (fine-tune temporarily disabled); phase 2
    optionally reloads the best phase-1 model and continues with RL
    fine-tuning. Both phases stop early on the monitored validation metric
    and snapshot the best model. With ``--predict`` the best model is then
    evaluated on the dev set.
    """
    parser = argparse.ArgumentParser(
        'Reinforced Mnemonic Reader',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # Read parameters
    add_train_args(parser)
    args = parser.parse_args()
    set_defaults(args)
    set_random_seed(args.random_seed)

    train_data = DataUtils.load_data(args.train_file)
    dev_data = DataUtils.load_data(args.dev_file)
    # for Debug: shrink both splits; otherwise subsample by ratio.
    if DataUtils.IS_DEBUG:
        train_data = train_data[:128]
        dev_data = dev_data[:128]
    else:
        train_size = int(len(train_data) * args.train_ratio)
        print(train_size)
        train_data = train_data[:train_size]
        dev_size = int(len(dev_data) * args.dev_ratio)
        print(dev_size)
        dev_data = dev_data[:dev_size]
    all_data = train_data + dev_data

    # Derive feature sizes and padding lengths from the combined data.
    args.pos_size = DataUtils.transform_pos_feature(all_data)
    args.ner_size = DataUtils.transform_ner_feature(all_data)
    args.qtype_size = 11
    args.context_max_length = DataUtils.get_max_len(all_data,
                                                    DataType.CONTEXT)
    max_question_len = DataUtils.get_max_len(all_data, DataType.QUESTION)
    DataUtils.MAX_DOC_LENGTH = args.context_max_length
    DataUtils.MAX_Q_LENGTH = max_question_len
    DataUtils.cal_mask(train_data, args.context_max_length, max_question_len)
    DataUtils.cal_mask(dev_data, args.context_max_length, max_question_len)

    # Word embeddings (cached per train/dev ratio).
    pretrain_embedding_file = os.path.join(
        args.embed_dir, "pretrain_embedding_v6_a_"
        + str(args.train_ratio) + "_" + str(args.dev_ratio))
    pretrain_index_file = os.path.join(
        args.embed_dir, "pretrain_index_file_v6_"
        + str(args.train_ratio) + "_" + str(args.dev_ratio) + ".txt")
    args.w_embeddings = DataUtils.load_embedding(
        all_data, args.embedding_file, args.embedding_dim,
        pretrained_embedding_file=pretrain_embedding_file,
        pretrained_index_file=pretrain_index_file, overwrite=False)
    if DataUtils.IS_DEBUG:
        print("load_embedding : finished...")

    # Character embeddings (cached per train/dev ratio).
    pretrain_char_embedding_file = os.path.join(
        args.embed_dir, "pretrain_char_embedding_v6_a_"
        + str(args.train_ratio) + "_" + str(args.dev_ratio))
    pretrain_char_index_file = os.path.join(
        args.embed_dir, "pretrain_char_index_file_v6_a_"
        + str(args.train_ratio) + "_" + str(args.dev_ratio) + ".txt")
    args.char_embeddings = DataUtils.load_char_embedding(
        all_data, args.char_embedding_file, args.char_embedding_dim,
        pretrained_embedding_file=pretrain_char_embedding_file,
        pretrained_index_file=pretrain_char_index_file, overwrite=False)
    args.vocab_size = len(DataUtils.word_dict)
    args.char_size = len(DataUtils.char_dict)
    print(args.vocab_size)

    if args.use_dict:
        # add dictionary
        args.dictionary = DataUtils.load_dictionary(
            args.dictionary_file, args.dict_embedding_file)
    if args.use_elmo:
        # add elmo online: initialize elmo batcher
        DataUtils.load_elmo_batcher(args.vocab_file)

    train_data_input = chainer.datasets.TransformDataset(
        train_data, DataUtils.convert_item)
    args.num_features = 4
    args.num_epochs = 100
    # computational-graph dump file
    args.dot_file = "cg_f__.dot"

    # ---- Phase 1: MLE training (fine-tune temporarily disabled) ----
    args.backup_fine_tune = False
    if args.fine_tune:
        args.backup_fine_tune = True
        args.fine_tune = False
    model = MReader_V6(args)
    if args.fine_tune:
        chainer.serializers.load_npz('result/best_model', model)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu(args.gpu)

    optimizer = chainer.optimizers.Adam(
        alpha=args.reader_initial_learning_rate)
    optimizer.setup(model)
    train_iter = chainer.iterators.SerialIterator(train_data_input,
                                                  args.batch_size)
    updater = training.StandardUpdater(train_iter, optimizer,
                                       loss_func=model.get_loss_function(),
                                       device=args.gpu)

    # Early stopping on the phase-appropriate metric (higher is better).
    # `patience` is the canonical keyword; `patients` is a deprecated alias.
    monitor = "validation/main/f1" if args.fine_tune \
        else "validation/main/em"
    earlystop_trigger = triggers.EarlyStoppingTrigger(
        monitor=monitor, patience=5, mode="max",
        max_trigger=(args.num_epochs, 'epoch'))
    trainer = training.Trainer(updater, earlystop_trigger)

    # Snapshot the best model seen so far.
    save_model_file = "best_model_rl_" + str(args.train_ratio) + "_" + str(
        args.dev_ratio) if args.fine_tune else "best_model_" + str(
        args.train_ratio) + "_" + str(args.dev_ratio)
    trainer.extend(
        extensions.snapshot_object(model, save_model_file),
        trigger=chainer.training.triggers.MaxValueTrigger(monitor))

    # computational graph 2nd way
    trainer.extend(
        extensions.dump_graph(root_name="main/loss", out_name="cg.dot"))
    trainer.extend(
        MReaderEvaluator(model, dev_data, device=args.gpu,
                         f1_key='validation/main/f1',
                         em_key='validation/main/em',
                         batch_size=args.batch_size, dot_file='cg_n.dot'))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'main/mle_loss', 'main/rl_loss',
            'validation/main/f1', 'validation/main/em', 'elapsed_time'
        ]))
    print('start training')
    trainer.run()

    # ---- Phase 2: RL fine-tuning ----
    if args.backup_fine_tune:
        args.fine_tune = True
        model.args.fine_tune = True
    if args.fine_tune:
        chainer.serializers.load_npz(
            'result/best_model_' + str(args.train_ratio) + "_"
            + str(args.dev_ratio), model)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu(args.gpu)

    # NOTE(review): the original forces fine_tune on here before the RL
    # phase — confirm whether phase 2 should be skipped when fine-tuning
    # was never requested.
    args.fine_tune = True
    model.args.fine_tune = True

    optimizer_rl = chainer.optimizers.Adam(
        alpha=args.rl_initial_learning_rate)
    optimizer_rl.setup(model)
    train_iter_rl = chainer.iterators.SerialIterator(
        train_data_input, args.batch_size)
    updater_rl = training.StandardUpdater(
        train_iter_rl, optimizer_rl,
        loss_func=model.get_loss_function(), device=args.gpu)

    monitor_rl = "validation/main/f1" if args.fine_tune \
        else "validation/main/em"
    earlystop_trigger_rl = triggers.EarlyStoppingTrigger(
        monitor=monitor_rl, patience=10, mode="max",
        max_trigger=(args.num_epochs, 'epoch'))
    trainer_rl = training.Trainer(updater_rl, earlystop_trigger_rl)

    save_model_file_rl = "best_model_rl_" + str(
        args.train_ratio) + "_" + str(
        args.dev_ratio) if args.fine_tune else "best_model_" + str(
        args.train_ratio) + "_" + str(args.dev_ratio)
    trainer_rl.extend(
        extensions.snapshot_object(model, save_model_file_rl),
        trigger=chainer.training.triggers.MaxValueTrigger(monitor_rl))

    # computational graph 2nd way
    trainer_rl.extend(
        extensions.dump_graph(root_name="main/loss", out_name="cg.dot"))
    trainer_rl.extend(
        MReaderEvaluator(model, dev_data, device=args.gpu,
                         f1_key='validation/main/f1',
                         em_key='validation/main/em',
                         batch_size=args.batch_size, dot_file='cg_n.dot'))
    trainer_rl.extend(extensions.LogReport())
    trainer_rl.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'main/mle_loss', 'main/rl_loss',
            'validation/main/f1', 'validation/main/em', 'elapsed_time'
        ]))
    print("rl start running...")
    trainer_rl.run()

    # ---- Optional prediction on the dev set with the best model ----
    if args.predict:
        saved_model_file = "result/best_model_rl_" + str(
            args.train_ratio) + "_" + str(
            args.dev_ratio
        ) if args.fine_tune else "result/best_model_" + str(
            args.train_ratio) + "_" + str(args.dev_ratio)
        chainer.serializers.load_npz(saved_model_file, model)
        import time
        saved_result_file = "result_" + str(args.train_ratio) + "_" + str(
            args.dev_ratio) + "_" + str(time.time())
        if args.use_dict:
            saved_result_file += "_dict"
        with chainer.using_config('train', False):
            predictor = MReaderEvaluatorPred(model, dev_data,
                                             device=args.gpu,
                                             f1_key='validation/main/f1',
                                             em_key='validation/main/em',
                                             batch_size=args.batch_size,
                                             dot_file='cg_n.dot',
                                             file_p=saved_result_file)
            predictor()
        return
def handler(context):
    """Train a VGG image classifier on datasets served by the ABEJA Platform.

    Parameters
    ----------
    context : object
        Platform-provided context; only ``context.datasets`` (a mapping
        with 'train' and 'test' dataset ids) is read here.

    Side effects: downloads both datasets, trains for up to ``epochs``
    epochs (module-level setting) and writes logs/model snapshots under
    ``ABEJA_TRAINING_RESULT_DIR``.
    """
    class_labels = 10

    # Resolve dataset ids from the platform context and materialize both
    # splits in memory.
    dataset_alias = context.datasets
    train_dataset_id = dataset_alias['train']
    test_dataset_id = dataset_alias['test']
    train_data = list(load_dataset_from_api(train_dataset_id))
    test_data = list(load_dataset_from_api(test_dataset_id))

    # train=True presumably enables training-time augmentation inside
    # ImageDatasetFromAPI -- TODO confirm.
    train = ImageDatasetFromAPI(train_data, train=True)
    test = ImageDatasetFromAPI(test_data)

    net = utils.VGG.VGG(class_labels)
    model = L.Classifier(net)

    if USE_GPU >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(USE_GPU).use()
        model.to_gpu()  # Copy the model to the GPU

    # Momentum SGD with L2 weight decay.
    optimizer = chainer.optimizers.MomentumSGD(learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, batchsize)
    test_iter = chainer.iterators.SerialIterator(test, batchsize,
                                                 repeat=False, shuffle=False)

    stop_trigger = (epochs, 'epoch')
    # Early stopping option: `early_stopping` holds the metric name to
    # watch (falsy disables it).
    if early_stopping:
        stop_trigger = triggers.EarlyStoppingTrigger(
            monitor=early_stopping, verbose=True,
            max_trigger=(epochs, 'epoch'))

    # Set up a trainer
    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=USE_GPU)
    trainer = training.Trainer(updater, stop_trigger,
                               out=ABEJA_TRAINING_RESULT_DIR)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=USE_GPU))

    # Reduce the learning rate by half every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5),
                   trigger=(25, 'epoch'))

    # BUG FIX: this snapshot used trigger=(epochs, 'epoch'), which fires
    # only on the very last epoch; when EarlyStoppingTrigger ended training
    # earlier the trained weights were never saved.  Saving every epoch
    # overwrites the same file, so 'net.model' always holds the weights of
    # the last completed epoch.
    trainer.extend(extensions.snapshot_object(net, 'net.model'),
                   trigger=(1, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # "main" refers to the target link of the "main" optimizer; "validation"
    # is the default name of the Evaluator extension.
    report_entries = [
        'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy'
    ]

    trainer.extend(Statistics(report_entries, epochs), trigger=(1, 'epoch'))
    trainer.extend(Tensorboard(report_entries, out_dir=log_path))
    trainer.extend(extensions.PrintReport(report_entries))

    trainer.run()
def main():
    """MNIST example with semi-supervised training via VAT.

    Splits MNIST into a small labelled pool (100 samples), a large
    unlabelled pool and a 1000-sample test subset, then trains a
    VATLossClassifier-wrapped MLP with a custom VATUpdater that consumes
    both pools.
    """
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    parser.add_argument('--noplot', dest='plot', action='store_false',
                        help='Disable PlotReport extension')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.  The classifier reports softmax
    # cross entropy loss and accuracy at every iteration, which will be
    # used by the PrintReport extension below.
    model = VATLossClassifier(MLP(args.unit, 10))
    #model = VATLossClassifier(MLP_cnn(args.unit, 10))
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam(alpha=0.0001)
    optimizer.setup(model)

    # Load the MNIST dataset and shuffle both splits in place.
    # NOTE(review): no fixed seed here, so the labelled/unlabelled
    # partition differs between runs -- confirm this is intended.
    train, test = chainer.datasets.get_mnist(ndim=3)
    list_train = list(train)
    list_test = list(test)
    random.shuffle(list_train)
    random.shuffle(list_test)

    # 100 labelled samples; everything except the last `test_size` entries
    # of the shuffled train split becomes the unlabelled pool.
    label_size = 100
    test_size = 1000
    list_train_labelled = list_train[0:label_size]
    list_train_unlabelled = list_train[label_size:-test_size]
    #list_test_a=list_train[-test_size:]
    list_test_a = list_test[-test_size:]
    #tuple_test=tuple(a.test)
    tuple_train_l = tuple(list_train_labelled)
    tuple_train_ul = tuple(list_train_unlabelled)
    tuple_test = tuple(list_test_a)

    # Separate iterators for the labelled (batch 100) and unlabelled
    # (batch 250) pools; VATUpdater draws from both.
    train_l_iter = chainer.iterators.SerialIterator(tuple_train_l, 100)
    train_ul_iter = chainer.iterators.SerialIterator(tuple_train_ul, 250)
    test_iter = chainer.iterators.SerialIterator(tuple_test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer.
    # NOTE(review): stop_trigger is built but never passed to the Trainer
    # below (only the commented-out alternative would use it); training
    # currently always runs for args.epoch epochs.
    stop_trigger = triggers.EarlyStoppingTrigger(
        monitor='validation/main/loss', max_trigger=(20, 'epoch'))
    #updater = training.updaters.StandardUpdater(train_l_iter, optimizer, device=args.gpu)
    updater = VATUpdater(train_l_iter, train_ul_iter, optimizer,
                         device=args.gpu)
    #updater.first_iter()
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    #trainer = training.Trainer(updater, stop_trigger , out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot for each specified epoch (defaults to once, at the
    # final epoch, when --frequency is -1).
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

    # Write a log of evaluation statistics every 50 epochs.
    trainer.extend(extensions.LogReport(trigger=(50, 'epoch')))

    # Save two plot images to the result dir
    if args.plot and extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', trigger=(50, 'epoch'),
                                  file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'], 'epoch',
                trigger=(50, 'epoch'), file_name='accuracy.png'))

    # Print selected entries of the log to stdout every 500 iterations.
    # "main" refers to the target link of the "main" optimizer;
    # "validation" is the default name of the Evaluator extension.
    pr = extensions.PrintReport([
        'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy', 'elapsed_time'
    ])
    # Override the extension's reporting interval after construction.
    pr.trigger = 500, 'iteration'
    trainer.extend(pr)
    #trainer.extend(extensions.PrintReport(
    #['epoch', 'main/loss', 'validation/main/loss',
    #'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def main():
    """Train a small CNN on MNIST using the focal loss and report metrics."""
    args = parse_args()

    # Echo the run configuration before training starts.
    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    train, test = get_mnist(ndim=3)

    class_labels = 10
    # Classifier wraps the CNN and reports loss/accuracy each iteration;
    # the default loss is replaced by the focal loss.
    model = L.Classifier(CNN(class_labels), lossfun=focal_loss)
    if args.gpu >= 0:
        # Bind the process to the requested GPU and move the model there.
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Momentum SGD with L2 weight decay.
    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(5e-4))

    iter_train = chainer.iterators.SerialIterator(train, args.batchsize)
    iter_test = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    # Stop after the requested number of epochs, or earlier when a metric
    # name was supplied on the command line and it stops improving.
    if args.early_stopping:
        stop_trigger = triggers.EarlyStoppingTrigger(
            monitor=args.early_stopping, verbose=True,
            max_trigger=(args.epoch, 'epoch'))
    else:
        stop_trigger = (args.epoch, 'epoch')

    updater = training.updaters.StandardUpdater(
        iter_train, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    # Per-epoch evaluation on the held-out split.
    trainer.extend(extensions.Evaluator(iter_test, model, device=args.gpu))
    # Halve the learning rate every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5),
                   trigger=(25, 'epoch'))
    # Computational graph rooted at the 'main/loss' variable.
    trainer.extend(extensions.dump_graph('main/loss'))
    # Full trainer snapshot at the final epoch.
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    # Per-epoch logging plus console reporting.
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Continue from a previously saved trainer snapshot.
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def train(model_class, n_base_units, trained_model, no_obj_weight, data,
          result_dir, initial_batch_size=10, max_batch_size=1000,
          max_epoch=100):
    """Train a YOLO-style detector and return the best-validation-loss model.

    Args:
        model_class: detector class; must expose ``img_size`` and ``n_grid``
            class attributes and accept ``(n_classes, n_base_units)``.
        n_base_units: width multiplier forwarded to ``model_class``.
        trained_model: optional source model whose parameters are copied in
            as initialization (falsy to skip).
        no_obj_weight: loss weight for the background class (id 0).
        data: tuple ``(train_x, train_y, val_x, val_y)``.
        result_dir: output directory for logs and snapshots.
        initial_batch_size: starting batch size; grown adaptively up to
            ``max_batch_size`` by AdaptiveBatchsizeIncrement.
        max_epoch: hard upper bound on training epochs.

    Returns:
        The model with the best-validation-loss weights loaded.
    """
    train_x, train_y, val_x, val_y = data

    # Infer the number of classes from the validation labels (the class id
    # is stored at index 4 of each object annotation).
    # NOTE(review): train_y is not scanned -- assumes val_y covers every
    # class id; confirm.
    max_class_id = 0
    for objs in val_y:
        for obj in objs:
            max_class_id = max(max_class_id, obj[4])
    n_classes = max_class_id + 1

    # Down-weight the background class in the loss.
    class_weights = [1.0] * n_classes
    class_weights[0] = no_obj_weight

    train_dataset = YoloDataset(train_x, train_y,
                                target_size=model_class.img_size,
                                n_grid=model_class.n_grid, augment=True,
                                class_weights=class_weights)
    test_dataset = YoloDataset(val_x, val_y,
                               target_size=model_class.img_size,
                               n_grid=model_class.n_grid, augment=False,
                               class_weights=class_weights)

    model = model_class(n_classes, n_base_units)
    model.loss_calc = LossCalculator(n_classes, class_weights=class_weights)

    # If a previous run already produced a best-loss snapshot, reuse it.
    last_result_file = os.path.join(result_dir, 'best_loss.npz')
    if os.path.exists(last_result_file):
        try:
            chainer.serializers.load_npz(last_result_file, model)
            print('this training has done. resuse the result')
            return model
        except Exception:
            # BUG FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit.  A corrupt or incompatible
            # snapshot now simply falls through to a fresh training run.
            pass

    if trained_model:
        print('copy params from trained model')
        copy_params(trained_model, model)

    optimizer = Adam()
    optimizer.setup(model)

    # One worker per physical core (cpu_count reports logical cores).
    n_physical_cpu = int(math.ceil(multiprocessing.cpu_count() / 2))

    train_iter = MultiprocessIterator(train_dataset,
                                      batch_size=initial_batch_size,
                                      n_prefetch=n_physical_cpu,
                                      n_processes=n_physical_cpu)
    test_iter = MultiprocessIterator(test_dataset,
                                     batch_size=initial_batch_size,
                                     shuffle=False, repeat=False,
                                     n_prefetch=n_physical_cpu,
                                     n_processes=n_physical_cpu)

    updater = StandardUpdater(train_iter, optimizer, device=0)
    # Stop when validation loss has not improved for 10 epoch-checks, or at
    # max_epoch at the latest.  (`patience` is the canonical spelling of
    # the deprecated `patients` alias.)
    stopper = triggers.EarlyStoppingTrigger(check_trigger=(1, 'epoch'),
                                            monitor="validation/main/loss",
                                            patience=10, mode="min",
                                            max_trigger=(max_epoch, "epoch"))
    trainer = Trainer(updater, stopper, out=result_dir)

    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(extensions.Evaluator(test_iter, model, device=0))
    trainer.extend(
        extensions.PrintReport([
            'epoch',
            'main/loss', 'validation/main/loss',
            'main/cl_loss', 'validation/main/cl_loss',
            'main/cl_acc', 'validation/main/cl_acc',
            'main/pos_loss', 'validation/main/pos_loss',
        ]))

    # Keep best snapshots by several criteria, plus the latest weights.
    trainer.extend(extensions.snapshot_object(model, 'best_loss.npz'),
                   trigger=triggers.MinValueTrigger('validation/main/loss'))
    trainer.extend(extensions.snapshot_object(model, 'best_classification.npz'),
                   trigger=triggers.MaxValueTrigger('validation/main/cl_acc'))
    trainer.extend(
        extensions.snapshot_object(model, 'best_position.npz'),
        trigger=triggers.MinValueTrigger('validation/main/pos_loss'))
    trainer.extend(extensions.snapshot_object(model, 'model_last.npz'),
                   trigger=(1, 'epoch'))
    trainer.extend(AdaptiveBatchsizeIncrement(maxsize=max_batch_size),
                   trigger=(1, 'epoch'))

    trainer.run()

    # Reload the best-validation-loss weights before returning.
    chainer.serializers.load_npz(os.path.join(result_dir, 'best_loss.npz'),
                                 model)
    return model
def train_phase(predictor, train, valid, args):
    """Train `predictor` with early stopping, periodic snapshots and plots.

    Args:
        predictor: network link to train; wrapped in a Classifier with a
            non-normalizing softmax cross entropy loss.
        train: training dataset (must expose ``n_classes``).
        valid: validation dataset.
        args: parsed options; fields read here: batchsize, gpu (list of
            device ids), lr, decay, iteration, frequency, pinfall, out,
            resume.
    """
    print('# classes:', train.n_classes)
    print('# samples:')
    print('-- train:', len(train))
    print('-- valid:', len(valid))

    # setup dataset iterators
    # Effective batch size scales with the number of GPUs but never
    # exceeds the dataset size.
    train_batchsize = min(args.batchsize * len(args.gpu), len(train))
    valid_batchsize = args.batchsize
    train_iter = chainer.iterators.MultiprocessIterator(train,
                                                        train_batchsize)
    # NOTE(review): shuffle=True on a repeat=False validation iterator only
    # randomizes per-pass sample order; it matters for which samples get
    # visualized below -- confirm intended.
    valid_iter = chainer.iterators.SerialIterator(valid, valid_batchsize,
                                                  repeat=False, shuffle=True)

    # setup a model
    class_weight = None  # NOTE: please set if you have..
    # normalize=False sums (rather than averages) the per-element losses.
    lossfun = partial(softmax_cross_entropy, normalize=False,
                      class_weight=class_weight)
    model = Classifier(predictor, lossfun=lossfun)

    if args.gpu[0] >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu[0]).use()
        if len(args.gpu) == 1:
            # Single-GPU: move the model explicitly; in the multi-GPU
            # branch ParallelUpdater handles device placement itself.
            model.to_gpu()

    # setup an optimizer
    optimizer = chainer.optimizers.Adam(alpha=args.lr, beta1=0.9,
                                        beta2=0.999, eps=1e-08,
                                        amsgrad=False)
    optimizer.setup(model)
    if args.decay > 0:
        optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(args.decay))

    # setup a trainer
    if len(args.gpu) == 1:
        updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                    device=args.gpu[0])
    else:
        # First GPU is 'main'; the rest become enumerated slave devices.
        devices = {'main': args.gpu[0]}
        for idx, g in enumerate(args.gpu[1:]):
            devices['slave_%d' % idx] = g
        updater = training.updaters.ParallelUpdater(train_iter, optimizer,
                                                    devices=devices)

    # Evaluation/snapshot interval: 1/20th of the run by default.
    frequency = max(args.iteration // 20,
                    1) if args.frequency == -1 else max(1, args.frequency)

    # Early stopping on validation loss; pinfall=-1 disables it by using
    # an infinite patience.
    stop_trigger = triggers.EarlyStoppingTrigger(
        monitor='validation/main/loss',
        max_trigger=(args.iteration, 'iteration'),
        check_trigger=(frequency, 'iteration'),
        patients=np.inf if args.pinfall == -1 else max(1, args.pinfall))

    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    # setup a visualizer
    # 'y' (prediction) is argmax-ed over the class axis; 'x' and 't' pass
    # through unchanged.
    transforms = {
        'x': lambda x: x,
        'y': lambda x: np.argmax(x, axis=0),
        't': lambda x: x
    }

    cmap = np.array([[0, 0, 0], [0, 0, 1]])  # two-class colormap
    cmaps = {'x': None, 'y': cmap, 't': cmap}
    clims = {'x': 'minmax', 'y': None, 't': None}

    visualizer = ImageVisualizer(transforms=transforms, cmaps=cmaps,
                                 clims=clims)

    # setup a validator
    valid_file = os.path.join('validation',
                              'iter_{.updater.iteration:08}.png')
    trainer.extend(Validator(valid_iter, model, valid_file,
                             visualizer=visualizer, n_vis=20,
                             device=args.gpu[0]),
                   trigger=(frequency, 'iteration'))

    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(
        filename='snapshot_iter_{.updater.iteration:08}.npz'),
        trigger=(frequency, 'iteration'))
    trainer.extend(extensions.snapshot_object(
        predictor, 'predictor_iter_{.updater.iteration:08}.npz'),
        trigger=(frequency, 'iteration'))

    log_keys = [
        'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy'
    ]
    trainer.extend(LogReport(keys=log_keys))

    # setup log ploter
    if extensions.PlotReport.available():
        for plot_key in ['loss', 'accuracy']:
            # Group train/validation curves of one metric in a single plot.
            plot_keys = [
                key for key in log_keys
                if key.split('/')[-1].startswith(plot_key)
            ]
            trainer.extend(
                extensions.PlotReport(plot_keys, 'iteration',
                                      file_name=plot_key + '.png',
                                      trigger=(frequency, 'iteration')))

    trainer.extend(
        PrintReport(['iteration'] + log_keys + ['elapsed_time'], n_step=100))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # train
    trainer.run()
def main():
    """Train a VGG classifier on CIFAR-10/100 (ChainerX devices unsupported)."""
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--device', '-d', type=str, default='0',
                        help='Device specifier. Either ChainerX device '
                        'specifier or an integer. If non-negative integer, '
                        'CuPy arrays with specified device id are used. If '
                        'negative integer, NumPy arrays are used')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--early-stopping', type=str,
                        help='Metric to watch for early stopping')
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu', '-g', dest='device',
                       type=int, nargs='?', const=0,
                       help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # FP16 training of this model is known to be numerically fragile.
    if chainer.get_dtype() == numpy.float16:
        warnings.warn('This example may cause NaN in FP16 mode.',
                      RuntimeWarning)

    device = chainer.get_device(args.device)
    if device.xp is chainerx:
        sys.stderr.write('This example does not support ChainerX devices.\n')
        sys.exit(1)

    print('Device: {}'.format(device))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    device.use()

    # Pick the dataset.  The Classifier link reports softmax cross entropy
    # loss and accuracy at every iteration, consumed by PrintReport below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train_set, test_set = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train_set, test_set = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    model = L.Classifier(models.VGG.VGG(class_labels))
    model.to_device(device)

    # Momentum SGD with L2 weight decay.
    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(5e-4))

    iter_train = chainer.iterators.SerialIterator(train_set, args.batchsize)
    iter_test = chainer.iterators.SerialIterator(
        test_set, args.batchsize, repeat=False, shuffle=False)

    # Stop after --epoch epochs, or earlier when the watched metric stops
    # improving (only if --early-stopping names a metric).
    if args.early_stopping:
        stop_trigger = triggers.EarlyStoppingTrigger(
            monitor=args.early_stopping, verbose=True,
            max_trigger=(args.epoch, 'epoch'))
    else:
        stop_trigger = (args.epoch, 'epoch')

    updater = training.updaters.StandardUpdater(
        iter_train, optimizer, device=device)
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    # Per-epoch evaluation on the test split.
    trainer.extend(extensions.Evaluator(iter_test, model, device=device))
    # Halve the learning rate every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5),
                   trigger=(25, 'epoch'))
    # Graph of the 'main/loss' variable (not supported under ChainerX).
    # TODO(hvy): Support ChainerX
    if device.xp is not chainerx:
        trainer.extend(extensions.DumpGraph('main/loss'))
    # Full trainer snapshot at the final epoch, plus per-epoch reporting.
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Continue from a previously saved trainer snapshot.
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train a pairwise-SMILES binary classifier and pickle the result.

    Reads all hyper-parameters from ``parse_arguments()`` (a dict).
    Trains with early stopping on validation loss, evaluates
    accuracy/ROC-AUC/PRC-AUC/F1 on both splits each epoch, and saves the
    classifier under ``output/<out>/<model_filename>``.

    Raises:
        ValueError: when no target label is specified or an unknown
            learning-rate shift strategy is configured.
    """
    # Parse the arguments.
    args = parse_arguments()

    if args['label']:
        labels = args['label']
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        raise ValueError('No target label was specified.')

    # Dataset preparation. Postprocessing is required for the regression task.
    def postprocess_label(label_list):
        label_arr = np.asarray(label_list, dtype=np.int32)
        return label_arr

    # Apply a preprocessor to the dataset.
    logging.info('Preprocess train dataset and valid dataset...')
    # use `ggnn` for the time being
    preprocessor = preprocess_method_dict['ggnn']()

    # BUG FIX: the 'ssp' check was a separate `if` carrying the `else`, so a
    # parser chosen by the 'molenc' branch was immediately overwritten by the
    # Mol2Vec fallback.  `elif` keeps the three choices mutually exclusive.
    if args['feature'] == 'molenc':
        parser = MolAutoencoderParserForPair(
            preprocessor, postprocess_label=postprocess_label,
            labels=labels, smiles_cols=['smiles_1', 'smiles_2'])
    elif args['feature'] == 'ssp':
        parser = SSPParserForPair(preprocessor,
                                  postprocess_label=postprocess_label,
                                  labels=labels,
                                  smiles_cols=['smiles_1', 'smiles_2'])
    else:
        parser = Mol2VecParserForPair(preprocessor,
                                      postprocess_label=postprocess_label,
                                      labels=labels,
                                      smiles_cols=['smiles_1', 'smiles_2'])

    train = parser.parse(args['train_datafile'])['dataset']
    valid = parser.parse(args['valid_datafile'])['dataset']

    if args['augment']:
        logging.info('Utilizing data augmentation in train set')
        train = augment_dataset(train)

    num_train = train.get_datasets()[0].shape[0]
    num_valid = valid.get_datasets()[0].shape[0]
    logging.info('Train/test split: {}/{}'.format(num_train, num_valid))

    # Hidden-layer sizes for the head network, e.g. "32,16" -> (32, 16).
    if len(args['net_hidden_dims']):
        net_hidden_dims = tuple([
            int(net_hidden_dim)
            for net_hidden_dim in args['net_hidden_dims'].split(',')
        ])
    else:
        net_hidden_dims = ()

    predictor = set_up_predictor(fp_out_dim=args['fp_out_dim'],
                                 net_hidden_dims=net_hidden_dims,
                                 class_num=class_num,
                                 sim_method=args['sim_method'],
                                 symmetric=args['symmetric'])

    train_iter = SerialIterator(train, args['batchsize'])
    test_iter = SerialIterator(valid, args['batchsize'],
                               repeat=False, shuffle=False)

    metrics_fun = {'accuracy': F.binary_accuracy}
    classifier = Classifier(predictor, lossfun=F.sigmoid_cross_entropy,
                            metrics_fun=metrics_fun, device=args['gpu'])

    # Set up the optimizer.
    optimizer = optimizers.Adam(alpha=args['learning_rate'],
                                weight_decay_rate=args['weight_decay_rate'])
    optimizer.setup(classifier)

    # add regularization
    if args['max_norm'] > 0:
        optimizer.add_hook(
            chainer.optimizer.GradientClipping(threshold=args['max_norm']))
    if args['l2_rate'] > 0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args['l2_rate']))
    if args['l1_rate'] > 0:
        optimizer.add_hook(chainer.optimizer.Lasso(rate=args['l1_rate']))

    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args['gpu'],
                                       converter=concat_mols)

    # Set up the trainer.
    logging.info('Training...')
    # Stop when validation loss stalls for 10 checks; 500 epochs at most.
    # (`patience` is the canonical name of the deprecated `patients` alias.)
    early_stop = triggers.EarlyStoppingTrigger(monitor='validation/main/loss',
                                               patience=10,
                                               max_trigger=(500, 'epoch'))
    out = 'output' + '/' + args['out']
    trainer = training.Trainer(updater, stop_trigger=early_stop, out=out)

    trainer.extend(
        E.Evaluator(test_iter, classifier, device=args['gpu'],
                    converter=concat_mols))

    # Non-repeating pass over the train split for train-side metrics.
    train_eval_iter = SerialIterator(train, args['batchsize'],
                                     repeat=False, shuffle=False)
    trainer.extend(
        AccuracyEvaluator(train_eval_iter, classifier, eval_func=predictor,
                          device=args['gpu'], converter=concat_mols,
                          name='train_acc', pos_labels=1, ignore_labels=-1,
                          raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(
        AccuracyEvaluator(test_iter, classifier, eval_func=predictor,
                          device=args['gpu'], converter=concat_mols,
                          name='val_acc', pos_labels=1, ignore_labels=-1))

    trainer.extend(
        ROCAUCEvaluator(train_eval_iter, classifier, eval_func=predictor,
                        device=args['gpu'], converter=concat_mols,
                        name='train_roc', pos_labels=1, ignore_labels=-1,
                        raise_value_error=False))
    trainer.extend(
        ROCAUCEvaluator(test_iter, classifier, eval_func=predictor,
                        device=args['gpu'], converter=concat_mols,
                        name='val_roc', pos_labels=1, ignore_labels=-1))

    trainer.extend(
        PRCAUCEvaluator(train_eval_iter, classifier, eval_func=predictor,
                        device=args['gpu'], converter=concat_mols,
                        name='train_prc', pos_labels=1, ignore_labels=-1,
                        raise_value_error=False))
    trainer.extend(
        PRCAUCEvaluator(test_iter, classifier, eval_func=predictor,
                        device=args['gpu'], converter=concat_mols,
                        name='val_prc', pos_labels=1, ignore_labels=-1))

    trainer.extend(
        F1Evaluator(train_eval_iter, classifier, eval_func=predictor,
                    device=args['gpu'], converter=concat_mols,
                    name='train_f', pos_labels=1, ignore_labels=-1,
                    raise_value_error=False))
    trainer.extend(
        F1Evaluator(test_iter, classifier, eval_func=predictor,
                    device=args['gpu'], converter=concat_mols,
                    name='val_f', pos_labels=1, ignore_labels=-1))

    # apply shift strategy to learning rate at preset epoch milestones
    if args['exp_shift_strategy'] == 1:
        trainer.extend(E.ExponentialShift('alpha', args['exp_shift_rate']),
                       trigger=triggers.ManualScheduleTrigger(
                           [10, 20, 30, 40, 50, 60], 'epoch'))
    elif args['exp_shift_strategy'] == 2:
        trainer.extend(E.ExponentialShift('alpha', args['exp_shift_rate']),
                       trigger=triggers.ManualScheduleTrigger(
                           [5, 10, 15, 20, 25, 30], 'epoch'))
    elif args['exp_shift_strategy'] == 3:
        trainer.extend(E.ExponentialShift('alpha', args['exp_shift_rate']),
                       trigger=triggers.ManualScheduleTrigger(
                           [5, 10, 15, 20, 25, 30, 40, 50, 60, 70], 'epoch'))
    else:
        raise ValueError('No such strategy to adapt learning rate')

    # observation of learning rate
    trainer.extend(E.observe_lr(), trigger=(1, 'iteration'))

    entries = [
        'epoch',
        'main/loss', 'train_acc/main/accuracy', 'train_roc/main/roc_auc',
        'train_prc/main/prc_auc', 'train_f/main/f1',
        'validation/main/loss', 'val_acc/main/accuracy',
        'val_roc/main/roc_auc', 'val_prc/main/prc_auc', 'val_f/main/f1',
        'lr', 'elapsed_time'
    ]
    trainer.extend(E.PrintReport(entries=entries))

    # Snapshot every 2 epochs (changed from 10 on Mar. 1 2019).
    trainer.extend(E.snapshot(), trigger=(2, 'epoch'))
    trainer.extend(E.LogReport())
    trainer.extend(E.ProgressBar())
    trainer.extend(
        E.PlotReport(['main/loss', 'validation/main/loss'], 'epoch',
                     file_name='loss.png'))
    trainer.extend(
        E.PlotReport(['train_acc/main/accuracy', 'val_acc/main/accuracy'],
                     'epoch', file_name='accuracy.png'))

    if args['resume']:
        resume_path = os.path.join(out, args['resume'])
        logging.info(
            'Resume training according to snapshot in {}'.format(resume_path))
        chainer.serializers.load_npz(resume_path, trainer)

    trainer.run()

    # Save the regressor's parameters.
    model_path = os.path.join(out, args['model_filename'])
    logging.info('Saving the trained models to {}...'.format(model_path))
    classifier.save_pickle(model_path, protocol=args['protocol'])
def main():
    """Train a CIFAR classifier (FitNet1 or simple linear) from the CLI.

    Parses arguments, trains with optional early stopping, and writes
    logs and the final snapshot under --out.
    """
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--early-stopping', type=str,
                        help='Metric to watch for early stopping')
    parser.add_argument('--model-type', type=str,
                        help='Type of model to fit', default='simple_linear')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.  Classifier reports softmax cross
    # entropy loss and accuracy at every iteration, which will be used by
    # the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    # IMPROVEMENT: map model names to factories instead of eagerly building
    # every network and keeping only one -- the unused model wasted memory
    # and construction time.  An unknown --model-type still raises KeyError,
    # exactly as before.
    modelsdict = {
        'fitnet1': lambda: models.fitnet1.FitNet1(class_labels),
        'simple_linear':
            lambda: models.simple_linear.SimpleLinear(class_labels),
    }
    model = L.Classifier(modelsdict[args.model_type]())

    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    stop_trigger = (args.epoch, 'epoch')
    # Early stopping option
    if args.early_stopping:
        stop_trigger = triggers.EarlyStoppingTrigger(
            monitor=args.early_stopping, verbose=True,
            max_trigger=(args.epoch, 'epoch'))

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Reduce the learning rate by half every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5),
                   trigger=(25, 'epoch'))

    # Take a snapshot at the final epoch.
    # NOTE(review): with early stopping enabled this trigger may never fire
    # because training can end before args.epoch -- confirm intended.
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout.  "main" refers to the
    # target link of the "main" optimizer; "validation" is the default name
    # of the Evaluator extension.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def main():
    """Entry point: train a Reinforced Mnemonic Reader (MReader_V6).

    Pipeline: parse CLI args -> load and trim train/dev data -> build
    POS/NER/word/char-embedding features (optionally ELMo) -> train with
    a Chainer Trainer, snapshotting the model with the best validation
    EM (or F1 when fine-tuning).
    """
    parser = argparse.ArgumentParser(
        'Reinforced Mnemonic Reader',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # Read parameters
    add_train_args(parser)
    args = parser.parse_args()
    set_defaults(args)
    set_random_seed(args.random_seed)
    train_data = DataUtils.load_data(args.train_file)
    dev_data = DataUtils.load_data(args.dev_file)
    # for Debug
    if DataUtils.IS_DEBUG:
        # Debug mode: keep only a tiny fixed slice of each split.
        train_data = train_data[:128]
        dev_data = dev_data[:128]
    else:
        # debug
        # NOTE(review): even this non-debug branch trains on only 0.5% of
        # the training data and 5% of the dev data — presumably left over
        # from an experiment; confirm before a full training run.
        train_size = int(len(train_data) * 0.005)
        # train_size = int(len(train_data) * 0.3)
        # train_size = int(len(train_data) * 0.7)
        # train_size = len(train_data)
        print(train_size)
        train_data = train_data[:train_size]
        dev_size = int(len(dev_data) * 0.05)
        # dev_size = len(dev_data)
        # dev_size = int(len(dev_data) * 0.5)
        print(dev_size)
        dev_data = dev_data[:dev_size]
    all_data = train_data + dev_data
    # Feature vocabularies are built over train + dev combined.
    args.pos_size = DataUtils.transform_pos_feature(all_data)
    args.ner_size = DataUtils.transform_ner_feature(all_data)
    args.qtype_size = 11  # fixed number of question-type classes
    # max_question_len = DataUtils.get_max_question_len(all_data)
    args.context_max_length = DataUtils.get_max_len(all_data, DataType.CONTEXT)
    max_question_len = DataUtils.get_max_len(all_data, DataType.QUESTION)
    DataUtils.MAX_DOC_LENGTH = args.context_max_length
    DataUtils.MAX_Q_LENGTH = max_question_len
    DataUtils.cal_mask(train_data, args.context_max_length, max_question_len)
    DataUtils.cal_mask(dev_data, args.context_max_length, max_question_len)
    ##
    # Word embeddings; overwrite=False reuses the on-disk cache files below.
    pretrain_embedding_file = os.path.join(
        args.embed_dir, "pretrain_embedding_v6_a_0.005_0.05")
    pretrain_index_file = os.path.join(
        args.embed_dir, "pretrain_index_file_v6_0.005_0.05.txt")
    # args.w_embeddings = DataUtils.load_embedding(all_data, args.embedding_file, args.embedding_dim)
    args.w_embeddings = DataUtils.load_embedding(
        all_data,
        args.embedding_file,
        args.embedding_dim,
        pretrained_embedding_file=pretrain_embedding_file,
        pretrained_index_file=pretrain_index_file,
        overwrite=False)
    # args.w_embeddings = object()
    # DataUtils.load_embedding(all_data, args.embedding_file, args.embedding_dim, args.w_embeddings)
    if DataUtils.IS_DEBUG:
        print("load_embedding : finished...")
    # Character embeddings, cached the same way.
    pretrain_char_embedding_file = os.path.join(
        args.embed_dir, "pretrain_char_embedding_v6_a_0.005_0.05")
    pretrain_char_index_file = os.path.join(
        args.embed_dir, "pretrain_char_index_file_v6_a_0.005_0.05.txt")
    args.char_embeddings = DataUtils.load_char_embedding(
        all_data,
        args.char_embedding_file,
        args.char_embedding_dim,
        pretrained_embedding_file=pretrain_char_embedding_file,
        pretrained_index_file=pretrain_char_index_file,
        overwrite=False)
    args.vocab_size = len(DataUtils.word_dict)
    args.char_size = len(DataUtils.char_dict)
    print(args.vocab_size)
    if args.use_elmo:
        # add elmo online
        # initialize elmo batcher
        DataUtils.load_elmo_batcher(args.vocab_file)
        """
        # --gpu=0
        # --hops=3
        # --ptr-hops=2
        # --data-dir=../data/datasets
        # --train-file=SQuAD-train-v1.1-processed-spacy_n.txt
        # --dev-file=SQuAD-dev-v1.1-processed-spacy_n.txt
        # --embed-dir=../data/embeddings
        # --lambda-param=1
        # --gamma=3
        # --learning-rate=0.0008
        # --encoder-dropout=0
        # --options-file
        # ../../test_elmo/src/elmo-chainer/elmo_2x4096_512_2048cnn_2xhighway_options.json
        # --weight-file
        # ../../test_elmo/src/elmo-chainer/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5
        # --vocab-file
        # ../../test_elmo/src/elmo-chainer/vocab-2016-09-10.txt
        # --context-elmo-embedding=../data/embeddings/context_embedding.npy
        # --question-elmo-embedding=../data/embeddings/question_embedding.npy
        #DataUtils.load_elmo_embedding(args.context_elmo_embedding, args.question_elmo_embedding)
        DataUtils.load_elmo_embedding(args.sentence_mapping_file, args.h5py_embedding_file)
        """
    # train_data = DataUtils.convert_data(train_data, args.context_max_length, max_question_len)
    # dev_data = DataUtils.convert_data(dev_data, args.context_max_length, max_question_len)
    # train_data = tuple_dataset.TupleDataset(train_data)
    # dev_data = tuple_dataset.TupleDataset(dev_data)
    train_data_input = chainer.datasets.TransformDataset(
        train_data, DataUtils.convert_item)
    # dev_data = chainer.datasets.TransformDataset(dev_data, DataUtils.convert_item_dev)
    # because of memory
    # args.batch_size = 32
    args.batch_size = 16
    # args.batch_size = 8
    args.num_features = 4
    args.num_epochs = 100
    # cg
    args.dot_file = "cg_f__.dot"
    # model = MReader_V3(args)
    model = MReader_V6(args)
    if args.fine_tune:
        # RL fine-tuning starts from the best supervised checkpoint.
        chainer.serializers.load_npz('result/best_model', model)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu(args.gpu)
    optimizer = chainer.optimizers.Adam(alpha=args.learning_rate)
    optimizer.setup(model)
    train_iter = chainer.iterators.SerialIterator(train_data_input,
                                                  args.batch_size)
    # validation_iter = chainer.iterators.SerialIterator(dev_data, args.batch_size, repeat=False, shuffle=False)
    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       loss_func=model.get_loss_function(),
                                       device=args.gpu)
    """
    earlystop_trigger = triggers.EarlyStoppingTrigger(monitor='main/loss',
        patients=5, max_trigger=(args.num_epochs, 'epoch'))
    """
    # Monitor F1 during RL fine-tuning, EM during supervised training.
    monitor = "validation/main/f1" if args.fine_tune else "validation/main/em"
    # NOTE(review): earlystop_trigger is constructed but unused — the active
    # Trainer below runs for a fixed number of epochs (see commented line).
    # `patients` is this project's accepted alias for `patience`.
    earlystop_trigger = triggers.EarlyStoppingTrigger(
        monitor=monitor,
        patients=5,
        mode="max",
        max_trigger=(args.num_epochs, 'epoch'))
    trainer = training.Trainer(updater, (args.num_epochs, 'epoch'))
    # trainer = training.Trainer(updater, earlystop_trigger)
    save_model_file = "best_model_rl" if args.fine_tune else "best_model"
    """
    trainer.extend(
        extensions.snapshot_object(model, save_model_file),
        trigger=chainer.training.triggers.MinValueTrigger('main/loss')
    )
    """
    # Keep the snapshot that maximizes the monitored validation metric.
    trainer.extend(extensions.snapshot_object(model, save_model_file),
                   trigger=chainer.training.triggers.MaxValueTrigger(monitor))
    # trainer.extend(extensions.Evaluator(validation_iter, model, device=args.gpu, eval_func=model.get_evaluation_fun()))
    '''
    trainer.extend(
        MReaderEvaluator(
            model, dev_data, device=args.gpu,
            f1_key='validation/main/f1', em_key='validation/main/em',
            batch_size=args.batch_size, dot_file='cg_n.dot'
        )
    )
    '''
    # computational graph 2nd way
    trainer.extend(
        extensions.dump_graph(root_name="main/loss", out_name="cg.dot"))
    trainer.extend(
        MReaderEvaluator(model,
                         dev_data,
                         device=args.gpu,
                         f1_key='validation/main/f1',
                         em_key='validation/main/em',
                         batch_size=args.batch_size,
                         dot_file='cg_n.dot'))
    trainer.extend(extensions.LogReport())
    '''
    trainer.extend(
        extensions.LogReport(trigger=(args.log_interval, 'iteration'))
    )
    '''
    '''
    trainer.extend(
        extensions.PrintReport(
            ['epoch', 'iteration', 'main/loss',
             'validation/main/f1',
             'validation/main/em', 'elapsed_time']
        ),
        trigger=(args.log_interval, 'iteration')
    )
    '''
    '''
    trainer.extend(
        extensions.PrintReport(
            ['epoch', 'iteration', 'main/loss',
             'validation/main/f1',
             'validation/main/em', 'elapsed_time']
        )
    )
    '''
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'main/mle_loss', 'main/rl_loss',
            'validation/main/f1', 'validation/main/em', 'elapsed_time'
        ]))
    # trainer.extend(extensions.ProgressBar())
    print('start training')
    trainer.run()
def main(args=None):
    """Train a multi-label image classifier, or search decision thresholds.

    Args:
        args: Optional list of CLI tokens; ``None`` means ``sys.argv[1:]``.

    Side effects: writes snapshots and logs under ``--out``, pickles the
    parsed arguments for prediction time, and after training reports the
    optimal threshold for the best-loss and best-F2 snapshots.
    """
    set_random_seed(63)
    chainer.global_config.autotune = True
    chainer.cuda.set_max_workspace_size(512 * 1024 * 1024)
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.01,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=80,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--loss-function',
                        choices=['focal', 'sigmoid'], default='focal')
    parser.add_argument('--optimizer',
                        choices=['sgd', 'adam', 'adabound'], default='adam')
    parser.add_argument('--size', type=int, default=224)
    parser.add_argument('--limit', type=int, default=None)
    parser.add_argument('--data-dir', type=str, default='data')
    parser.add_argument('--lr-search', action='store_true')
    parser.add_argument('--pretrained', type=str, default='')
    parser.add_argument('--backbone',
                        choices=['resnet', 'seresnet', 'debug_model'],
                        default='resnet')
    parser.add_argument('--log-interval', type=int, default=100)
    parser.add_argument('--find-threshold', action='store_true')
    parser.add_argument('--finetune', action='store_true')
    parser.add_argument('--mixup', action='store_true')
    # FIX: args.dropout is consumed below (backbone_catalog[...](args.dropout))
    # but no such option was registered, so every run crashed with
    # AttributeError. TODO(review): confirm the intended default rate.
    parser.add_argument('--dropout', type=float, default=0.5,
                        help='Dropout rate passed to the backbone model')
    args = parser.parse_args() if args is None else parser.parse_args(args)
    print(args)

    if args.mixup and args.loss_function != 'focal':
        raise ValueError('mixupを使うときはfocal lossしか使えません(いまんところ)')

    train, test, cooccurrence = get_dataset(args.data_dir, args.size,
                                            args.limit, args.mixup)
    base_model = backbone_catalog[args.backbone](args.dropout)

    if args.pretrained:
        print('loading pretrained model: {}'.format(args.pretrained))
        chainer.serializers.load_npz(args.pretrained, base_model,
                                     strict=False)
    model = TrainChain(base_model,
                       1,
                       loss_fn=args.loss_function,
                       cooccurrence=cooccurrence,
                       co_coef=0)
    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    if args.optimizer in ['adam', 'adabound']:
        optimizer = Adam(alpha=args.learnrate,
                         adabound=args.optimizer == 'adabound',
                         weight_decay_rate=1e-5,
                         gamma=5e-7)
    elif args.optimizer == 'sgd':
        optimizer = chainer.optimizers.MomentumSGD(lr=args.learnrate)

    optimizer.setup(model)

    if not args.finetune:
        print('最初のエポックは特徴抽出層をfreezeします')
        model.freeze_extractor()

    train_iter = chainer.iterators.MultiprocessIterator(train,
                                                        args.batchsize,
                                                        n_processes=8,
                                                        n_prefetch=2)
    test_iter = chainer.iterators.MultithreadIterator(test,
                                                      args.batchsize,
                                                      n_threads=8,
                                                      repeat=False,
                                                      shuffle=False)

    if args.find_threshold:
        # Threshold-search-only mode: some of the setup above (train_iter,
        # optimizer, ...) is wasted here, but kept for simplicity.
        print('thresholdを探索して終了します')
        chainer.serializers.load_npz(join(args.out, 'bestmodel_loss'),
                                     base_model)
        print('lossがもっとも小さかったモデルに対しての結果:')
        find_threshold(base_model, test_iter, args.gpu, args.out)

        chainer.serializers.load_npz(join(args.out, 'bestmodel_f2'),
                                     base_model)
        print('f2がもっとも大きかったモデルに対しての結果:')
        find_threshold(base_model, test_iter, args.gpu, args.out)
        return

    # Set up a trainer
    updater = training.updaters.StandardUpdater(
        train_iter,
        optimizer,
        device=args.gpu,
        converter=lambda batch, device: chainer.dataset.concat_examples(
            batch, device=device))
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(FScoreEvaluator(test_iter, model, device=args.gpu))

    if args.optimizer == 'sgd':
        # Weight decay reportedly does not combine well with Adam,
        # so it is only added for SGD.
        optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(5e-4))
        trainer.extend(extensions.ExponentialShift('lr', 0.1),
                       trigger=(3, 'epoch'))
        if args.lr_search:
            print('最適な学習率を探します')
            trainer.extend(LRFinder(1e-7, 1, 5, optimizer),
                           trigger=(1, 'iteration'))
    elif args.optimizer in ['adam', 'adabound']:
        if args.lr_search:
            print('最適な学習率を探します')
            trainer.extend(LRFinder(1e-7, 1, 5, optimizer, lr_key='alpha'),
                           trigger=(1, 'iteration'))
        # Decay alpha whenever validation loss stops improving.
        trainer.extend(extensions.ExponentialShift('alpha', 0.2),
                       trigger=triggers.EarlyStoppingTrigger(
                           monitor='validation/main/loss'))

    # Take a snapshot of the Trainer every 10 epochs.
    # (filename keeps the historical 'snaphot' spelling so existing
    # tooling that globs for it keeps working)
    trainer.extend(
        extensions.snapshot(filename='snaphot_epoch_{.updater.epoch}'),
        trigger=(10, 'epoch'))

    # Take a snapshot of Model which has best val loss.
    # Because searching best threshold for each evaluation takes too much time.
    trainer.extend(extensions.snapshot_object(model.model, 'bestmodel_loss'),
                   trigger=triggers.MinValueTrigger('validation/main/loss'))
    trainer.extend(extensions.snapshot_object(model.model, 'bestmodel_f2'),
                   trigger=triggers.MaxValueTrigger('validation/main/f2'))
    trainer.extend(extensions.snapshot_object(model.model,
                                              'model_{.updater.epoch}'),
                   trigger=(5, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(
        extensions.LogReport(trigger=(args.log_interval, 'iteration')))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'lr', 'elapsed_time', 'main/loss', 'main/co_loss',
            'validation/main/loss', 'validation/main/co_loss',
            'validation/main/precision', 'validation/main/recall',
            'validation/main/f2', 'validation/main/threshold'
        ]))

    trainer.extend(extensions.ProgressBar(update_interval=args.log_interval))
    trainer.extend(extensions.observe_lr(),
                   trigger=(args.log_interval, 'iteration'))
    trainer.extend(CommandsExtension())
    save_args(args, args.out)

    # Unfreeze the feature extractor after the first (warm-up) epoch.
    trainer.extend(lambda trainer: model.unfreeze_extractor(),
                   trigger=(1, 'epoch'))

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # save args with pickle for prediction time
    # FIX: use a context manager so the file handle is always closed.
    with open(str(Path(args.out).joinpath('args.pkl')), 'wb') as f:
        pickle.dump(args, f)

    # Run the training
    trainer.run()

    # find optimal threshold
    chainer.serializers.load_npz(join(args.out, 'bestmodel_loss'), base_model)
    print('lossがもっとも小さかったモデルに対しての結果:')
    find_threshold(base_model, test_iter, args.gpu, args.out)

    chainer.serializers.load_npz(join(args.out, 'bestmodel_f2'), base_model)
    print('f2がもっとも大きかったモデルに対しての結果:')
    find_threshold(base_model, test_iter, args.gpu, args.out)
def main():
    """Train a SchNet model on molecular coupling data for type ``CTYPE``.

    Loads structures plus engineered ('giba'/'kuma') features, z-score
    normalizes them, builds one graph per molecule, and trains with early
    stopping on 'valid/main/ALL_LogMAE'. In test mode (``opt.test``) it
    instead loads the best snapshot and writes OOF predictions; in both
    modes it finally writes test predictions to CSV.
    """
    #%% Load datasets
    train, valid, test, train_moles, valid_moles, test_moles = load_dataset(
        CTYPE)
    train_gp = train.groupby('molecule_name')
    valid_gp = valid.groupby('molecule_name')
    test_gp = test.groupby('molecule_name')

    #%%
    structures = pd.read_csv(DATA_PATH / 'structures.csv')
    giba_features = pd.read_csv(DATA_PATH / 'unified-features' /
                                'giba_features.csv',
                                index_col=0)
    structures = pd.merge(structures,
                          giba_features.drop(['atom_name', 'x', 'y', 'z'],
                                             axis=1),
                          on=['molecule_name', 'atom_index'])
    # Z-score every numeric feature column; ids/element/coordinates are
    # deliberately excluded from normalization.
    norm_col = [
        col for col in structures.columns
        if col not in ['molecule_name', 'atom_index', 'atom', 'x', 'y', 'z']
    ]
    structures[norm_col] = (structures[norm_col] - structures[norm_col].mean()
                            ) / structures[norm_col].std()
    structures = structures.fillna(0)
    structures_groups = structures.groupby('molecule_name')

    #%%
    if CTYPE != 'all':
        train_couple = pd.read_csv(DATA_PATH / 'typewise-dataset' /
                                   'kuma_dataset' / 'kuma_dataset' / 'train' /
                                   '{}_full.csv'.format(CTYPE),
                                   index_col=0)
    else:
        train_couple = pd.read_csv(DATA_PATH / 'typewise-dataset' /
                                   'kuma_dataset' / 'kuma_dataset' /
                                   'train_all.csv',
                                   index_col=0)
    train_couple = reduce_mem_usage(train_couple)
    train_couple = train_couple.drop(
        ['id', 'scalar_coupling_constant', 'type'], axis=1)
    if CTYPE != 'all':
        test_couple = pd.read_csv(DATA_PATH / 'typewise-dataset' /
                                  'kuma_dataset' / 'kuma_dataset' / 'test' /
                                  '{}_full.csv'.format(CTYPE),
                                  index_col=0)
    else:
        test_couple = pd.read_csv(DATA_PATH / 'typewise-dataset' /
                                  'kuma_dataset' / 'kuma_dataset' /
                                  'test_all.csv',
                                  index_col=0)
    test_couple = reduce_mem_usage(test_couple)
    test_couple = test_couple.drop(['id', 'type'], axis=1)

    # Pair-level features are normalized over train+test combined;
    # object-dtype columns are one-hot encoded instead of scaled.
    couples = pd.concat([train_couple, test_couple])
    del train_couple, test_couple
    couples_norm_col = [
        col for col in couples.columns
        if col not in ['atom_index_0', 'atom_index_1', 'molecule_name', 'type']
    ]
    for col in couples_norm_col:
        if couples[col].dtype == np.dtype('O'):
            couples = pd.get_dummies(couples, columns=[col])
        else:
            couples[col] = (couples[col] -
                            couples[col].mean()) / couples[col].std()
    couples = couples.fillna(0)
    couples = couples.replace(np.inf, 0)
    couples = couples.replace(-np.inf, 0)
    couples_groups = couples.groupby('molecule_name')

    #%% Make graphs
    feature_col = [
        col for col in structures.columns
        if col not in ['molecule_name', 'atom_index', 'atom']
    ]
    list_atoms = list(set(structures['atom']))
    print('list of atoms')
    print(list_atoms)

    train_graphs = list()
    train_targets = list()
    train_couples = list()
    print('preprocess training molecules ...')
    for mole in tqdm(train_moles):
        train_graphs.append(
            Graph(structures_groups.get_group(mole), list_atoms, feature_col))
        train_targets.append(train_gp.get_group(mole))
        train_couples.append(couples_groups.get_group(mole))

    valid_graphs = list()
    valid_targets = list()
    valid_couples = list()
    print('preprocess validation molecules ...')
    for mole in tqdm(valid_moles):
        valid_graphs.append(
            Graph(structures_groups.get_group(mole), list_atoms, feature_col))
        valid_targets.append(valid_gp.get_group(mole))
        valid_couples.append(couples_groups.get_group(mole))

    test_graphs = list()
    test_targets = list()
    test_couples = list()
    print('preprocess test molecules ...')
    for mole in tqdm(test_moles):
        test_graphs.append(
            Graph(structures_groups.get_group(mole), list_atoms, feature_col))
        test_targets.append(test_gp.get_group(mole))
        test_couples.append(couples_groups.get_group(mole))

    #%% Make datasets
    train_dataset = DictDataset(graphs=train_graphs,
                                targets=train_targets,
                                couples=train_couples)
    valid_dataset = DictDataset(graphs=valid_graphs,
                                targets=valid_targets,
                                couples=valid_couples)
    test_dataset = DictDataset(graphs=test_graphs,
                               targets=test_targets,
                               couples=test_couples)

    #%% Build Model
    model = SchNet(num_layer=NUM_LAYER)
    model.to_gpu(device=0)

    #%% Sampler
    # Batches group molecules of the same size so graphs can be stacked;
    # use_remainder=True keeps leftover molecules for eval splits.
    train_sampler = SameSizeSampler(structures_groups, train_moles,
                                    BATCH_SIZE)
    valid_sampler = SameSizeSampler(structures_groups,
                                    valid_moles,
                                    BATCH_SIZE,
                                    use_remainder=True)
    test_sampler = SameSizeSampler(structures_groups,
                                   test_moles,
                                   BATCH_SIZE,
                                   use_remainder=True)

    #%% Iterator, Optimizer
    train_iter = chainer.iterators.SerialIterator(train_dataset,
                                                  BATCH_SIZE,
                                                  order_sampler=train_sampler)
    valid_iter = chainer.iterators.SerialIterator(valid_dataset,
                                                  BATCH_SIZE,
                                                  repeat=False,
                                                  order_sampler=valid_sampler)
    test_iter = chainer.iterators.SerialIterator(test_dataset,
                                                 BATCH_SIZE,
                                                 repeat=False,
                                                 order_sampler=test_sampler)
    optimizer = optimizers.Adam(alpha=1e-3)
    optimizer.setup(model)

    #%% Updater
    if opt.multi_gpu:
        updater = training.updaters.ParallelUpdater(
            train_iter,
            optimizer,
            # The device of the name 'main' is used as a "master", while others are
            # used as slaves. Names other than 'main' are arbitrary.
            devices={
                'main': 0,
                'sub1': 1,
                'sub2': 2,
                'sub3': 3
            },
        )
    else:
        updater = training.StandardUpdater(train_iter,
                                           optimizer,
                                           converter=coupling_converter,
                                           device=0)

    # early_stopping
    # `patients` is this project's accepted alias for `patience`.
    stop_trigger = triggers.EarlyStoppingTrigger(
        patients=EARLY_STOPPING_ROUNDS,
        monitor='valid/main/ALL_LogMAE',
        max_trigger=(EPOCH, 'epoch'))
    trainer = training.Trainer(updater, stop_trigger, out=RESULT_PATH)
    # trainer = training.Trainer(updater, (100, 'epoch'), out=RESULT_PATH)

    #%% Evaluator
    trainer.extend(
        TypeWiseEvaluator(iterator=valid_iter,
                          target=model,
                          converter=coupling_converter,
                          name='valid',
                          device=0,
                          is_validate=True))
    trainer.extend(
        TypeWiseEvaluator(iterator=test_iter,
                          target=model,
                          converter=coupling_converter,
                          name='test',
                          device=0,
                          is_submit=True))

    #%% Other extensions
    # Learning-rate (alpha) decays slightly on every invocation.
    trainer.extend(training.extensions.ExponentialShift('alpha', 0.99999))
    trainer.extend(stop_train_mode(trigger=(1, 'epoch')))
    trainer.extend(
        training.extensions.observe_value(
            'alpha', lambda tr: tr.updater.get_optimizer('main').alpha))
    trainer.extend(training.extensions.LogReport(log_name=f'log_{CTYPE}'))
    trainer.extend(
        training.extensions.PrintReport([
            'epoch', 'elapsed_time', 'main/loss', 'valid/main/ALL_LogMAE',
            'alpha'
        ]))
    # trainer.extend(extensions.snapshot(filename='snapshot_epoch-{.updater.epoch}'))
    # Restore the parameters that achieved the best validation LogMAE.
    trainer.extend(SaveRestore(filename=f'best_epoch_{CTYPE}'),
                   trigger=triggers.MinValueTrigger('valid/main/ALL_LogMAE'))

    #%% Train
    if not opt.test:
        chainer.config.train = True
        trainer.run()
    else:
        chainer.config.train = False
        snapshot_path = f'results/chainer/best_epoch_{CTYPE}'
        chainer.serializers.npz.load_npz(snapshot_path, model,
                                         'updater/model:main/')
        oof = predict_iter(valid_iter, model)
        oof.to_csv(f'schnet_{CTYPE}_oof.csv', index=False)

    #%% Final Evaluation
    chainer.config.train = False
    prediction = predict_iter(test_iter, model)
    prediction.to_csv(f'schnet_{CTYPE}.csv', index=False)
def main():
    """Train a pairwise text ranker (kernel encoder) with Chainer.

    Parses CLI options, builds the vocabulary (from a word2vec file or a
    raw corpus), trains ``nets.PairwiseRanker`` with optional early
    stopping, snapshots the best model by validation accuracy, and saves
    the vocabulary plus the full run configuration under ``--out``.
    """
    start = time.time()
    current_datetime = '{}'.format(datetime.datetime.today())
    parser = argparse.ArgumentParser(description='Chainer Text Ranking')
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=5,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--unit', '-u', type=int, default=200,
                        help='Number of units')
    parser.add_argument('--h-unit', '-hu', type=int, default=128,
                        help='Number of hidden units')
    parser.add_argument('--vocabsize', type=int, default=50000,
                        help='Number of max vocabulary')
    parser.add_argument('--dropout', '-d', type=float, default=0.1,
                        help='Dropout rate')
    parser.add_argument('--train', '-T', required=True,
                        help='train dataset')
    parser.add_argument('--dev', '-D', required=True,
                        help='dev dataset')
    parser.add_argument('--vocab', '-v',
                        help='word2vec format vocab file (not binary)')
    parser.add_argument('--vocab-source', '-vs',
                        help='source file of creating vocabulary')
    parser.add_argument('--kernel', '-k',
                        help='kernel parameter file (csv)')
    parser.add_argument('--use-dataset-api', default=False,
                        action='store_true',
                        help='use TextDataset API to reduce CPU memory usage')
    parser.add_argument(
        '--validation-interval', type=int, default=10000,
        help='number of iteration to evaluate the model with validation dataset'
    )
    parser.add_argument('--snapshot-interval', type=int, default=10000,
                        help='number of iteration to save training snapshot')
    parser.add_argument('--resume', '-r', type=str,
                        help='resume the training from snapshot')
    parser.add_argument('--model', '-model', default='transfer',
                        choices=['cnn', 'transfer'],
                        help='Name of encoder model type.')
    parser.add_argument('--early-stop', action='store_true',
                        help='use early stopping method')
    parser.add_argument('--snapshot-divide', action='store_true',
                        help='save divide snapshot')
    parser.add_argument('--debug-mode', action='store_true',
                        help='debug mode')
    parser.add_argument('--save-init', action='store_true',
                        help='save init model')
    parser.add_argument('--save-epoch', action='store_true',
                        help='save model per epoch (not only best epoch)')
    parser.add_argument('--progressbar', action='store_true',
                        help='show training progressbar')

    args = parser.parse_args()
    print(json.dumps(args.__dict__, indent=2))

    # load vocabulary
    vocab, embed_init = None, None
    if args.vocab:
        # FIX: this previously assigned to a dead local (`init_vector`),
        # leaving `embed_init` as None, so the pretrained word2vec vectors
        # were silently ignored by the encoder below.
        vocab, embed_init = load_word2vec_model(args.vocab, units=args.unit)
    elif args.vocab_source:
        vocab = make_vocab(args.vocab_source, args.vocabsize)
    assert (vocab is not None)

    # load data sets
    print('load data sets')
    if args.use_dataset_api:
        # if you get out of CPU memory, this option can reduce the memory usage
        train = load_data_using_dataset_api(fi_name=args.train, vocab=vocab)
    else:
        train = get_input_dataset(fi_name=args.train, vocab=vocab)
    dev = get_input_dataset(fi_name=args.dev, vocab=vocab)

    if args.kernel:
        kernel = read_kernel(args.kernel)
    else:
        # kernel = [(m10 / 10., 0.1) for m10 in range(-9, 11, 2)]
        kernel = [(1.0, 0.001)] + [(m10 / 10., 0.1)
                                   for m10 in range(-9, 11, 2)]

    print('# train data: {}'.format(len(train)))
    print('# dev data: {}'.format(len(dev)))
    print('# vocab size: {}'.format(len(vocab)))
    print('# kernel size: {}'.format(len(kernel)))

    # Setup your defined model
    if args.model == 'transfer':
        Encoder = nets.KernelEncoder
    elif args.model == 'cnn':
        Encoder = nets.KernelEncoderCNN
    encoder = Encoder(kernel=kernel,
                      n_vocab=len(vocab),
                      n_units=args.unit,
                      dropout=args.dropout,
                      hidden_units=args.h_unit,
                      embed_init=embed_init)
    model = nets.PairwiseRanker(encoder, debug=args.debug_mode)
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(1e-4))

    # Set up a trainer
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                converter=convert_seq3,
                                                device=args.gpu)
    # Early Stopping
    if args.early_stop:
        stop_trigger = triggers.EarlyStoppingTrigger(
            monitor='validation/main/loss',
            max_trigger=(args.epoch, 'epoch'))
    else:
        stop_trigger = (args.epoch, 'epoch')
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    # Evaluate the model with the test dataset for each epoch and validation_interval
    # NOTE(review): the same evaluator (same report key) is registered on
    # both an iteration trigger and an epoch trigger — confirm that this
    # double evaluation is intentional.
    trainer.extend(nets.EvaluationPairwise(model,
                                           dev,
                                           vocab,
                                           key='validation/main',
                                           device=args.gpu),
                   trigger=(args.validation_interval, 'iteration'))
    trainer.extend(nets.EvaluationPairwise(model,
                                           dev,
                                           vocab,
                                           key='validation/main',
                                           device=args.gpu),
                   trigger=(1, 'epoch'))

    # Take a snapshot
    if args.snapshot_divide:
        trainer.extend(
            extensions.snapshot(filename='snapshot_iter_{.updater.iteration}'),
            trigger=(args.snapshot_interval, 'iteration'))
    else:
        trainer.extend(extensions.snapshot(filename='snapshot_latest'),
                       trigger=(args.snapshot_interval, 'iteration'))

    # Save a model
    if args.save_epoch:
        trainer.extend(extensions.snapshot_object(
            model, 'epoch{.updater.epoch}_model.npz'),
                       trigger=(1, 'epoch'))
    else:
        record_trigger = training.triggers.MaxValueTrigger(
            'validation/main/accuracy', (1, 'epoch'))
        trainer.extend(extensions.snapshot_object(model, 'best_model.npz'),
                       trigger=record_trigger)

    # Write a log of evaluation statistics for each epoch
    trainer.extend(
        extensions.LogReport(trigger=(args.validation_interval, 'iteration')))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
    ]),
                   trigger=(args.validation_interval, 'iteration'))

    if args.progressbar:
        # Print a progress bar to stdout
        trainer.extend(extensions.ProgressBar())

    # Save vocabulary and model's setting
    if not os.path.isdir(args.out):
        os.mkdir(args.out)
    vocab_path = os.path.join(args.out, 'vocab.json')
    with open(vocab_path, 'w') as f:
        json.dump(vocab, f)
    model_path = os.path.join(args.out, 'best_model.npz')
    # model_setup aliases args.__dict__, so these keys end up in args.json.
    model_setup = args.__dict__
    model_setup['vocab_path'] = vocab_path
    model_setup['model_path'] = model_path
    model_setup['datetime'] = current_datetime
    model_setup['kernel'] = kernel
    model_setup['vocab_size'] = len(vocab)
    with open(os.path.join(args.out, 'args.json'), 'w') as f:
        json.dump(args.__dict__, f)

    if args.save_init:
        chainer.serializers.save_npz(os.path.join(args.out, 'init_model.npz'),
                                     model)

    if args.resume is not None:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    print('Start trainer.run: {}'.format(current_datetime))
    trainer.run()

    print('Elapsed_time: {}'.format(
        datetime.timedelta(seconds=time.time() - start)))
def main(hpt):
    """Train a VAE-style model on MATLAB-exported data and evaluate it.

    Args:
        hpt: Hyper-parameter tree with ``dataset``, ``training``,
            ``optimizer``, ``loss`` and ``general`` sections (mutated in
            place with values read from the .mat files).

    Returns:
        dict: Metric name -> value, from ``evaluate``.
    """
    logger.info('load New Data From Matlab')
    if hpt.dataset.type == 'synthetic':
        treeFile = loadmat(hpt.dataset['matrixFile_struct'])
        matrixForData = treeFile.get('matrixForHS')
        # Depth / batch size are dictated by the exported .mat contents.
        hpt.dataset.__setitem__('depth', treeFile.get('depthToSave')[0, 0])
        hpt.dataset.__setitem__('depthReal',
                                treeFile.get('depthToSaveReal')[0, 0])
        hpt.training.__setitem__('batch_size', treeFile.get('baches')[0, 0])
        print(treeFile.get('matrixForHS'))
        print(hpt.dataset.depth)
        print(hpt.training.batch_size)
    elif hpt.dataset.type == 'mnist':
        activityFile = loadmat(hpt.dataset['matrixFile_activity'])
        matrixForData = np.transpose(activityFile.get('roiActivity'))
        hpt.dataset.__setitem__('mnistShape', len(matrixForData[1, :]))
        hpt.training.__setitem__('batch_size', len(matrixForData[:, 1]))
    # NOTE(review): if dataset.type is neither 'synthetic' nor 'mnist',
    # matrixForData is never bound and the get_dataset call below raises
    # NameError — consider an explicit error for unknown types.

    logger.info('build model')
    avg_elbo_loss = get_model(hpt)
    if hpt.general.gpu >= 0:
        avg_elbo_loss.to_gpu(hpt.general.gpu)

    logger.info('setup optimizer')
    # NOTE(review): only 'adam' is handled; any other optimizer.type leaves
    # `optimizer` unbound and crashes at setup() — confirm intended.
    if hpt.optimizer.type == 'adam':
        optimizer = chainer.optimizers.Adam(alpha=hpt.optimizer.lr)
    optimizer.setup(avg_elbo_loss)

    logger.info('load dataset')
    train, valid, test = dataset.get_dataset(hpt.dataset.type, matrixForData,
                                             **hpt.dataset)
    if hpt.general.test:
        # Smoke-test mode: shrink all splits to 100 examples.
        train, _ = chainer.datasets.split_dataset(train, 100)
        valid, _ = chainer.datasets.split_dataset(valid, 100)
        test, _ = chainer.datasets.split_dataset(test, 100)
    train_iter = chainer.iterators.SerialIterator(train,
                                                  hpt.training.batch_size)
    valid_iter = chainer.iterators.SerialIterator(valid,
                                                  hpt.training.batch_size,
                                                  repeat=False,
                                                  shuffle=False)

    logger.info('setup updater/trainer')
    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=hpt.general.gpu,
                                                loss_func=avg_elbo_loss)
    if not hpt.training.early_stopping:
        trainer = training.Trainer(updater,
                                   (hpt.training.iteration, 'iteration'),
                                   out=po.namedir(output='str'))
    else:
        # `patients` is this project's accepted alias for `patience`.
        trainer = training.Trainer(updater,
                                   triggers.EarlyStoppingTrigger(
                                       monitor='validation/main/loss',
                                       patients=5,
                                       max_trigger=(hpt.training.iteration,
                                                    'iteration')),
                                   out=po.namedir(output='str'))
    if hpt.training.warm_up != -1:
        # KL warm-up: anneal beta from 0.1 up to hpt.loss.beta.
        time_range = (0, hpt.training.warm_up)
        trainer.extend(
            extensions.LinearShift('beta',
                                   value_range=(0.1, hpt.loss.beta),
                                   time_range=time_range,
                                   optimizer=avg_elbo_loss))
    trainer.extend(
        extensions.Evaluator(valid_iter, avg_elbo_loss,
                             device=hpt.general.gpu))
    # trainer.extend(extensions.DumpGraph('main/loss'))
    # Snapshot 5 times over the course of training.
    trainer.extend(extensions.snapshot_object(
        avg_elbo_loss, 'avg_elbo_loss_snapshot_iter_{.updater.iteration}'),
                   trigger=(int(hpt.training.iteration / 5), 'iteration'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'iteration', 'main/loss', 'validation/main/loss',
            'main/reconstr', 'main/kl_penalty', 'main/beta', 'lr',
            'elapsed_time'
        ]))
    trainer.extend(extensions.ProgressBar())

    logger.info('run training')
    trainer.run()

    logger.info('save last model')
    # Invoke the snapshot extension once more to persist the final state.
    extensions.snapshot_object(
        avg_elbo_loss,
        'avg_elbo_loss_snapshot_iter_{.updater.iteration}')(trainer)

    logger.info('evaluate')
    metrics = evaluate(hpt, train, test, avg_elbo_loss)
    for metric_name, metric in metrics.items():
        logger.info('{}: {:.4f}'.format(metric_name, metric))
    if hpt.general.noplot:
        return metrics
    logger.info('visualize images')
    # NOTE(review): treeFile is only bound on the 'synthetic' branch above;
    # reaching this line with dataset.type == 'mnist' raises NameError.
    visualize(hpt, train, test, avg_elbo_loss, treeFile)
    return metrics
def main():
    """CLI entry point: train a Chainer text classifier and save artifacts.

    Parses command-line options, builds the encoder/classifier pair,
    trains with optional early stopping, and writes the vocabulary,
    best-model snapshot and run settings into ``--out``.
    """
    start = time.time()
    current_datetime = '{}'.format(datetime.datetime.today())
    parser = argparse.ArgumentParser(description='Chainer Text Classification')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=30,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--unit', '-u', type=int, default=200,
                        help='Number of units')
    parser.add_argument('--vocab', '-v', type=int, default=100000,
                        help='Number of max vocabulary')
    parser.add_argument('--layer', '-l', type=int, default=1,
                        help='Number of layers of RNN or MLP following CNN')
    parser.add_argument('--dropout', '-d', type=float, default=0.4,
                        help='Dropout rate')
    parser.add_argument('--dataset', '-dataset', required=True,
                        help='train dataset')
    parser.add_argument('--size', '-size', type=int, default=-1,
                        help='train dataset size -> def train:3/4, test:1/4')
    parser.add_argument('--model', '-model', default='cnn',
                        choices=['cnn', 'lstm', 'bow', 'gru'],
                        help='Name of encoder model type.')
    parser.add_argument('--early-stop', action='store_true',
                        help='use early stopping method')
    parser.add_argument('--same-network', action='store_true',
                        help='use same network between i1 and i2')
    parser.add_argument('--save-init', action='store_true',
                        help='save init model')
    parser.add_argument('--char-based', action='store_true')
    args = parser.parse_args()
    print(json.dumps(args.__dict__, indent=2))

    train, test, vocab = get_input_dataset(
        args.dataset, vocab=None, max_vocab_size=args.vocab)
    print('# train data: {}'.format(len(train)))
    print('# dev data: {}'.format(len(test)))
    print('# vocab: {}'.format(len(vocab)))
    # The class label is stored as the last element of each example.
    n_class = len({int(d[-1]) for d in train})
    print('# class: {}'.format(n_class))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    # Setup a model: map the CLI choice to an encoder class.  argparse's
    # ``choices`` guarantees the key is present.
    encoders = {
        'lstm': nets.LSTMEncoder,
        'cnn': nets.CNNEncoder,
        'bow': nets.BOWMLPEncoder,
        'gru': nets.GRUEncoder,
    }
    Encoder = encoders[args.model]
    encoder = Encoder(n_layers=args.layer, n_vocab=len(vocab),
                      n_units=args.unit, dropout=args.dropout,
                      same_network=args.same_network)
    model = nets.TextClassifier(encoder, n_class)
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(1e-4))

    # Set up a trainer
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=convert_seq2, device=args.gpu)

    # Early stopping: monitor validation loss, but never run longer than
    # the requested number of epochs.
    if args.early_stop:
        stop_trigger = triggers.EarlyStoppingTrigger(
            monitor='validation/main/loss',
            max_trigger=(args.epoch, 'epoch'))
    else:
        stop_trigger = (args.epoch, 'epoch')
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(
        extensions.Evaluator(test_iter, model, converter=convert_seq2,
                             device=args.gpu))

    # Take a best snapshot (highest validation accuracy so far)
    record_trigger = training.triggers.MaxValueTrigger(
        'validation/main/accuracy', (1, 'epoch'))
    trainer.extend(extensions.snapshot_object(model, 'best_model.npz'),
                   trigger=record_trigger)

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))

    # Print a progress bar to stdout
    # trainer.extend(extensions.ProgressBar())

    # Save vocabulary and model's setting
    os.makedirs(args.out, exist_ok=True)
    vocab_path = os.path.join(args.out, 'vocab.json')
    with open(vocab_path, 'w') as f:
        json.dump(vocab, f)
    model_path = os.path.join(args.out, 'best_model.npz')
    # NOTE: model_setup aliases args.__dict__ on purpose — the extra keys
    # below end up in the dumped args.json.
    model_setup = args.__dict__
    model_setup['vocab_path'] = vocab_path
    model_setup['model_path'] = model_path
    model_setup['n_class'] = n_class
    model_setup['datetime'] = current_datetime
    with open(os.path.join(args.out, 'args.json'), 'w') as f:
        json.dump(model_setup, f)

    if args.save_init:
        # Save the untrained model and stop; ``return`` instead of the
        # site-injected ``exit()`` builtin, which is not guaranteed to
        # exist in non-interactive runs.
        chainer.serializers.save_npz(
            os.path.join(args.out, 'init_model.npz'), model)
        return

    # Run the training
    print('Start trainer.run: {}'.format(current_datetime))
    trainer.run()

    print('Elapsed_time: {}'.format(
        datetime.timedelta(seconds=time.time() - start)))