def check_mnist(gpu, display_log=True): epoch = 5 batchsize = 100 n_units = 100 comm = chainermn.create_communicator('naive') if gpu: device = comm.intra_rank chainer.cuda.get_device_from_id(device).use() else: device = -1 model = L.Classifier(MLP(n_units, 10)) if gpu: model.to_device(cupy.cuda.Device()) optimizer = chainermn.create_multi_node_optimizer( chainer.optimizers.Adam(), comm) optimizer.setup(model) if comm.rank == 0: train, test = chainer.datasets.get_mnist() else: train, test = None, None train = chainermn.scatter_dataset(train, comm, shuffle=True) test = chainermn.scatter_dataset(test, comm, shuffle=True) train_iter = chainer.iterators.SerialIterator(train, batchsize) test_iter = chainer.iterators.SerialIterator(test, batchsize, repeat=False, shuffle=False) updater = training.StandardUpdater(train_iter, optimizer, device=device) trainer = training.Trainer(updater, (epoch, 'epoch')) # Wrap standard Chainer evaluators by MultiNodeEvaluator. evaluator = extensions.Evaluator(test_iter, model, device=device) evaluator = chainermn.create_multi_node_evaluator(evaluator, comm) trainer.extend(evaluator) # Add checkpointer. This is just to check checkpointing runs # without errors path = tempfile.mkdtemp(dir='/tmp', prefix=__name__ + '-tmp-') checkpointer = create_multi_node_checkpointer(name=__name__, comm=comm, path=path) trainer.extend(checkpointer, trigger=(1, 'epoch')) # Some display and output extensions are necessary only for one worker. # (Otherwise, there would just be repeated outputs.) if comm.rank == 0 and display_log: trainer.extend(extensions.LogReport(trigger=(1, 'epoch')), trigger=(1, 'epoch')) trainer.extend(extensions.PrintReport([ 'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy', 'elapsed_time' ], out=sys.stderr), trigger=(1, 'epoch')) trainer.run() err = evaluator()['validation/main/accuracy'] assert err > 0.95 # Check checkpointer successfully finalized snapshot directory assert [] == os.listdir(path) os.removedirs(path)
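# The `MLP` link used above is defined elsewhere in this example. A minimal
# sketch, assuming the standard three-layer MLP from the Chainer MNIST
# examples (an assumption, not necessarily the exact model used here):
import chainer
import chainer.functions as F
import chainer.links as L


class MLP(chainer.Chain):

    def __init__(self, n_units, n_out):
        super(MLP, self).__init__()
        with self.init_scope():
            # the input size to each layer is inferred from the first batch
            self.l1 = L.Linear(None, n_units)
            self.l2 = L.Linear(None, n_units)
            self.l3 = L.Linear(None, n_out)

    def __call__(self, x):
        h1 = F.relu(self.l1(x))
        h2 = F.relu(self.l2(h1))
        return self.l3(h2)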
def main(): parser = argparse.ArgumentParser(description='Chainer example: MNIST') parser.add_argument('--batchsize', '-b', type=int, default=100, help='Number of images in each mini-batch') parser.add_argument('--epoch', '-e', type=int, default=20, help='Number of sweeps over the dataset to train') parser.add_argument('--frequency', '-f', type=int, default=-1, help='Frequency of taking a snapshot') parser.add_argument('--device', '-d', type=str, default='-1', help='Device specifier. Either ChainerX device ' 'specifier or an integer. If non-negative integer, ' 'CuPy arrays with specified device id are used. If ' 'negative integer, NumPy arrays are used') parser.add_argument('--out', '-o', default='result', help='Directory to output the result') parser.add_argument('--resume', '-r', type=str, help='Resume the training from snapshot') parser.add_argument('--unit', '-u', type=int, default=1000, help='Number of units') parser.add_argument('--noplot', dest='plot', action='store_false', help='Disable PlotReport extension') group = parser.add_argument_group('deprecated arguments') group.add_argument('--gpu', '-g', dest='device', type=int, nargs='?', const=0, help='GPU ID (negative value indicates CPU)') args = parser.parse_args() device = chainer.get_device(args.device) print('Device: {}'.format(device)) print('# unit: {}'.format(args.unit)) print('# Minibatch-size: {}'.format(args.batchsize)) print('# epoch: {}'.format(args.epoch)) print('') # Set up a neural network to train # Classifier reports softmax cross entropy loss and accuracy at every # iteration, which will be used by the PrintReport extension below. model = L.Classifier(MLP(args.unit, 10)) model.to_device(device) device.use() # Setup an optimizer optimizer = chainer.optimizers.Adam() optimizer.setup(model) # Load the MNIST dataset train, test = chainer.datasets.get_mnist() train_iter = chainer.iterators.SerialIterator(train, args.batchsize) test_iter = chainer.iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False) # Set up a trainer updater = training.updaters.StandardUpdater(train_iter, optimizer, device=device) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out) # Evaluate the model with the test dataset for each epoch trainer.extend(extensions.Evaluator(test_iter, model, device=device)) # Dump a computational graph from 'loss' variable at the first iteration # The "main" refers to the target link of the "main" optimizer. # TODO(niboshi): Temporarily disabled for chainerx. Fix it. if device.xp is not chainerx: trainer.extend(extensions.DumpGraph('main/loss')) # Take a snapshot for each specified epoch frequency = args.epoch if args.frequency == -1 else max(1, args.frequency) trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch')) # Write a log of evaluation statistics for each epoch trainer.extend(extensions.LogReport()) # Save two plot images to the result dir if args.plot and extensions.PlotReport.available(): trainer.extend( extensions.PlotReport(['main/loss', 'validation/main/loss'], 'epoch', file_name='loss.png')) trainer.extend( extensions.PlotReport( ['main/accuracy', 'validation/main/accuracy'], 'epoch', file_name='accuracy.png')) # Print selected entries of the log to stdout # Here "main" refers to the target link of the "main" optimizer again, and # "validation" refers to the default name of the Evaluator extension. # Entries other than 'epoch' are reported by the Classifier link, called by # either the updater or the evaluator. 
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume is not None:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
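# For reference, a few of the device specifiers accepted by chainer.get_device
# (Chainer >= 6), which is what the --device/--gpu arguments above feed into:
#
#     chainer.get_device(-1)         # NumPy (CPU)
#     chainer.get_device('@cupy:0')  # CuPy on GPU 0
#     chainer.get_device('native')   # ChainerX native (CPU) backend
#     chainer.get_device('cuda:0')   # ChainerX CUDA backend on GPU 0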
def main(): args = parse_arguments() # Set up some useful variables that will be used later on. dataset_name = args.dataset method = args.method num_data = args.num_data n_unit = args.unit_num conv_layers = args.conv_layers task_type = molnet_default_config[dataset_name]['task_type'] model_filename = {'classification': 'classifier.pkl', 'regression': 'regressor.pkl'} print('Using dataset: {}...'.format(dataset_name)) # Set up some useful variables that will be used later on. if args.label: labels = args.label cache_dir = os.path.join('input', '{}_{}_{}'.format(dataset_name, method, labels)) class_num = len(labels) if isinstance(labels, list) else 1 else: labels = None cache_dir = os.path.join('input', '{}_{}_all'.format(dataset_name, method)) class_num = len(molnet_default_config[args.dataset]['tasks']) # Load the train and validation parts of the dataset. filenames = [dataset_part_filename(p, num_data) for p in ['train', 'valid']] paths = [os.path.join(cache_dir, f) for f in filenames] if all([os.path.exists(path) for path in paths]): dataset_parts = [] for path in paths: print('Loading cached dataset from {}.'.format(path)) dataset_parts.append(NumpyTupleDataset.load(path)) else: dataset_parts = download_entire_dataset(dataset_name, num_data, labels, method, cache_dir) train, valid = dataset_parts[0], dataset_parts[1] # # Scale the label values, if necessary. # if args.scale == 'standardize': # if task_type == 'regression': # print('Applying standard scaling to the labels.') # datasets, scaler = standardize_dataset_labels(datasets) # else: # print('Label scaling is not available for classification tasks.') # else: # print('No label scaling was selected.') # scaler = None # Set up the predictor. predictor = set_up_predictor(method, n_unit, conv_layers, class_num) # Set up the iterators. train_iter = iterators.SerialIterator(train, args.batchsize) valid_iter = iterators.SerialIterator(valid, args.batchsize, repeat=False, shuffle=False) # Load metrics for the current dataset. metrics = molnet_default_config[dataset_name]['metrics'] metrics_fun = {k: v for k, v in metrics.items() if isinstance(v, types.FunctionType)} loss_fun = molnet_default_config[dataset_name]['loss'] if task_type == 'regression': model = Regressor(predictor, lossfun=loss_fun, metrics_fun=metrics_fun, device=args.gpu) # TODO: Use standard scaler for regression task elif task_type == 'classification': model = Classifier(predictor, lossfun=loss_fun, metrics_fun=metrics_fun, device=args.gpu) else: raise ValueError('Invalid task type ({}) encountered when processing ' 'dataset ({}).'.format(task_type, dataset_name)) # Set up the optimizer. optimizer = optimizers.Adam() optimizer.setup(model) # Save model-related output to this directory. model_dir = os.path.join(args.out, os.path.basename(cache_dir)) if not os.path.exists(model_dir): os.makedirs(model_dir) # Set up the updater. updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu, converter=concat_mols) # Set up the trainer. trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=model_dir) trainer.extend(E.Evaluator(valid_iter, model, device=args.gpu, converter=concat_mols)) trainer.extend(E.snapshot(), trigger=(args.epoch, 'epoch')) trainer.extend(E.LogReport()) # Report various metrics. 
    print_report_targets = ['epoch', 'main/loss', 'validation/main/loss']
    for metric_name, metric_fun in metrics.items():
        if isinstance(metric_fun, types.FunctionType):
            print_report_targets.append('main/' + metric_name)
            print_report_targets.append('validation/main/' + metric_name)
        elif issubclass(metric_fun, BatchEvaluator):
            trainer.extend(metric_fun(valid_iter, model, device=args.gpu,
                                      eval_func=predictor,
                                      converter=concat_mols, name='val',
                                      raise_value_error=False))
            print_report_targets.append('val/main/' + metric_name)
        else:
            raise TypeError('{} is not a supported metrics function.'
                            .format(type(metric_fun)))
    print_report_targets.append('elapsed_time')

    trainer.extend(E.PrintReport(print_report_targets))
    trainer.extend(E.ProgressBar())
    trainer.run()

    # Save the model's parameters.
    model_path = os.path.join(model_dir, model_filename[task_type])
    print('Saving the trained model to {}...'.format(model_path))
    model.save_pickle(model_path, protocol=args.protocol)
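# `dataset_part_filename` and `download_entire_dataset` are project helpers.
# A hypothetical sketch of the former, consistent with how it is called above
# (the real chainer-chemistry helper may differ):
#
#     def dataset_part_filename(dataset_part, num_data):
#         """Return the cache file name for one part ('train'/'valid'/'test')."""
#         if num_data >= 0:
#             return '{}_data_{}.npz'.format(dataset_part, num_data)
#         return '{}_data.npz'.format(dataset_part)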
def main(): parser = argparse.ArgumentParser(description='Chainer example: MNIST') parser.add_argument('--batchsize', '-b', type=int, default=100, help='Number of images in each mini-batch') parser.add_argument('--epoch', '-e', default=20, type=int, help='Number of sweeps over the dataset to train') parser.add_argument('--gpu0', '-g', default=0, type=int, help='First GPU ID') parser.add_argument('--gpu1', '-G', default=1, type=int, help='Second GPU ID') parser.add_argument('--out', '-o', default='result_parallel', help='Directory to output the result') parser.add_argument('--resume', '-r', default='', help='Resume the training from snapshot') parser.add_argument('--unit', '-u', default=1000, type=int, help='Number of units') args = parser.parse_args() print('GPU: {}, {}'.format(args.gpu0, args.gpu1)) print('# unit: {}'.format(args.unit)) print('# Minibatch-size: {}'.format(args.batchsize)) print('# epoch: {}'.format(args.epoch)) print('') # See train_mnist.py for the meaning of these lines model = L.Classifier(ParallelMLP(args.unit, 10, args.gpu0, args.gpu1)) chainer.backends.cuda.get_device_from_id(args.gpu0).use() optimizer = chainer.optimizers.Adam() optimizer.setup(model) train, test = chainer.datasets.get_mnist() train_iter = chainer.iterators.SerialIterator(train, args.batchsize) test_iter = chainer.iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False) updater = training.updaters.StandardUpdater(train_iter, optimizer, device=args.gpu0) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out) trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu0)) trainer.extend(extensions.dump_graph('main/loss')) trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch')) trainer.extend(extensions.LogReport()) trainer.extend( extensions.PrintReport([ 'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy', 'elapsed_time' ])) trainer.extend(extensions.ProgressBar()) if args.resume: chainer.serializers.load_npz(args.resume, trainer) trainer.run()
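# `ParallelMLP` is defined elsewhere in this example. A sketch in the spirit
# of the official Chainer model-parallel MNIST example, reusing the MLP link
# from the plain MNIST script and splitting the hidden units across the two
# GPUs (an assumption, not necessarily the exact definition used here):
import chainer
import chainer.functions as F


class ParallelMLP(chainer.Chain):

    def __init__(self, n_units, n_out, gpu0, gpu1):
        super(ParallelMLP, self).__init__()
        self.gpu0 = gpu0
        self.gpu1 = gpu1
        with self.init_scope():
            # half of the hidden units live on each GPU
            self.first0 = MLP(n_units // 2, n_units).to_gpu(gpu0)
            self.first1 = MLP(n_units // 2, n_units).to_gpu(gpu1)
            self.second0 = MLP(n_units // 2, n_out).to_gpu(gpu0)
            self.second1 = MLP(n_units // 2, n_out).to_gpu(gpu1)

    def __call__(self, x):
        # x arrives on gpu0; copy it to gpu1 for the parallel branch
        x1 = F.copy(x, self.gpu1)
        z0, z1 = self.first0(x), self.first1(x1)
        # exchange the partial activations between the devices
        h0 = z0 + F.copy(z1, self.gpu0)
        h1 = z1 + F.copy(z0, self.gpu1)
        y0, y1 = self.second0(F.relu(h0)), self.second1(F.relu(h1))
        # gather the output back on gpu0
        return y0 + F.copy(y1, self.gpu0)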
# make training data
data_maker = DataMaker(steps_per_cycle=STEPS_PER_CYCLE,
                       number_of_cycles=NUMBER_OF_CYCLES)
train_data = data_maker.make(LENGTH_OF_SEQUENCE)

# Iterator
batchsize = 100
train_iter = iterators.SerialIterator(train_data, batchsize)

# setup model
model = LSTM(IN_UNITS, HIDDEN_UNITS, OUT_UNITS)

# setup optimizer
optimizer = optimizers.Adam()
optimizer.setup(model)

start = time.time()
updater = training.StandardUpdater(train_iter, optimizer,
                                   converter=MyConverter)
trainer = training.Trainer(updater, (20, 'epoch'), out='result')
trainer.extend(extensions.LogReport())
trainer.extend(extensions.dump_graph('main/loss'))
trainer.extend(extensions.observe_lr())
trainer.extend(
    extensions.PrintReport(['epoch', 'main/loss', 'elapsed_time', 'lr']))
trainer.run()
end = time.time()
print("{}[sec]".format(end - start))
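# `DataMaker`, `LSTM` and `MyConverter` are defined elsewhere in this project.
# A hypothetical converter compatible with the call above: it stacks
# (input sequence, target) pairs into float32 arrays and moves them to the
# requested device (illustrative only; the real MyConverter may differ):
#
#     def MyConverter(batch, device=None):
#         xs = np.asarray([pair[0] for pair in batch], dtype=np.float32)
#         ts = np.asarray([pair[1] for pair in batch], dtype=np.float32)
#         return (chainer.dataset.to_device(device, xs),
#                 chainer.dataset.to_device(device, ts))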
def main(): parser = argparse.ArgumentParser(description='SLPolicyNetwork', formatter_class=RawTextHelpFormatter) parser.add_argument('CONFIG', default=None, type=str, help='path to config file') parser.add_argument('--gpu', type=int, default=-1, help='gpu numbers\nto specify') parser.add_argument('--debug', default=False, action='store_true', help='switch to debug mode') args = parser.parse_args() with open(args.CONFIG, "r") as f: config = json.load(f) path = 'debug' if args.debug else 'data' b = config["arguments"]["batch_size"] epoch = config["arguments"]["epoch"] print('*** making training data ***') train_data = load_data(config[path]["train"]) # (state, action) = ((3, 8, 8), (1)) train_iter = iterators.SerialIterator(train_data, b) valid_data = load_data(config[path]["valid"]) valid_iter = iterators.SerialIterator(valid_data, b, repeat=False, shuffle=False) print('*** preparing model ***') n_input_channel = config["arguments"]["n_input_channel"] n_output_channel = config["arguments"]["n_output_channel"] model = SLPolicyNetwork(n_input_channel=n_input_channel, n_output_channel=n_output_channel) if args.gpu >= 0: cuda.get_device_from_id(args.gpu).use() model.to_gpu(args.gpu) model.set_cache() optimizer = chainer.optimizers.Adam(alpha=config["arguments"]["learning_rate"]) optimizer.setup(model) updater = training.updaters.StandardUpdater(train_iter, optimizer, device=args.gpu) trainer = training.Trainer(updater, (epoch, 'epoch'), out='result' + '/' + config["arguments"]["save_path"]) @chainer.training.make_extension() def predict_next_move(_): state, action = valid_data[np.random.choice(len(valid_data))] n_channel, row, column = state.shape if args.gpu >= 0: state = cuda.to_gpu(state) prediction = model.predict(state.reshape(1, n_channel, row, column)) print_board(state) print(f'action : {translate(int(action))}') print(f'prediction : {translate(int(np.argmax(F.softmax(prediction).data, axis=1)))}') trainer.extend(predict_next_move, trigger=(1, 'epoch')) trainer.extend(extensions.Evaluator(valid_iter, model, device=args.gpu)) trainer.extend(extensions.LogReport()) trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'main/accuracy', 'validation/main/loss', 'validation/main/accuracy', 'elapsed_time'])) if args.debug is False: trainer.extend(extensions.snapshot_object(model, 'slpn.epoch{.updater.epoch}.npz'), trigger=(10, 'epoch')) save_trigger_for_accuracy = chainer.training.triggers.MaxValueTrigger(key='validation/main/accuracy', trigger=(1, 'epoch')) trainer.extend(extensions.snapshot_object(model, 'slpn.best_accuracy.npz'), trigger=save_trigger_for_accuracy) save_trigger_for_loss = chainer.training.triggers.MinValueTrigger(key='validation/main/loss', trigger=(1, 'epoch')) trainer.extend(extensions.snapshot_object(model, 'slpn.best_loss.npz'), trigger=save_trigger_for_loss) print('*** start training ***') trainer.run()
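# `load_data`, `print_board` and `translate` are project helpers. A purely
# hypothetical sketch of `translate`, assuming actions are flat indices on an
# 8x8 board (the real helper may differ):
#
#     def translate(action):
#         row, col = divmod(int(action), 8)
#         return '{}{}'.format('abcdefgh'[col], row + 1)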
def main(): parser = argparse.ArgumentParser(description='Chainer example: MNIST') parser.add_argument('--batchsize', '-b', type=int, default=100, help='Number of images in each mini-batch') parser.add_argument('--epoch', '-e', type=int, default=2, help='Number of sweeps over the dataset to train') parser.add_argument('--frequency', '-f', type=int, default=-1, help='Frequency of taking a snapshot') parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU ID (negative value indicates CPU)') parser.add_argument('--out', '-o', default='result', help='Directory to output the result') parser.add_argument('--resume', '-r', default='', help='Resume the training from snapshot') parser.add_argument('--unit', '-u', type=int, default=50, help='Number of units') args = parser.parse_args() print('GPU: {}'.format(args.gpu)) print('# unit: {}'.format(args.unit)) print('# Minibatch-size: {}'.format(args.batchsize)) print('# epoch: {}'.format(args.epoch)) print('') # Set up a neural network to train # Classifier reports softmax cross entropy loss and accuracy at every # iteration, which will be used by the PrintReport extension below. predictor = MLP(args.unit, 10) model = L.Classifier(predictor) if args.gpu >= 0: # Make a specified GPU current chainer.cuda.get_device_from_id(args.gpu).use() model.to_gpu() # Copy the model to the GPU # Setup an optimizer optimizer = chainer.optimizers.Adam() optimizer.setup(model) # Load the MNIST dataset train, test = chainer.datasets.get_mnist() train_iter = chainer.iterators.SerialIterator(train, args.batchsize) test_iter = chainer.iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False) # Set up a trainer updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out) # Evaluate the model with the test dataset for each epoch trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu)) # Dump a computational graph from 'loss' variable at the first iteration # The "main" refers to the target link of the "main" optimizer. trainer.extend(extensions.dump_graph('main/loss')) # Take a snapshot for each specified epoch frequency = args.epoch if args.frequency == -1 else max(1, args.frequency) trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch')) # Write a log of evaluation statistics for each epoch trainer.extend(extensions.LogReport()) # Save two plot images to the result dir if extensions.PlotReport.available(): trainer.extend( extensions.PlotReport(['main/loss', 'validation/main/loss'], 'epoch', file_name='loss.png')) trainer.extend( extensions.PlotReport( ['main/accuracy', 'validation/main/accuracy'], 'epoch', file_name='accuracy.png')) # Print selected entries of the log to stdout # Here "main" refers to the target link of the "main" optimizer again, and # "validation" refers to the default name of the Evaluator extension. # Entries other than 'epoch' are reported by the Classifier link, called by # either the updater or the evaluator. 
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    # Print a progress bar to stdout
    # trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()

    # Save trained model
    serializers.save_npz('{}/mlp.model'.format(args.out), model)
    save_dir = 'store_model'
    predictor.save(save_dir)
    print('model args : ', predictor._init_args)
    print('model kwargs: ', predictor._init_kwargs)
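# For later inference in a separate script, the parameters saved above with
# save_npz can be restored into a freshly constructed model (assuming the same
# MLP/Classifier definitions and a matching --unit value):
#
#     model = L.Classifier(MLP(args.unit, 10))
#     chainer.serializers.load_npz('result/mlp.model', model)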
def main(): parser = argparse.ArgumentParser() parser.add_argument('--config_path', type=str, default='config.ini') parser.add_argument('--resume') parser.add_argument('--plot_samples', type=int, default=0) args = parser.parse_args() config = configparser.ConfigParser() config.read(args.config_path, 'UTF-8') chainer.global_config.autotune = True # chainer.cuda.set_max_workspace_size(11388608) chainer.cuda.set_max_workspace_size(512 * 1024 * 1024) chainer.config.cudnn_fast_batch_normalization = True # create result dir and copy file logger.info('> store file to result dir %s', config.get('result', 'dir')) save_files(config.get('result', 'dir')) logger.info('> set up devices') devices = setup_devices(config.get('training_param', 'gpus')) set_random_seed(devices, config.getint('training_param', 'seed')) logger.info('> get dataset') dataset_type = config.get('dataset', 'type') if dataset_type == 'coco': # force to set `use_cache = False` train_set = get_coco_dataset( insize=parse_size(config.get('model_param', 'insize')), image_root=config.get(dataset_type, 'train_images'), annotations=config.get(dataset_type, 'train_annotations'), min_num_keypoints=config.getint(dataset_type, 'min_num_keypoints'), use_cache=False, do_augmentation=True, ) test_set = get_coco_dataset( insize=parse_size(config.get('model_param', 'insize')), image_root=config.get(dataset_type, 'val_images'), annotations=config.get(dataset_type, 'val_annotations'), min_num_keypoints=config.getint(dataset_type, 'min_num_keypoints'), use_cache=False, ) elif dataset_type == 'mpii': train_set, test_set = get_mpii_dataset( insize=parse_size(config.get('model_param', 'insize')), image_root=config.get(dataset_type, 'images'), annotations=config.get(dataset_type, 'annotations'), train_size=config.getfloat(dataset_type, 'train_size'), min_num_keypoints=config.getint(dataset_type, 'min_num_keypoints'), use_cache=config.getboolean(dataset_type, 'use_cache'), seed=config.getint('training_param', 'seed'), ) else: raise Exception('Unknown dataset {}'.format(dataset_type)) logger.info('dataset type: %s', dataset_type) logger.info('training images: %d', len(train_set)) logger.info('validation images: %d', len(test_set)) if args.plot_samples > 0: for i in range(args.plot_samples): data = train_set[i] visualize.plot('train-{}.png'.format(i), data['image'], data['keypoints'], data['bbox'], data['is_labeled'], data['edges']) data = test_set[i] visualize.plot('val-{}.png'.format(i), data['image'], data['keypoints'], data['bbox'], data['is_labeled'], data['edges']) logger.info('> load model') model = create_model(config, train_set) logger.info('> transform dataset') train_set = TransformDataset(train_set, model.encode) test_set = TransformDataset(test_set, model.encode) logger.info('> create iterators') train_iter = chainer.iterators.MultiprocessIterator( train_set, config.getint('training_param', 'batchsize'), n_processes=config.getint('training_param', 'num_process')) test_iter = chainer.iterators.MultiprocessIterator( test_set, config.getint('training_param', 'batchsize'), repeat=False, shuffle=False, n_processes=config.getint('training_param', 'num_process')) logger.info('> setup optimizer') optimizer = chainer.optimizers.MomentumSGD() optimizer.setup(model) optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005)) logger.info('> setup trainer') updater = training.updaters.ParallelUpdater(train_iter, optimizer, devices=devices) trainer = training.Trainer( updater, (config.getint('training_param', 'train_iter'), 'iteration'), config.get('result', 
            'dir'))

    logger.info('> setup extensions')
    trainer.extend(extensions.LinearShift(
        'lr',
        value_range=(config.getfloat('training_param', 'learning_rate'), 0),
        time_range=(0, config.getint('training_param', 'train_iter'))),
        trigger=(1, 'iteration'))
    trainer.extend(
        extensions.Evaluator(test_iter, model, device=devices['main']))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport([
                'main/loss', 'validation/main/loss',
            ], 'epoch', file_name='loss.png'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'elapsed_time', 'lr',
            'main/loss', 'validation/main/loss',
            'main/loss_resp', 'validation/main/loss_resp',
            'main/loss_iou', 'validation/main/loss_iou',
            'main/loss_coor', 'validation/main/loss_coor',
            'main/loss_size', 'validation/main/loss_size',
            'main/loss_limb', 'validation/main/loss_limb',
        ]))
    trainer.extend(extensions.ProgressBar())
    trainer.extend(
        extensions.snapshot(filename='best_snapshot'),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))
    trainer.extend(
        extensions.snapshot_object(model, filename='bestmodel.npz'),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    logger.info('> start training')
    trainer.run()
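# This script is driven by a config.ini file. Based only on the keys read
# above, a minimal example might look like the following (all values are
# illustrative; the exact 'gpus' format is whatever the project's
# setup_devices helper parses, and 'insize' is parsed by parse_size):
#
#     [result]
#     dir = results/run0
#
#     [training_param]
#     gpus = 0
#     seed = 0
#     batchsize = 16
#     num_process = 4
#     learning_rate = 0.007
#     train_iter = 260000
#
#     [model_param]
#     insize = 384x384
#
#     [dataset]
#     type = coco
#
#     [coco]
#     train_images = data/coco/train2017
#     train_annotations = data/coco/annotations/person_keypoints_train2017.json
#     val_images = data/coco/val2017
#     val_annotations = data/coco/annotations/person_keypoints_val2017.json
#     min_num_keypoints = 5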
def main(): parser = argparse.ArgumentParser( description='ChainerCV training example: FCIS') parser.add_argument('--out', '-o', default='result', help='Output directory') parser.add_argument('--seed', '-s', type=int, default=0) parser.add_argument('--lr', '-l', type=float, default=None, help='Learning rate for multi GPUs') parser.add_argument('--batch-size', type=int, default=8) parser.add_argument('--epoch', '-e', type=int, default=18) parser.add_argument('--cooldown-epoch', '-ce', type=int, default=12) args = parser.parse_args() # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator if hasattr(multiprocessing, 'set_start_method'): multiprocessing.set_start_method('forkserver') p = multiprocessing.Process() p.start() p.join() # chainermn comm = chainermn.create_communicator() device = comm.intra_rank np.random.seed(args.seed) # model proposal_creator_params = FCISResNet101.proposal_creator_params proposal_creator_params['min_size'] = 2 fcis = FCISResNet101( n_fg_class=len(coco_instance_segmentation_label_names), anchor_scales=(4, 8, 16, 32), pretrained_model='imagenet', iter2=False, proposal_creator_params=proposal_creator_params) fcis.use_preset('coco_evaluate') proposal_target_creator = ProposalTargetCreator() proposal_target_creator.neg_iou_thresh_lo = 0.0 model = FCISTrainChain(fcis, proposal_target_creator=proposal_target_creator) chainer.cuda.get_device_from_id(device).use() model.to_gpu() # train dataset train_dataset = COCOInstanceSegmentationDataset(year='2014', split='train') vmml_dataset = COCOInstanceSegmentationDataset(year='2014', split='valminusminival') # filter non-annotated data train_indices = np.array([ i for i, label in enumerate(train_dataset.slice[:, ['label']]) if len(label[0]) > 0 ], dtype=np.int32) train_dataset = train_dataset.slice[train_indices] vmml_indices = np.array([ i for i, label in enumerate(vmml_dataset.slice[:, ['label']]) if len(label[0]) > 0 ], dtype=np.int32) vmml_dataset = vmml_dataset.slice[vmml_indices] train_dataset = TransformDataset( ConcatenatedDataset(train_dataset, vmml_dataset), ('img', 'mask', 'label', 'bbox', 'scale'), Transform(model.fcis)) if comm.rank == 0: indices = np.arange(len(train_dataset)) else: indices = None indices = chainermn.scatter_dataset(indices, comm, shuffle=True) train_dataset = train_dataset.slice[indices] train_iter = chainer.iterators.SerialIterator(train_dataset, batch_size=args.batch_size // comm.size) # test dataset if comm.rank == 0: test_dataset = COCOInstanceSegmentationDataset(year='2014', split='minival', use_crowded=True, return_crowded=True, return_area=True) indices = np.arange(len(test_dataset)) test_dataset = test_dataset.slice[indices] test_iter = chainer.iterators.SerialIterator(test_dataset, batch_size=1, repeat=False, shuffle=False) # optimizer optimizer = chainermn.create_multi_node_optimizer( chainer.optimizers.MomentumSGD(momentum=0.9), comm) optimizer.setup(model) model.fcis.head.conv1.W.update_rule.add_hook(GradientScaling(3.0)) model.fcis.head.conv1.b.update_rule.add_hook(GradientScaling(3.0)) optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005)) for param in model.params(): if param.name in ['beta', 'gamma']: param.update_rule.enabled = False model.fcis.extractor.conv1.disable_update() model.fcis.extractor.res2.disable_update() updater = chainer.training.updater.StandardUpdater( train_iter, optimizer, converter=concat_examples, device=device) trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'), out=args.out) # lr scheduler 
    @make_shift('lr')
    def lr_scheduler(trainer):
        if args.lr is None:
            base_lr = 0.0005 * args.batch_size
        else:
            base_lr = args.lr

        iteration = trainer.updater.iteration
        epoch = trainer.updater.epoch
        if (iteration * comm.size) < 2000:
            rate = 0.1
        elif epoch < args.cooldown_epoch:
            rate = 1
        else:
            rate = 0.1
        return rate * base_lr

    trainer.extend(lr_scheduler)

    if comm.rank == 0:
        # interval
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 20, 'iteration'

        # training extensions
        trainer.extend(extensions.snapshot_object(
            model.fcis, filename='snapshot_model.npz'),
            trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        report_items = [
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss', 'main/rpn_loc_loss', 'main/rpn_cls_loss',
            'main/roi_loc_loss', 'main/roi_cls_loss', 'main/roi_mask_loss',
            'validation/main/map/iou=0.50:0.95/area=all/max_dets=100',
        ]
        trainer.extend(extensions.PrintReport(report_items),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(extensions.PlotReport(['main/loss'],
                                                 file_name='loss.png',
                                                 trigger=plot_interval),
                           trigger=plot_interval)

        trainer.extend(InstanceSegmentationCOCOEvaluator(
            test_iter, model.fcis,
            label_names=coco_instance_segmentation_label_names),
            trigger=ManualScheduleTrigger([
                len(train_dataset) * args.cooldown_epoch,
                len(train_dataset) * args.epoch
            ], 'iteration'))

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main(): # command line argument parsing parser = argparse.ArgumentParser(description='Digraph Embedding') parser.add_argument('input', help='Path to the digraph description file') parser.add_argument( '--validation', '-val', default=None, help='Path to the digraph description file for validation') parser.add_argument('--coordinates', '-c', help='Path to the coordinate file for initialization') parser.add_argument('--batchsize_edge', '-be', type=int, default=100, help='Number of samples in each edge mini-batch') parser.add_argument('--batchsize_anchor', '-ba', type=int, default=-1, help='Number of samples in each anchor mini-batch') parser.add_argument( '--batchsize_vert', '-bv', type=int, default=-1, help= 'Number of samples in each vertex mini-batch (used for sampling negative edges)' ) parser.add_argument( '--batchsize_negative', '-bn', type=int, default=0, help= 'Number of negative edges sampled for each vertex mini-batch (positive: exact negative edge sampling, negative: random sampling to approximate negative edges)' ) parser.add_argument('--vertex_offset', type=int, default=0, help='the smallest index of vertices') parser.add_argument('--epoch', '-e', type=int, default=100, help='Number of sweeps over the dataset to train') parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU ID (negative value indicates CPU)') parser.add_argument('--dim', '-d', type=int, default=2, help='Embedding dimension') parser.add_argument('--dag', type=float, default=0, help='0:non-acyclic, 1:acyclic') parser.add_argument('--margin', '-m', type=float, default=0.01, help='margin for the metric boundary') parser.add_argument('--weight_decay', '-wd', type=float, default=0, help='weight decay for regularization on coordinates') parser.add_argument('--wd_norm', '-wn', choices=['l1', 'l2'], default='l2', help='norm of weight decay for regularization') parser.add_argument('--learning_rate', '-lr', type=float, default=5e-2, help='learning rate') parser.add_argument('--learning_rate_drop', '-ld', type=int, default=5, help='how many times to half learning rate') # parser.add_argument('--lambda_super_neg', '-lsn', type=float, default=0, # help='Super negative samples') parser.add_argument('--lambda_pos', '-lp', type=float, default=1, help='weight for loss for positive edges') parser.add_argument('--lambda_neg', '-ln', type=float, default=1, help='weight for loss for negative edges') parser.add_argument( '--lambda_anchor', '-la', type=float, default=1, help= 'anchor should reside in the disk. 
if set to 0, anchors are fixed to the centre of the spheres' ) parser.add_argument('--lambda_uniform_radius', '-lur', type=float, default=0, help='all radiuses should be similar') parser.add_argument('--outdir', '-o', default='result', help='Directory to output the result') parser.add_argument('--optimizer', '-op', choices=optim.keys(), default='Adam', help='optimizer') parser.add_argument('--vis_freq', '-vf', type=int, default=-1, help='evaluation frequency in iteration') parser.add_argument('--mpi', action='store_true', help='parallelise with MPI') parser.add_argument('--reconstruct', '-r', action='store_true', help='reconstruct graph during evaluation') parser.add_argument('--plot', '-p', action='store_true', help='plot result (dim=2 only)') # parser.add_argument('--training', '-t', action='store_false',help='reconstruct graph') args = parser.parse_args() # default batchsize if args.batchsize_anchor < 0: args.batchsize_anchor = 10 * args.batchsize_edge if args.batchsize_vert < 0: if args.batchsize_negative == 0: args.batchsize_vert = 10 * args.batchsize_edge else: args.batchsize_vert = args.batchsize_edge args.outdir = os.path.join(args.outdir, dt.now().strftime('%m%d_%H%M')) save_args(args, args.outdir) chainer.config.autotune = True vert, pos_edge = read_graph(args.input, args.vertex_offset) vnum = np.max(vert) + 1 ## ChainerMN if args.mpi: import chainermn if args.gpu >= 0: comm = chainermn.create_communicator() chainer.cuda.get_device(comm.intra_rank).use() else: comm = chainermn.create_communicator('naive') if comm.rank == 0: primary = True print(args) chainer.print_runtime_info() print("#edges {}, #vertices {}".format(len(pos_edge), len(vert))) else: primary = False print("process {}".format(comm.rank)) else: primary = True print(args) chainer.print_runtime_info() print("#edges {}, #vertices {}".format(len(pos_edge), len(vert))) if args.gpu >= 0: chainer.cuda.get_device(args.gpu).use() # data edge_iter = iterators.SerialIterator(datasets.TupleDataset( pos_edge[:, 0], pos_edge[:, 1]), args.batchsize_edge, shuffle=True) vert_iter = iterators.SerialIterator(datasets.TupleDataset(vert), args.batchsize_vert, shuffle=True) anchor_iter = iterators.SerialIterator(datasets.TupleDataset(vert), args.batchsize_anchor, shuffle=True) graph = nx.from_edgelist(pos_edge, nx.DiGraph()) if args.validation and primary: val_vert, val_edge = read_graph(args.validation, args.vertex_offset) val_graph = nx.from_edgelist(val_edge, nx.DiGraph()) print("validation #edges {}, #vertices {}".format( len(val_edge), len(val_vert))) else: val_graph = graph if args.vis_freq < 0: args.vis_freq = int(len(pos_edge) * args.epoch / 10) # initial embedding if args.coordinates: coords = np.loadtxt(args.coordinates, delimiter=",") else: coords = np.zeros((vnum, 1 + 2 * args.dim)) # anchor = centre X = 2 * np.random.rand(vnum, args.dim) - 1 coords[:, 1:args.dim + 1] = X coords[:, args.dim + 1:] = X # the first coordinate corresponds to the radius r=0.1 coords[:, 0] = 0.1 coords = L.Parameter(coords) # set up an optimizer def make_optimizer(model): if args.optimizer in [ 'SGD', 'Momentum', 'CMomentum', 'AdaGrad', 'RMSprop', 'NesterovAG', 'LBFGS' ]: optimizer = optim[args.optimizer](lr=args.learning_rate) elif args.optimizer in ['AdaDelta']: optimizer = optim[args.optimizer]() elif args.optimizer in ['Adam', 'AdaBound', 'Eve']: optimizer = optim[args.optimizer]( alpha=args.learning_rate, weight_decay_rate=args.weight_decay) if args.mpi: optimizer = chainermn.create_multi_node_optimizer(optimizer, comm) optimizer.setup(model) 
return optimizer opt = make_optimizer(coords) if args.weight_decay > 0 and (not args.optimizer in ['Adam', 'AdaBound', 'Eve']): if args.wd_norm == 'l2': opt.add_hook(chainer.optimizer_hooks.WeightDecay( args.weight_decay)) else: opt.add_hook(chainer.optimizer_hooks.Lasso(args.weight_decay)) if args.gpu >= 0: coords.to_gpu() updater = Updater( models=coords, iterator={ 'main': edge_iter, 'vertex': vert_iter, 'anchor': anchor_iter }, optimizer={'main': opt}, device=args.gpu, # converter=convert.ConcatWithAsyncTransfer(), params={ 'args': args, 'graph': graph }) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.outdir) if primary: log_interval = 20, 'iteration' log_keys = [ 'iteration', 'lr', 'elapsed_time', 'main/loss_pos', 'main/loss_neg', 'main/loss_anc' ] if args.validation: log_keys.extend( ['myval/prc', 'myval/rec', 'myval/f1', 'myval/anc']) if args.lambda_uniform_radius > 0: log_keys.append('main/loss_rad') trainer.extend(extensions.observe_lr('main'), trigger=log_interval) trainer.extend( extensions.LogReport(keys=log_keys, trigger=log_interval)) # trainer.extend(extensions.LogReport(keys=log_keys, trigger=log_interval)) trainer.extend(extensions.PrintReport(log_keys), trigger=log_interval) # trainer.extend(extensions.PrintReport(log_keys), trigger=(1, 'iteration')) if extensions.PlotReport.available(): trainer.extend( extensions.PlotReport(log_keys[3:], 'epoch', file_name='loss.png', postprocess=plot_log)) trainer.extend(extensions.ProgressBar(update_interval=10)) trainer.extend(extensions.LogReport(trigger=log_interval)) trainer.extend(extensions.snapshot_object(opt, 'opt{.updater.epoch}.npz'), trigger=(args.epoch, 'epoch')) if args.vis_freq > 0: trainer.extend(Evaluator({'main': edge_iter}, coords, params={ 'args': args, 'graph': val_graph }, device=args.gpu), trigger=(args.vis_freq, 'iteration')) # trainer.extend(extensions.ParameterStatistics(coords)) # ChainerUI save_args(args, args.outdir) if args.optimizer in [ 'Momentum', 'CMomentum', 'AdaGrad', 'RMSprop', 'NesterovAG' ]: trainer.extend(extensions.ExponentialShift('lr', 0.5, optimizer=opt), trigger=(args.epoch / args.learning_rate_drop, 'epoch')) elif args.optimizer in ['Adam', 'AdaBound', 'Eve']: trainer.extend(extensions.ExponentialShift("alpha", 0.5, optimizer=opt), trigger=(args.epoch / args.learning_rate_drop, 'epoch')) # if args.training: trainer.run() # result if primary: # save DAG data file if (args.gpu > -1): dat = coords.xp.asnumpy(coords.W.data) else: dat = coords.W.data if args.lambda_anchor == 0: # anchor = centre dat[:, 1:(args.dim + 1)] = dat[:, (args.dim + 1):] redge = reconstruct(dat, dag=args.dag) np.savetxt(os.path.join(args.outdir, "original.csv"), pos_edge, fmt='%i', delimiter=",") np.savetxt(os.path.join(args.outdir, "reconstructed.csv"), redge, fmt='%i', delimiter=",") np.savetxt(os.path.join(args.outdir, "coords.csv"), dat, fmt='%1.5f', delimiter=",") f1, prc, rec, acc = compare_graph( val_graph, nx.from_edgelist(redge, nx.DiGraph())) if args.plot: plot_digraph(pos_edge, os.path.join(args.outdir, "original.png")) plot_digraph(redge, os.path.join(args.outdir, "reconstructed.png")) plot_disks(dat, os.path.join(args.outdir, "plot.png")) with open(os.path.join(args.outdir, "args.txt"), 'w') as fh: fh.write(" ".join(sys.argv)) fh.write( f"f1: {f1}, precision: {prc}, recall: {rec}, accuracy: {acc}")
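# `read_graph` is project-specific. A hypothetical sketch consistent with how
# it is used above (reads one "source,target" edge per line and returns the
# vertex ids plus the edge array; the real helper may differ):
#
#     def read_graph(path, vertex_offset=0):
#         edges = np.loadtxt(path, delimiter=',', dtype=np.int64) - vertex_offset
#         return np.unique(edges), edges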
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--out', type=str, default='result',
                        help='Output directory')
    parser.add_argument('--mscoco-root', type=str, default='data',
                        help='MSCOCO dataset root directory')
    parser.add_argument('--max-iters', type=int, default=50000,
                        help='Maximum number of iterations to train')
    parser.add_argument('--batch-size', type=int, default=128,
                        help='Minibatch size')
    parser.add_argument('--dropout-ratio', type=float, default=0.5,
                        help='Language model dropout ratio')
    parser.add_argument('--val-keep-quantity', type=int, default=100,
                        help='Keep every N-th validation image')
    parser.add_argument('--val-iter', type=int, default=100,
                        help='Run validation every N-th iteration')
    parser.add_argument('--log-iter', type=int, default=1,
                        help='Log every N-th iteration')
    parser.add_argument('--snapshot-iter', type=int, default=1000,
                        help='Model snapshot every N-th iteration')
    parser.add_argument('--rnn', type=str, default='nsteplstm',
                        choices=['nsteplstm', 'lstm'],
                        help='Language model layer type')
    parser.add_argument('--gpu', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--max-caption-length', type=int, default=30,
                        help='Maximum caption length when using LSTM layer')
    args = parser.parse_args()

    # Load the MSCOCO dataset. Assumes that the dataset has already been
    # downloaded, e.g. using the `download.py` script
    train, val = datasets.get_mscoco(args.mscoco_root)

    # Validation samples are used to address overfitting and to see how well
    # the model generalizes to yet unseen data. However, since the number of
    # these samples in MSCOCO is quite large (~200k) and thus takes time to
    # evaluate, you may choose to use only a fraction of the available samples
    val = val[::args.val_keep_quantity]

    # Number of unique words that are found in the dataset
    vocab_size = len(train.vocab)

    # Instantiate the model to be trained, either with LSTM layers or with
    # NStepLSTM layers
    model = ImageCaptionModel(vocab_size, dropout_ratio=args.dropout_ratio,
                              rnn=args.rnn)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    def transform(in_data):
        # Called for each sample and applies necessary preprocessing to the
        # image, such as resizing and normalizing
        img, caption = in_data
        img = model.prepare(img)
        return img, caption

    # We need to preprocess the images since their sizes may vary (and the
    # model requires that they have the exact same fixed size)
    train = TransformDataset(train, transform)
    val = TransformDataset(val, transform)

    train_iter = iterators.MultiprocessIterator(train, args.batch_size,
                                                shared_mem=700000)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.batch_size, repeat=False, shuffle=False, shared_mem=700000)

    optimizer = optimizers.Adam()
    optimizer.setup(model)

    def converter(batch, device):
        # The converter receives a batch of input samples and may modify it if
        # necessary. In our case, we need to align the captions depending on
        # whether we are using LSTM or NStepLSTM layers in the model.
        if args.rnn == 'lstm':
            max_caption_length = args.max_caption_length
        elif args.rnn == 'nsteplstm':
            max_caption_length = None
        else:
            raise ValueError('Invalid RNN type.')
        return datasets.converter(batch, device,
                                  max_caption_length=max_caption_length)

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer=optimizer,
                                                device=args.gpu,
                                                converter=converter)
    trainer = training.Trainer(updater, out=args.out,
                               stop_trigger=(args.max_iters, 'iteration'))
    trainer.extend(extensions.Evaluator(val_iter, target=model,
                                        converter=converter,
                                        device=args.gpu),
                   trigger=(args.val_iter, 'iteration'))
    trainer.extend(
        extensions.LogReport(['main/loss', 'validation/main/loss'],
                             trigger=(args.log_iter, 'iteration')))
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              trigger=(args.log_iter, 'iteration')))
    trainer.extend(extensions.PrintReport([
        'elapsed_time', 'epoch', 'iteration', 'main/loss',
        'validation/main/loss'
    ]), trigger=(args.log_iter, 'iteration'))

    # Save model snapshots so that later on, we can load them and generate new
    # captions for any image. This can be done in the `predict.py` script
    trainer.extend(extensions.snapshot_object(model,
                                              'model_{.updater.iteration}'),
                   trigger=(args.snapshot_iter, 'iteration'))
    trainer.extend(extensions.ProgressBar())
    trainer.run()
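# The snapshot_object files written above ('model_<iteration>') hold only the
# model parameters. In a separate inference script (here `predict.py`) they
# can be restored into a freshly built model, e.g. (vocab_size and the CLI
# options must match the ones used for training):
#
#     model = ImageCaptionModel(vocab_size, dropout_ratio=args.dropout_ratio,
#                               rnn=args.rnn)
#     chainer.serializers.load_npz('result/model_50000', model)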
def train(args): """Train with the given args. Args: args (namespace): The program arguments. """ set_deterministic_pytorch(args) # check cuda availability if not torch.cuda.is_available(): logging.warning("cuda is not available") # get input and output dimension info with open(args.valid_json, "rb") as f: valid_json = json.load(f)["utts"] utts = list(valid_json.keys()) idim = int(valid_json[utts[0]]["input"][0]["shape"][-1]) odim = int(valid_json[utts[0]]["output"][0]["shape"][-1]) logging.info("#input dims : " + str(idim)) logging.info("#output dims: " + str(odim)) # Initialize with pre-trained ASR encoder and MT decoder if args.enc_init is not None or args.dec_init is not None: model = load_trained_modules(idim, odim, args, interface=STInterface) else: model_class = dynamic_import(args.model_module) model = model_class(idim, odim, args) assert isinstance(model, STInterface) # write model config if not os.path.exists(args.outdir): os.makedirs(args.outdir) model_conf = args.outdir + "/model.json" with open(model_conf, "wb") as f: logging.info("writing a model config file to " + model_conf) f.write( json.dumps((idim, odim, vars(args)), indent=4, ensure_ascii=False, sort_keys=True).encode("utf_8")) for key in sorted(vars(args).keys()): logging.info("ARGS: " + key + ": " + str(vars(args)[key])) reporter = model.reporter # check the use of multi-gpu if args.ngpu > 1: if args.batch_size != 0: logging.warning( "batch size is automatically increased (%d -> %d)" % (args.batch_size, args.batch_size * args.ngpu)) args.batch_size *= args.ngpu # set torch device device = torch.device("cuda" if args.ngpu > 0 else "cpu") if args.train_dtype in ("float16", "float32", "float64"): dtype = getattr(torch, args.train_dtype) else: dtype = torch.float32 model = model.to(device=device, dtype=dtype) # Setup an optimizer if args.opt == "adadelta": optimizer = torch.optim.Adadelta(model.parameters(), rho=0.95, eps=args.eps, weight_decay=args.weight_decay) elif args.opt == "adam": optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) elif args.opt == "noam": from espnet.nets.pytorch_backend.transformer.optimizer import get_std_opt optimizer = get_std_opt( model.parameters(), args.adim, args.transformer_warmup_steps, args.transformer_lr, ) else: raise NotImplementedError("unknown optimizer: " + args.opt) # setup apex.amp if args.train_dtype in ("O0", "O1", "O2", "O3"): try: from apex import amp except ImportError as e: logging.error( f"You need to install apex for --train-dtype {args.train_dtype}. 
" "See https://github.com/NVIDIA/apex#linux") raise e if args.opt == "noam": model, optimizer.optimizer = amp.initialize( model, optimizer.optimizer, opt_level=args.train_dtype) else: model, optimizer = amp.initialize(model, optimizer, opt_level=args.train_dtype) use_apex = True else: use_apex = False # FIXME: TOO DIRTY HACK setattr(optimizer, "target", reporter) setattr(optimizer, "serialize", lambda s: reporter.serialize(s)) # Setup a converter converter = CustomConverter( subsampling_factor=model.subsample[0], dtype=dtype, use_source_text=args.asr_weight > 0 or args.mt_weight > 0, ) # read json data with open(args.train_json, "rb") as f: train_json = json.load(f)["utts"] with open(args.valid_json, "rb") as f: valid_json = json.load(f)["utts"] use_sortagrad = args.sortagrad == -1 or args.sortagrad > 0 # make minibatch list (variable length) train = make_batchset( train_json, args.batch_size, args.maxlen_in, args.maxlen_out, args.minibatches, min_batch_size=args.ngpu if args.ngpu > 1 else 1, shortest_first=use_sortagrad, count=args.batch_count, batch_bins=args.batch_bins, batch_frames_in=args.batch_frames_in, batch_frames_out=args.batch_frames_out, batch_frames_inout=args.batch_frames_inout, iaxis=0, oaxis=0, ) valid = make_batchset( valid_json, args.batch_size, args.maxlen_in, args.maxlen_out, args.minibatches, min_batch_size=args.ngpu if args.ngpu > 1 else 1, count=args.batch_count, batch_bins=args.batch_bins, batch_frames_in=args.batch_frames_in, batch_frames_out=args.batch_frames_out, batch_frames_inout=args.batch_frames_inout, iaxis=0, oaxis=0, ) load_tr = LoadInputsAndTargets( mode="asr", load_output=True, preprocess_conf=args.preprocess_conf, preprocess_args={"train": True}, # Switch the mode of preprocessing ) load_cv = LoadInputsAndTargets( mode="asr", load_output=True, preprocess_conf=args.preprocess_conf, preprocess_args={"train": False}, # Switch the mode of preprocessing ) # hack to make batchsize argument as 1 # actual bathsize is included in a list # default collate function converts numpy array to pytorch tensor # we used an empty collate function instead which returns list train_iter = ChainerDataLoader( dataset=TransformDataset(train, lambda data: converter([load_tr(data)])), batch_size=1, num_workers=args.n_iter_processes, shuffle=not use_sortagrad, collate_fn=lambda x: x[0], ) valid_iter = ChainerDataLoader( dataset=TransformDataset(valid, lambda data: converter([load_cv(data)])), batch_size=1, shuffle=False, collate_fn=lambda x: x[0], num_workers=args.n_iter_processes, ) # Set up a trainer updater = CustomUpdater( model, args.grad_clip, {"main": train_iter}, optimizer, device, args.ngpu, args.grad_noise, args.accum_grad, use_apex=use_apex, ) trainer = training.Trainer(updater, (args.epochs, "epoch"), out=args.outdir) if use_sortagrad: trainer.extend( ShufflingEnabler([train_iter]), trigger=(args.sortagrad if args.sortagrad != -1 else args.epochs, "epoch"), ) # Resume from a snapshot if args.resume: logging.info("resumed from %s" % args.resume) torch_resume(args.resume, trainer) # Evaluate the model with the test dataset for each epoch if args.save_interval_iters > 0: trainer.extend( CustomEvaluator(model, {"main": valid_iter}, reporter, device, args.ngpu), trigger=(args.save_interval_iters, "iteration"), ) else: trainer.extend( CustomEvaluator(model, {"main": valid_iter}, reporter, device, args.ngpu)) # Save attention weight at each epoch if args.num_save_attention > 0: data = sorted( list(valid_json.items())[:args.num_save_attention], key=lambda x: 
int(x[1]["input"][0]["shape"][1]), reverse=True, ) if hasattr(model, "module"): att_vis_fn = model.module.calculate_all_attentions plot_class = model.module.attention_plot_class else: att_vis_fn = model.calculate_all_attentions plot_class = model.attention_plot_class att_reporter = plot_class( att_vis_fn, data, args.outdir + "/att_ws", converter=converter, transform=load_cv, device=device, ) trainer.extend(att_reporter, trigger=(1, "epoch")) else: att_reporter = None # Save CTC prob at each epoch if (args.asr_weight > 0 and args.mtlalpha > 0) and args.num_save_ctc > 0: # NOTE: sort it by output lengths data = sorted( list(valid_json.items())[:args.num_save_ctc], key=lambda x: int(x[1]["output"][0]["shape"][0]), reverse=True, ) if hasattr(model, "module"): ctc_vis_fn = model.module.calculate_all_ctc_probs plot_class = model.module.ctc_plot_class else: ctc_vis_fn = model.calculate_all_ctc_probs plot_class = model.ctc_plot_class ctc_reporter = plot_class( ctc_vis_fn, data, args.outdir + "/ctc_prob", converter=converter, transform=load_cv, device=device, ikey="output", iaxis=1, ) trainer.extend(ctc_reporter, trigger=(1, "epoch")) else: ctc_reporter = None # Make a plot for training and validation values trainer.extend( extensions.PlotReport( [ "main/loss", "validation/main/loss", "main/loss_asr", "validation/main/loss_asr", "main/loss_mt", "validation/main/loss_mt", "main/loss_st", "validation/main/loss_st", ], "epoch", file_name="loss.png", )) trainer.extend( extensions.PlotReport( [ "main/acc", "validation/main/acc", "main/acc_asr", "validation/main/acc_asr", "main/acc_mt", "validation/main/acc_mt", ], "epoch", file_name="acc.png", )) trainer.extend( extensions.PlotReport(["main/bleu", "validation/main/bleu"], "epoch", file_name="bleu.png")) # Save best models trainer.extend( snapshot_object(model, "model.loss.best"), trigger=training.triggers.MinValueTrigger("validation/main/loss"), ) trainer.extend( snapshot_object(model, "model.acc.best"), trigger=training.triggers.MaxValueTrigger("validation/main/acc"), ) # save snapshot which contains model and optimizer states if args.save_interval_iters > 0: trainer.extend( torch_snapshot(filename="snapshot.iter.{.updater.iteration}"), trigger=(args.save_interval_iters, "iteration"), ) else: trainer.extend(torch_snapshot(), trigger=(1, "epoch")) # epsilon decay in the optimizer if args.opt == "adadelta": if args.criterion == "acc": trainer.extend( restore_snapshot(model, args.outdir + "/model.acc.best", load_fn=torch_load), trigger=CompareValueTrigger( "validation/main/acc", lambda best_value, current_value: best_value > current_value, ), ) trainer.extend( adadelta_eps_decay(args.eps_decay), trigger=CompareValueTrigger( "validation/main/acc", lambda best_value, current_value: best_value > current_value, ), ) elif args.criterion == "loss": trainer.extend( restore_snapshot(model, args.outdir + "/model.loss.best", load_fn=torch_load), trigger=CompareValueTrigger( "validation/main/loss", lambda best_value, current_value: best_value < current_value, ), ) trainer.extend( adadelta_eps_decay(args.eps_decay), trigger=CompareValueTrigger( "validation/main/loss", lambda best_value, current_value: best_value < current_value, ), ) elif args.opt == "adam": if args.criterion == "acc": trainer.extend( restore_snapshot(model, args.outdir + "/model.acc.best", load_fn=torch_load), trigger=CompareValueTrigger( "validation/main/acc", lambda best_value, current_value: best_value > current_value, ), ) trainer.extend( adam_lr_decay(args.lr_decay), 
trigger=CompareValueTrigger( "validation/main/acc", lambda best_value, current_value: best_value > current_value, ), ) elif args.criterion == "loss": trainer.extend( restore_snapshot(model, args.outdir + "/model.loss.best", load_fn=torch_load), trigger=CompareValueTrigger( "validation/main/loss", lambda best_value, current_value: best_value < current_value, ), ) trainer.extend( adam_lr_decay(args.lr_decay), trigger=CompareValueTrigger( "validation/main/loss", lambda best_value, current_value: best_value < current_value, ), ) # Write a log of evaluation statistics for each epoch trainer.extend( extensions.LogReport(trigger=(args.report_interval_iters, "iteration"))) report_keys = [ "epoch", "iteration", "main/loss", "main/loss_st", "main/loss_asr", "validation/main/loss", "validation/main/loss_st", "validation/main/loss_asr", "main/acc", "validation/main/acc", ] if args.asr_weight > 0: report_keys.append("main/acc_asr") report_keys.append("validation/main/acc_asr") report_keys += ["elapsed_time"] if args.opt == "adadelta": trainer.extend( extensions.observe_value( "eps", lambda trainer: trainer.updater.get_optimizer("main"). param_groups[0]["eps"], ), trigger=(args.report_interval_iters, "iteration"), ) report_keys.append("eps") elif args.opt in ["adam", "noam"]: trainer.extend( extensions.observe_value( "lr", lambda trainer: trainer.updater.get_optimizer("main"). param_groups[0]["lr"], ), trigger=(args.report_interval_iters, "iteration"), ) report_keys.append("lr") if args.asr_weight > 0: if args.mtlalpha > 0: report_keys.append("main/cer_ctc") report_keys.append("validation/main/cer_ctc") if args.mtlalpha < 1: if args.report_cer: report_keys.append("validation/main/cer") if args.report_wer: report_keys.append("validation/main/wer") if args.report_bleu: report_keys.append("main/bleu") report_keys.append("validation/main/bleu") trainer.extend( extensions.PrintReport(report_keys), trigger=(args.report_interval_iters, "iteration"), ) trainer.extend( extensions.ProgressBar(update_interval=args.report_interval_iters)) set_early_stop(trainer, args) if args.tensorboard_dir is not None and args.tensorboard_dir != "": trainer.extend( TensorboardLogger( SummaryWriter(args.tensorboard_dir), att_reporter=att_reporter, ctc_reporter=ctc_reporter, ), trigger=(args.report_interval_iters, "iteration"), ) # Run the training trainer.run() check_early_stop(trainer, args.epochs)
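# Conceptual sketch only (not the actual espnet helpers): adadelta_eps_decay /
# adam_lr_decay used above behave roughly like a trainer extension that scales
# the corresponding optimizer hyperparameter each time its trigger fires:
#
#     def scale_eps(factor):
#         @training.make_extension()
#         def _scale(trainer):
#             opt = trainer.updater.get_optimizer('main')
#             for group in opt.param_groups:
#                 group['eps'] *= factor
#         return _scale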
def train(args):
    """Train with the given args.

    :param Namespace args: The program arguments
    """
    set_deterministic_pytorch(args)

    # check cuda availability
    if not torch.cuda.is_available():
        logging.warning('cuda is not available')

    # get input and output dimension info
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())
    idim = int(valid_json[utts[0]]['input'][0]['shape'][1])
    odim = int(valid_json[utts[0]]['output'][0]['shape'][1])
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))

    # specify attention, CTC, hybrid mode
    if args.mtlalpha == 1.0:
        mtl_mode = 'ctc'
        logging.info('Pure CTC mode')
    elif args.mtlalpha == 0.0:
        mtl_mode = 'att'
        logging.info('Pure attention mode')
    else:
        mtl_mode = 'mtl'
        logging.info('Multitask learning mode')

    # specify model architecture
    model = E2E(idim, odim, args)
    subsampling_factor = model.subsample[0]

    if args.rnnlm is not None:
        rnnlm_args = get_model_conf(args.rnnlm, args.rnnlm_conf)
        rnnlm = lm_pytorch.ClassifierWithState(
            lm_pytorch.RNNLM(len(args.char_list), rnnlm_args.layer,
                             rnnlm_args.unit))
        torch_load(args.rnnlm, rnnlm)
        model.rnnlm = rnnlm

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.json'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to ' + model_conf)
        f.write(
            json.dumps((idim, odim, vars(args)),
                       indent=4, ensure_ascii=False,
                       sort_keys=True).encode('utf_8'))
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    reporter = model.reporter

    # check the use of multi-gpu
    if args.ngpu > 1:
        model = torch.nn.DataParallel(model,
                                      device_ids=list(range(args.ngpu)))
        logging.info('batch size is automatically increased (%d -> %d)' %
                     (args.batch_size, args.batch_size * args.ngpu))
        args.batch_size *= args.ngpu

    # set torch device
    device = torch.device("cuda" if args.ngpu > 0 else "cpu")
    model = model.to(device)

    # Setup an optimizer
    if args.opt == 'adadelta':
        optimizer = torch.optim.Adadelta(model.parameters(), rho=0.95,
                                         eps=args.eps,
                                         weight_decay=args.weight_decay)
    elif args.opt == 'adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     weight_decay=args.weight_decay)

    # FIXME: TOO DIRTY HACK
    setattr(optimizer, "target", reporter)
    setattr(optimizer, "serialize", lambda s: reporter.serialize(s))

    # Setup a converter
    converter = CustomConverter(subsampling_factor=subsampling_factor,
                                preprocess_conf=args.preprocess_conf)

    # read json data
    with open(args.train_json, 'rb') as f:
        train_json = json.load(f)['utts']
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']

    # make minibatch list (variable length)
    train = make_batchset(train_json, args.batch_size,
                          args.maxlen_in, args.maxlen_out, args.minibatches,
                          min_batch_size=args.ngpu if args.ngpu > 1 else 1)
    valid = make_batchset(valid_json, args.batch_size,
                          args.maxlen_in, args.maxlen_out, args.minibatches,
                          min_batch_size=args.ngpu if args.ngpu > 1 else 1)

    # hack to make the batchsize argument 1:
    # the actual batch size is included in each list element
    if args.n_iter_processes > 0:
        train_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(train, converter.transform),
            batch_size=1, n_processes=args.n_iter_processes,
            n_prefetch=8, maxtasksperchild=20)
        valid_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(valid, converter.transform),
            batch_size=1, repeat=False, shuffle=False,
            n_processes=args.n_iter_processes,
            n_prefetch=8, maxtasksperchild=20)
    else:
        train_iter = 
chainer.iterators.SerialIterator(TransformDataset( train, converter.transform), batch_size=1) valid_iter = chainer.iterators.SerialIterator(TransformDataset( valid, converter.transform), batch_size=1, repeat=False, shuffle=False) # Set up a trainer updater = CustomUpdater(model, args.grad_clip, train_iter, optimizer, converter, device, args.ngpu) trainer = training.Trainer(updater, (args.epochs, 'epoch'), out=args.outdir) # Resume from a snapshot if args.resume: logging.info('resumed from %s' % args.resume) torch_resume(args.resume, trainer) # Evaluate the model with the test dataset for each epoch trainer.extend( CustomEvaluator(model, valid_iter, reporter, converter, device)) # Save attention weight each epoch if args.num_save_attention > 0 and args.mtlalpha != 1.0: data = sorted(list(valid_json.items())[:args.num_save_attention], key=lambda x: int(x[1]['input'][0]['shape'][1]), reverse=True) if hasattr(model, "module"): att_vis_fn = model.module.calculate_all_attentions else: att_vis_fn = model.calculate_all_attentions att_reporter = PlotAttentionReport(att_vis_fn, data, args.outdir + "/att_ws", converter=converter, device=device) trainer.extend(att_reporter, trigger=(1, 'epoch')) else: att_reporter = None # Make a plot for training and validation values trainer.extend( extensions.PlotReport([ 'main/loss', 'validation/main/loss', 'main/loss_ctc', 'validation/main/loss_ctc', 'main/loss_att', 'validation/main/loss_att' ], 'epoch', file_name='loss.png')) trainer.extend( extensions.PlotReport(['main/acc', 'validation/main/acc'], 'epoch', file_name='acc.png')) # Save best models trainer.extend( snapshot_object(model, 'model.loss.best'), trigger=training.triggers.MinValueTrigger('validation/main/loss')) if mtl_mode != 'ctc': trainer.extend( snapshot_object(model, 'model.acc.best'), trigger=training.triggers.MaxValueTrigger('validation/main/acc')) # save snapshot which contains model and optimizer states trainer.extend(torch_snapshot(), trigger=(1, 'epoch')) # epsilon decay in the optimizer if args.opt == 'adadelta': if args.criterion == 'acc' and mtl_mode != 'ctc': trainer.extend(restore_snapshot(model, args.outdir + '/model.acc.best', load_fn=torch_load), trigger=CompareValueTrigger( 'validation/main/acc', lambda best_value, current_value: best_value > current_value)) trainer.extend(adadelta_eps_decay(args.eps_decay), trigger=CompareValueTrigger( 'validation/main/acc', lambda best_value, current_value: best_value > current_value)) elif args.criterion == 'loss': trainer.extend(restore_snapshot(model, args.outdir + '/model.loss.best', load_fn=torch_load), trigger=CompareValueTrigger( 'validation/main/loss', lambda best_value, current_value: best_value < current_value)) trainer.extend(adadelta_eps_decay(args.eps_decay), trigger=CompareValueTrigger( 'validation/main/loss', lambda best_value, current_value: best_value < current_value)) # Write a log of evaluation statistics for each epoch trainer.extend(extensions.LogReport(trigger=(REPORT_INTERVAL, 'iteration'))) report_keys = [ 'epoch', 'iteration', 'main/loss', 'main/loss_ctc', 'main/loss_att', 'validation/main/loss', 'validation/main/loss_ctc', 'validation/main/loss_att', 'main/acc', 'validation/main/acc', 'elapsed_time' ] if args.opt == 'adadelta': trainer.extend(extensions.observe_value( 'eps', lambda trainer: trainer.updater.get_optimizer('main'). 
param_groups[0]["eps"]), trigger=(REPORT_INTERVAL, 'iteration')) report_keys.append('eps') if args.report_cer: report_keys.append('validation/main/cer') if args.report_wer: report_keys.append('validation/main/wer') trainer.extend(extensions.PrintReport(report_keys), trigger=(REPORT_INTERVAL, 'iteration')) trainer.extend(extensions.ProgressBar(update_interval=REPORT_INTERVAL)) set_early_stop(trainer, args) if args.tensorboard_dir is not None and args.tensorboard_dir != "": writer = SummaryWriter(args.tensorboard_dir) trainer.extend(TensorboardLogger(writer, att_reporter), trigger=(REPORT_INTERVAL, 'iteration')) # Run the training trainer.run() check_early_stop(trainer, args.epochs)
def train(**args): set_seed(42) args = EasyDict(args) logger.info(args) dataset_file = Path(args.dataset_file) data = json.loads(dataset_file.read_text()) ladder = data['ladder'] train_data, valid_data = data['train'], data['valid'] counter = Counter() pokes = train_data + valid_data for poke in pokes: counter.update(poke) counts = [0] * (args.topk + 1) index2poke = ['<unk>'] for i, (name, freq) in enumerate(counter.most_common()): if i < args.topk: counts[i + 1] = freq index2poke.append(name) else: counts[0] += freq vocab = {x: i for i, x in enumerate(index2poke)} n_vocab = len(vocab) logger.info('n_vocab = {}'.format(n_vocab)) train_data = vectorize(train_data, vocab) valid_data = vectorize(valid_data, vocab) X_valid, y_valid = convert(valid_data) X_train, y_train = convert(train_data) train = TupleDataset(X_train, y_train) valid = TupleDataset(X_valid, y_valid) logger.info('train size = {}'.format(len(train))) logger.info('valid size = {}'.format(len(valid))) train_iter = chainer.iterators.SerialIterator(train, 32) valid_iter = chainer.iterators.SerialIterator(valid, 32, repeat=False, shuffle=False) if args.loss_func == 'softmax': loss_func = SoftmaxCrossEntropyLoss(args.n_units, n_vocab) elif args.loss_func == 'ns': loss_func = L.NegativeSampling(args.n_units, counts, args.negative_size) loss_func.W.data[...] = 0 else: raise ValueError('invalid loss_func: {}'.format(args.loss_func)) prefix = '{}_{}_{}'.format(ladder, args.loss_func, args.n_units) model = ContinuousBoW(n_vocab, args.n_units, loss_func) optimizer = O.Adam() optimizer.setup(model) updater = training.updater.StandardUpdater(train_iter, optimizer) trainer = training.Trainer(updater, (10, 'epoch'), out='results') trainer.extend(extensions.Evaluator(valid_iter, model)) trainer.extend(extensions.LogReport(log_name='{}_log'.format(prefix))) trainer.extend( extensions.PrintReport(['epoch', 'main/loss', 'validation/main/loss'])) trainer.extend(extensions.ProgressBar()) trainer.run() # Save the word2vec model Path('results').mkdir(exist_ok=True) poke2vec_file = 'results/{}_poke2vec.model'.format(prefix) with open(poke2vec_file, 'w') as f: f.write('%d %d\n' % (n_vocab, args.n_units)) w = model.embed.W.data for i, wi in enumerate(w): v = ' '.join(map(str, wi)) f.write('%s %s\n' % (index2poke[i], v))
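# The train() above ends by dumping the embeddings in the plain word2vec text format:
# a "<n_vocab> <dim>" header followed by one "<token> <v1> ... <vd>" line per entry.
# A small sketch of reading such a file back; the path is whatever
# 'results/<prefix>_poke2vec.model' file the script produced.
import numpy as np


def load_word2vec_text(path):
    vectors = {}
    with open(path) as f:
        n_vocab, dim = map(int, f.readline().split())
        for line in f:
            fields = line.rstrip('\n').split(' ')
            vectors[fields[0]] = np.asarray(fields[1:], dtype=np.float32)
    assert len(vectors) == n_vocab
    assert all(v.shape == (dim,) for v in vectors.values())
    return vectors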
def main(): # list of available GPUs devices = {'main':0, 'second':2, 'third':3, 'fourth':4, 'fifth':5} parser = argparse.ArgumentParser(description='Training of a fully connected network for indoor acoustic localization.') parser.add_argument('config', type=str, help="The config file for the training, model, and data.") parser.add_argument('--batchsize', '-b', type=int, default=100, help='Number of samples in each mini-batch') parser.add_argument('--frequency', '-f', type=int, default=-1, help='Frequency of taking a snapshot') parser.add_argument('--out', '-o', default='result', help='Directory to output the result') parser.add_argument('--gpu', default='main', choices=devices.keys(), help='The GPU to use for the training') parser.add_argument('--resume', '-r', default='', help='Resume the training from snapshot') parser.add_argument('--noplot', dest='plot', action='store_false', help='Disable PlotReport extension') args = parser.parse_args() with open(args.config, 'r') as f: config = json.load(f) gpu = args.gpu epoch = config['training']['epoch'] batchsize = config['training']['batchsize'] out_dir = config['training']['out'] if 'out' in config['training'] else 'result' print('# Minibatch-size: {}'.format(batchsize)) print('# epoch: {}'.format(epoch)) print('') chainer.cuda.get_device_from_id(devices[gpu]).use() # Set up a neural network to train # Classifier reports mean squared error nn = models[config['model']['name']]( *config['model']['args'], **config['model']['kwargs'], ) model = L.Classifier(nn, lossfun=F.mean_squared_error) #model = L.Classifier(nn, lossfun=F.mean_absolute_error) model.compute_accuracy = False # Setup an optimizer optimizer = chainer.optimizers.Adam() optimizer.setup(model) # Helper to load the dataset data_formatter, label_formatter, skip = get_formatters(**config['data']['format_kwargs']) # Load the dataset train, validate, test = get_data(config['data']['file'], data_formatter=data_formatter, label_formatter=label_formatter, skip=skip) train_iter = chainer.iterators.SerialIterator(train, batchsize) validate_iter = chainer.iterators.SerialIterator(validate, batchsize, repeat=False, shuffle=False) # Set up a trainer #updater = training.ParallelUpdater(train_iter, optimizer, devices=devices) updater = training.StandardUpdater(train_iter, optimizer, device=devices[gpu]) trainer = training.Trainer(updater, (epoch, 'epoch'), out=out_dir) # Evaluate the model with the validation dataset for each epoch trainer.extend(extensions.Evaluator(validate_iter, model, device=devices[gpu])) # Dump a computational graph from 'loss' variable at the first iteration # The "main" refers to the target link of the "main" optimizer. trainer.extend(extensions.dump_graph('main/loss')) # Take a snapshot for each specified epoch frequency = epoch if args.frequency == -1 else max(1, args.frequency) trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch')) # Write a log of evaluation statistics for each epoch trainer.extend(extensions.LogReport()) # Save two plot images to the result dir if args.plot and extensions.PlotReport.available(): trainer.extend( extensions.PlotReport(['main/loss', 'validation/main/loss'], 'epoch', file_name='loss.png')) trainer.extend( extensions.PlotReport( ['main/accuracy', 'validation/main/accuracy'], 'epoch', file_name='accuracy.png')) # Print selected entries of the log to stdout # Here "main" refers to the target link of the "main" optimizer again, and # "validation" refers to the default name of the Evaluator extension.
# Entries other than 'epoch' are reported by the Classifier link, called by # either the updater or the evaluator. trainer.extend(extensions.PrintReport( ['epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy', 'elapsed_time'])) # Print a progress bar to stdout trainer.extend(extensions.ProgressBar()) if args.resume: # Resume from a snapshot chainer.serializers.load_npz(args.resume, trainer) # Run the training trainer.run() # save the trained model chainer.serializers.save_npz(config['model']['file'], nn) return nn, train, test
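# The main() above instantiates the network from a JSON config by looking the class
# up in a `models` dict and unpacking 'args'/'kwargs'. A minimal sketch of that
# registry pattern; the MLP below is a placeholder, not one of the models used by
# the script.
import chainer
import chainer.functions as F
import chainer.links as L


class SmallMLP(chainer.Chain):
    def __init__(self, n_units, n_out=1):
        super(SmallMLP, self).__init__()
        with self.init_scope():
            self.l1 = L.Linear(None, n_units)
            self.l2 = L.Linear(None, n_out)

    def __call__(self, x):
        return self.l2(F.relu(self.l1(x)))


models = {'small_mlp': SmallMLP}
config = {'model': {'name': 'small_mlp', 'args': [64], 'kwargs': {'n_out': 3}}}
nn = models[config['model']['name']](*config['model']['args'],
                                     **config['model']['kwargs'])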
def main(): archs = { 'alex': alex.Alex, 'alex_fp16': alex.AlexFp16, 'googlenet': googlenet.GoogLeNet, 'googlenetbn': googlenetbn.GoogLeNetBN, 'googlenetbn_fp16': googlenetbn.GoogLeNetBNFp16, 'nin': nin.NIN, 'resnet50': resnet50.ResNet50 } archs.update(dpns) parser = argparse.ArgumentParser( description='Learning convnet from ILSVRC2012 dataset') parser.add_argument('train', help='Path to training image-label list file') parser.add_argument('val', help='Path to validation image-label list file') parser.add_argument('--arch', '-a', choices=sorted(archs.keys()), default='nin', help='Convnet architecture') parser.add_argument('--batchsize', '-B', type=int, default=32, help='Learning minibatch size') parser.add_argument('--epoch', '-E', type=int, default=10, help='Number of epochs to train') parser.add_argument('--gpus', '-g', type=int, nargs="*", default=[0, 1, 2, 3]) parser.add_argument('--initmodel', help='Initialize the model from given file') parser.add_argument('--loaderjob', '-j', type=int, help='Number of parallel data loading processes') parser.add_argument('--mean', '-m', default='mean.npy', help='Mean file (computed by compute_mean.py)') parser.add_argument('--resume', '-r', default='', help='Initialize the trainer from given file') parser.add_argument('--out', '-o', default='result', help='Output directory') parser.add_argument('--root', '-R', default='.', help='Root directory path of image files') parser.add_argument('--val_batchsize', '-b', type=int, default=250, help='Validation minibatch size') parser.add_argument('--test', action='store_true') parser.set_defaults(test=False) args = parser.parse_args() # Initialize the model to train model = archs[args.arch]() if args.initmodel: print('Load model from', args.initmodel) chainer.serializers.load_npz(args.initmodel, model) # Load the datasets and mean file mean = np.load(args.mean) train = train_imagenet.PreprocessedDataset(args.train, args.root, mean, model.insize) val = train_imagenet.PreprocessedDataset(args.val, args.root, mean, model.insize, False) # These iterators load the images with subprocesses running in parallel to # the training/validation. 
devices = tuple(args.gpus) train_iters = [ chainer.iterators.MultiprocessIterator(i, args.batchsize, n_processes=args.loaderjob) for i in chainer.datasets.split_dataset_n_random(train, len(devices)) ] val_iter = chainer.iterators.MultiprocessIterator( val, args.val_batchsize, repeat=False, n_processes=args.loaderjob) # Set up an optimizer optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9) optimizer.setup(model) # Set up a trainer updater = updaters.MultiprocessParallelUpdater(train_iters, optimizer, devices=devices) trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out) if args.test: val_interval = 5, 'epoch' log_interval = 1, 'epoch' else: val_interval = 100000, 'iteration' log_interval = 1000, 'iteration' trainer.extend(extensions.Evaluator(val_iter, model, device=args.gpus[0]), trigger=val_interval) trainer.extend(extensions.dump_graph('main/loss')) trainer.extend(extensions.snapshot(), trigger=val_interval) trainer.extend(extensions.snapshot_object( model, 'model_iter_{.updater.iteration}'), trigger=val_interval) # Be careful to pass the interval directly to LogReport # (it determines when to emit log rather than when to read observations) trainer.extend(extensions.LogReport(trigger=log_interval)) trainer.extend(extensions.observe_lr(), trigger=log_interval) trainer.extend(extensions.PrintReport([ 'epoch', 'iteration', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy', 'lr' ]), trigger=log_interval) trainer.extend(extensions.ProgressBar(update_interval=2)) if args.resume: chainer.serializers.load_npz(args.resume, trainer) trainer.run()
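# main() above gives MultiprocessParallelUpdater one iterator per GPU, built from
# random shards of the training set. A CPU-only sketch of just that sharding step on
# a toy dataset (three shards here, which is arbitrary).
import numpy as np
import chainer

toy = chainer.datasets.TupleDataset(
    np.random.randn(12, 5).astype(np.float32),
    np.arange(12, dtype=np.int32) % 3)
shards = chainer.datasets.split_dataset_n_random(toy, 3)
iters = [chainer.iterators.SerialIterator(shard, batch_size=2) for shard in shards]
print([len(shard) for shard in shards])  # -> [4, 4, 4]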
def main(): parser = argparse.ArgumentParser() parser.add_argument('--model', choices=('ssd300', 'ssd512'), default='ssd300') parser.add_argument('--batchsize', type=int, default=32) parser.add_argument('--gpu', type=int, default=-1) parser.add_argument('--out', default='result') parser.add_argument('--resume') args = parser.parse_args() if args.model == 'ssd300': model = SSD300(n_fg_class=len(voc_bbox_label_names), pretrained_model='voc0712') elif args.model == 'ssd512': model = SSD512(n_fg_class=len(via_bbox_label_names), pretrained_model='imagenet') model.use_preset('evaluate') train_chain = MultiboxTrainChain(model) if args.gpu >= 0: chainer.cuda.get_device_from_id(args.gpu).use() model.to_gpu() train = TransformDataset(BboxDataset(), Transform(model.coder, model.insize, model.mean)) train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize) test = BboxDataset(split='test', use_difficult=True, return_difficult=True) test_iter = chainer.iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False) # initial lr is set to 1e-3 by ExponentialShift optimizer = chainer.optimizers.MomentumSGD() optimizer.setup(train_chain) for param in train_chain.params(): if param.name == 'b': param.update_rule.add_hook(GradientScaling(2)) else: param.update_rule.add_hook(WeightDecay(0.0005)) updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu) # 120000->8000 trainer = training.Trainer(updater, (500, 'iteration'), args.out) # 80000->5000,100000->7000 trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3), trigger=triggers.ManualScheduleTrigger([300, 400], 'iteration')) # 10000->700 trainer.extend(DetectionEvaluator(test_iter, model, use_07_metric=True, label_names=via_bbox_label_names), trigger=(7, 'iteration')) log_interval = 10, 'iteration' trainer.extend(extensions.LogReport(trigger=log_interval)) trainer.extend(extensions.observe_lr(), trigger=log_interval) trainer.extend(extensions.PrintReport([ 'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc', 'main/loss/conf', 'validation/main/map' ]), trigger=log_interval) trainer.extend(extensions.ProgressBar(update_interval=10)) # 10000->700 trainer.extend(extensions.snapshot(), trigger=(50, 'iteration')) # 120000->8000 trainer.extend(extensions.snapshot_object( model, 'model_iter_{.updater.iteration}'), trigger=(500, 'iteration')) if args.resume: serializers.load_npz(args.resume, trainer) trainer.run() serializers.save_npz('via_model', model) serializers.save_npz('via_state', optimizer)
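# The SSD script above attaches GradientScaling to bias parameters and WeightDecay to
# everything else via per-parameter update-rule hooks. The same wiring on a toy link,
# runnable on CPU; GradientScaling is imported from chainercv (assumed to be
# installed), WeightDecay ships with chainer.
import chainer
import chainer.links as L
from chainer.optimizer_hooks import WeightDecay
from chainercv.links.model.ssd import GradientScaling

link = L.Linear(4, 2)
optimizer = chainer.optimizers.MomentumSGD(lr=1e-3, momentum=0.9)
optimizer.setup(link)
for param in link.params():
    if param.name == 'b':
        # learn biases twice as fast, as in the original SSD recipe
        param.update_rule.add_hook(GradientScaling(2))
    else:
        param.update_rule.add_hook(WeightDecay(0.0005))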
def main(): parser = argparse.ArgumentParser(description='Train CycleGAN') parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU ID (negative value indicates CPU)') parser.add_argument('--base', '-B', default=os.path.dirname(os.path.abspath(__file__)), help='base directory path of program files') parser.add_argument('--config_path', type=str, default='configs/training.yml', help='path to config file') parser.add_argument('--out', '-o', default='results/training', help='Directory to output the result') parser.add_argument('--model', '-m', default='', help='Load model data') parser.add_argument('--model2', '-m2', default='', help='Load model data') parser.add_argument('--resume', '-res', default='', help='Resume the training from snapshot') parser.add_argument('--root', '-R', default=os.path.dirname(os.path.abspath(__file__)), help='Root directory path of input image') args = parser.parse_args() config = yaml_utils.Config( yaml.load(open(os.path.join(args.base, args.config_path)))) print('GPU: {}'.format(args.gpu)) print('# Minibatch-size: {}'.format(config.batchsize)) print('# iteration: {}'.format(config.iteration)) print('Learning Rate: {}'.format(config.adam['alpha'])) print('') #load the dataset print('----- Load dataset -----') train = CycleganDataset(args.root, os.path.join(args.base, config.dataset['training_fn']), config.patch['patchside'], [config.patch['lrmin'], config.patch['lrmax']], augmentation=True) train_iter = chainer.iterators.MultiprocessIterator( train, batch_size=config.batchsize) print('----- Set up model ------') gen = Generator_SR() gen2 = Generator_SR() disY = Discriminator() # chainer.serializers.load_npz(args.model, gen) # chainer.serializers.load_npz(args.model2, gen2) if args.gpu >= 0: chainer.backends.cuda.set_max_workspace_size(1024 * 1024 * 1024) # 1GB chainer.backends.cuda.get_device_from_id(args.gpu).use() gen.to_gpu() gen2.to_gpu() disY.to_gpu() print('----- Make optimizer -----') def make_optimizer(model, alpha=0.00001, beta1=0.9, beta2=0.999): optimizer = chainer.optimizers.Adam(alpha=alpha, beta1=beta1, beta2=beta2) optimizer.setup(model) return optimizer gen_opt = make_optimizer(model=gen, alpha=config.adam['alpha'], beta1=config.adam['beta1'], beta2=config.adam['beta2']) gen2_opt = make_optimizer(model=gen2, alpha=config.adam['alpha'], beta1=config.adam['beta1'], beta2=config.adam['beta2']) disY_opt = make_optimizer(model=disY, alpha=config.adam['alpha'], beta1=config.adam['beta1'], beta2=config.adam['beta2']) print('----- Make updater -----') updater = CinCGANUpdater(models=(gen, gen2, disY), iterator=train_iter, optimizer={ 'gen': gen_opt, 'gen2': gen2_opt, 'disY': disY_opt }, device=args.gpu) print('----- Save configs -----') def create_result_dir(base_dir, output_dir, config_path, config): """https://github.com/pfnet-research/sngan_projection/blob/master/train.py""" result_dir = os.path.join(base_dir, output_dir) if not os.path.exists(result_dir): os.makedirs(result_dir) if not os.path.exists('{}/init'.format(result_dir)): os.makedirs('{}/init'.format(result_dir)) def copy_to_result_dir(fn, result_dir): bfn = os.path.basename(fn) shutil.copy(fn, '{}/{}'.format(result_dir, bfn)) copy_to_result_dir(os.path.join(base_dir, config_path), result_dir) copy_to_result_dir(os.path.join(base_dir, config.network['fn']), result_dir) copy_to_result_dir(os.path.join(base_dir, config.updater['fn']), result_dir) copy_to_result_dir( os.path.join(base_dir, config.dataset['training_fn']), result_dir) create_result_dir(args.base, args.out, 
args.config_path, config) print('----- Make trainer -----') trainer = training.Trainer(updater, (config.iteration, 'iteration'), out=os.path.join(args.base, args.out)) # Set up logging snapshot_interval = (config.snapshot_interval, 'iteration') display_interval = (config.display_interval, 'iteration') evaluation_interval = (config.evaluation_interval, 'iteration') trainer.extend( extensions.snapshot(filename='snapshot_iter_{.updater.iteration}.npz'), trigger=snapshot_interval) trainer.extend(extensions.snapshot_object( gen, filename='gen_iter_{.updater.iteration}.npz'), trigger=snapshot_interval) trainer.extend(extensions.snapshot_object( gen2, filename='gen2_iter_{.updater.iteration}.npz'), trigger=snapshot_interval) trainer.extend(extensions.snapshot_object( disY, filename='disY_iter_{.updater.iteration}.npz'), trigger=snapshot_interval) # Write a log of evaluation statistics for each epoch trainer.extend(extensions.LogReport(trigger=display_interval)) trainer.extend(reconstruct_hr_img(gen, gen2, os.path.join(args.base, args.out), train_iter, train), trigger=evaluation_interval, priority=extension.PRIORITY_WRITER) # Print a progress bar to stdout trainer.extend(extensions.ProgressBar(update_interval=10)) # Save two plot images to the result dir if extensions.PlotReport.available(): trainer.extend( extensions.PlotReport(['gen/loss_gen1'], 'iteration', file_name='gen_loss.png', trigger=display_interval)) trainer.extend( extensions.PlotReport([ 'disY/loss_dis1_fake', 'disY/loss_dis1_real', 'disY/loss_dis1' ], 'iteration', file_name='dis_loss.png', trigger=display_interval)) trainer.extend( extensions.PlotReport(['gen/loss_gen', 'disY/loss_dis1'], 'iteration', file_name='adv_loss.png', trigger=display_interval)) if args.resume: # Resume from a snapshot chainer.serializers.load_npz(args.resume, trainer) print('----- Run the training -----') reset_seed(0) trainer.run()
def main(args): abs_dest = "/work/sasaki.shota/" if args.snapshot: start_time = datetime.now().strftime('%Y%m%d_%H_%M_%S') dest = "../result/" + start_time os.makedirs(dest) abs_dest = os.path.abspath(dest) with open(os.path.join(dest, "settings.json"), "w") as fo: fo.write(json.dumps(vars(args), sort_keys=True, indent=4)) # load data data_processor = CopaDataProcessor(args.data, args.vocab, args.test, args.gpu, args) data_processor.prepare_dataset() train_data = data_processor.train_data copa_data = data_processor.copa_data # create model vocab_c = data_processor.vocab_c vocab_r = data_processor.vocab_r embed_dim = args.dim cnn = ABCNN_2(n_vocab_c=len(vocab_c), n_vocab_r=len(vocab_r), n_layer=args.layer\ ,embed_dim=embed_dim, input_channel=1, output_channel=50,wordvec_unchain=args.wordvec_unchain) model = L.Classifier(cnn, lossfun=sigmoid_cross_entropy, accfun=binary_accuracy) if args.gpu >= 0: # cuda.get_device(str(args.gpu)).use() cuda.get_device(args.gpu).use() model.to_gpu() if args.word2vec: cnn.load_word2vec_embeddings(args.word2vec_path, data_processor.vocab_c, data_processor.vocab_r) cnn.pad_vec2zero(data_processor.vocab_c, data_processor.vocab_r) # setup optimizer optimizer = O.AdaGrad(args.lr) optimizer.setup(model) # do not use weight decay for embeddings decay_params = {name: 1 for name, variable in model.namedparams() if "embed" not in name} optimizer.add_hook(SelectiveWeightDecay(rate=args.decay, decay_params=decay_params)) # train_iter = chainer.iterators.SerialIterator(train_data, args.batchsize) train_iter = IteratorWithNS(train_data, args.batchsize) dev_train_iter = chainer.iterators.SerialIterator(train_data, args.batchsize, repeat=False) #for SVM copa_iter = COPAIterator(copa_data) updater = training.StandardUpdater(train_iter, optimizer, converter=concat_examples, device=args.gpu) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=abs_dest) # setup evaluation # eval_predictor = model.copy().predictor.to_cpu() eval_predictor = model.copy().predictor eval_predictor.train = False iters = {"train": dev_train_iter, "test": copa_iter} trainer.extend(COPAEvaluator(iters, eval_predictor, converter=concat_examples, device=args.gpu) , trigger=(1000,'iteration') ) # trainer.extend(COPAEvaluator(iters, eval_predictor, converter=concat_examples, device=args.gpu)) # extentions... trainer.extend(extensions.LogReport(trigger=(1000,'iteration')) ) trainer.extend(extensions.PrintReport( ['epoch', 'main/loss', 'main/accuracy', 'validation/main/loss', 'copa_dev_acc', 'copa_test_acc']) ,trigger=(1000,'iteration') ) trainer.extend(extensions.ProgressBar(update_interval=10)) if args.snapshot: trainer.extend(extensions.snapshot_object( model, 'model_epoch_{.updater.epoch}', trigger=chainer.training.triggers.MaxValueTrigger('validation/map'))) # trainer.extend(extensions.ExponentialShift("lr", 0.5, optimizer=optimizer), # trigger=chainer.training.triggers.MinValueTrigger("validation/loss")) trainer.run()
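# The COPA script above excludes embedding matrices from weight decay through a
# custom SelectiveWeightDecay hook. A hedged sketch of how such a hook can be written
# (the repo's actual implementation may differ): plain L2 decay applied only to
# parameters whose names appear in decay_params.
class SelectiveWeightDecay(object):
    name = 'SelectiveWeightDecay'
    timing = 'pre'

    def __init__(self, rate, decay_params):
        self.rate = rate
        self.decay_params = decay_params

    def __call__(self, opt):
        for name, param in opt.target.namedparams():
            if name in self.decay_params and param.grad is not None:
                param.grad += self.rate * param.data

# usage, mirroring the script above:
#   decay_params = {n: 1 for n, _ in model.namedparams() if 'embed' not in n}
#   optimizer.add_hook(SelectiveWeightDecay(rate=1e-4, decay_params=decay_params))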
def main(): parser = argparse.ArgumentParser( description= 'Fully Convolutional Dual Center Pose Proposal Network for Pose Estimation' ) parser.add_argument('--batchsize', '-b', type=int, default=1, help='Number of images in each mini-batch') parser.add_argument('--epoch', '-e', type=int, default=200, help='Number of sweeps over the dataset to train') parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU ID (negative value indicates CPU)') parser.add_argument('--out', '-o', default='results/dual_cp', help='Directory to output the result') parser.add_argument('--resume', '-r', default='', help='Resume the training from snapshot') parser.add_argument('--seed', type=int, default=0, help='Random seed') parser.add_argument('--snapshot_interval', type=int, default=1000, help='Interval of snapshot') parser.add_argument('--display_interval', type=int, default=100, help='Interval of displaying log to console') parser.add_argument('--frequency', '-f', type=int, default=-1, help='Frequency of taking a snapshot') parser.add_argument('--train_resnet', type=bool, default=True, help='train resnet') args = parser.parse_args() print('GPU: {}'.format(args.gpu)) print('# Minibatch-size: {}'.format(args.batchsize)) print('# epoch: {}'.format(args.epoch)) print('') n_class = 9 train_path = os.path.join(os.getcwd(), root, 'train_data/OcclusionChallengeICCV2015') caffe_model = 'ResNet-50-model.caffemodel' distance_sanity = 0.05 chainer.using_config('cudnn_deterministic', True) model = DualCPNetClassifier(DualCenterProposalNetworkRes50FCN( n_class=n_class, output_scale=1.0, pretrained_model=not args.train_resnet), mothod="RANSAC", distance_sanity=distance_sanity) if args.gpu >= 0: chainer.cuda.get_device(args.gpu).use() # Make a specified GPU current model.to_gpu() # Copy the model to the GPU # Setup an optimizer optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9) optimizer.setup(model) # load train data train = DualCPNetDataset(train_path, range(0, 1200)[0::2], img_height=192, img_width=256, random=True, random_crop=True) # load test data test = DualCPNetDataset(train_path, range(0, 1200)[1::2], img_height=192, img_width=256) train_iter = chainer.iterators.SerialIterator(train, args.batchsize) test_iter = chainer.iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False) updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out) # Evaluate the model with the test dataset for each epoch evaluator = extensions.Evaluator(test_iter, model, device=args.gpu) evaluator.default_name = 'val' trainer.extend(evaluator) # The "main" refers to the target link of the "main" optimizer. 
trainer.extend(extensions.dump_graph('main/loss')) # Take a snapshot for each specified epoch frequency = args.epoch if args.frequency == -1 else max(1, args.frequency) trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch')) # Write a log of evaluation statistics for each epoch trainer.extend(extensions.LogReport()) # Save two plot images to the result dir # if extensions.PlotReport.available(): # trainer.extend( # extensions.PlotReport(['main/loss'], # 'epoch', file_name='loss.png')) # trainer.extend( # extensions.PlotReport( # ['main/accuracy'], # 'epoch', file_name='accuracy.png')) trainer.extend( extensions.PrintReport([ 'epoch', 'main/l_cls', 'main/l_cp', 'main/l_ocp', 'main/cls_acc', 'main/cp_acc', 'main/ocp_acc', 'val/main/l_cls', 'val/main/l_cp', 'val/main/l_ocp', 'val/main/cls_acc', 'val/main/cp_acc', 'val/main/ocp_acc', 'elapsed_time' ])) # Print a progress bar to stdout trainer.extend(extensions.ProgressBar()) if args.resume: # Resume from a snapshot chainer.serializers.load_npz(args.resume, trainer) else: npz_name = 'DualCenterProposalNetworkRes50FCN_occulusion_challenge.npz' caffemodel_name = 'ResNet-50-model.caffemodel' path = os.path.join(root, 'trained_data/', npz_name) path_caffemodel = os.path.join(root, 'trained_data/', caffemodel_name) print('npz model path : ' + path) print('caffe model path : ' + path_caffemodel) download.cache_or_load_file( path, lambda path: _make_chainermodel_npz(path, path_caffemodel, model, n_class), lambda path: serializers.load_npz(path, model)) # Run the training trainer.run()
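# The resume/else branch above converts the caffemodel to an npz once and caches it
# via chainer.dataset.download.cache_or_load_file. A tiny standalone sketch of that
# helper with a cheap creator/loader pair; the cached payload here is just a small
# JSON dict, not a converted model.
import json
import os
from chainer.dataset import download


def creator(cache_path):
    content = {'mean_bgr': [104.0, 117.0, 123.0]}
    with open(cache_path, 'w') as f:
        json.dump(content, f)
    return content


def loader(cache_path):
    with open(cache_path) as f:
        return json.load(f)


path = os.path.join(download.get_dataset_directory('cache_or_load_demo'), 'mean.json')
content = download.cache_or_load_file(path, creator, loader)  # creates once, loads afterwards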
def train(args=None): save_args(args) dataset = FoodDataset(dataset_dir=args.dataset, model_name=args.model_name, train=True) train_dataset, valid_dataset = split_dataset_random(dataset, int(0.9 * len(dataset)), seed=args.seed) train_iter = MultiprocessIterator(train_dataset, args.batch_size) val_iter = MultiprocessIterator(valid_dataset, args.batch_size, repeat=False, shuffle=False) if args.model_name == 'mv2': model = MobilenetV2(num_classes=101, depth_multiplier=1.0) elif args.model_name == "vgg16": model = VGG16(num_classes=101) elif args.model_name == "resnet50": model = ResNet50(num_classes=101) else: raise Exception("illegal model name") model = L.Classifier(model) if args.model_name == "mv2": optimizer = chainer.optimizers.SGD(lr=0.005) else: optimizer = chainer.optimizers.Adam() optimizer.setup(model) if args.model_name == "vgg16": model.predictor.disable_target_layers() if args.model_name == "resnet50": model.predictor.disable_target_layers() if args.device >= 0: chainer.backends.cuda.get_device_from_id(args.device).use() model.to_gpu() updater = training.updaters.StandardUpdater(train_iter, optimizer, device=args.device) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.destination) snapshot_interval = (1, 'epoch') trainer.extend(extensions.Evaluator(val_iter, model, device=args.device), trigger=snapshot_interval) trainer.extend(extensions.ProgressBar()) trainer.extend( extensions.LogReport(trigger=snapshot_interval, log_name='log.json')) trainer.extend( extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}.npz'), trigger=snapshot_interval) trainer.extend(extensions.snapshot_object( model, 'model_epoch_{.updater.epoch}.npz'), trigger=snapshot_interval) if extensions.PlotReport.available(): trainer.extend(extensions.PlotReport( ['main/loss', 'validation/main/loss'], 'epoch', file_name='loss.png'), trigger=snapshot_interval) trainer.extend(extensions.PlotReport( ['main/accuracy', 'validation/main/accuracy'], 'epoch', file_name='accuracy.png'), trigger=snapshot_interval) if args.resume: chainer.serializers.load_npz(args.resume, trainer) trainer.run()
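# For vgg16/resnet50 the train() above calls model.predictor.disable_target_layers()
# before fine-tuning; that helper is specific to this repo, but the generic Chainer
# mechanism behind freezing pretrained layers is Link.disable_update(), sketched here
# on a toy two-part chain.
import chainer
import chainer.functions as F
import chainer.links as L


class SmallNet(chainer.Chain):
    def __init__(self):
        super(SmallNet, self).__init__()
        with self.init_scope():
            self.backbone = L.Linear(16, 8)  # pretend this part is pretrained
            self.head = L.Linear(8, 3)       # freshly initialized classifier head

    def __call__(self, x):
        return self.head(F.relu(self.backbone(x)))


net = SmallNet()
net.backbone.disable_update()  # the optimizer will skip the frozen parameters
optimizer = chainer.optimizers.Adam()
optimizer.setup(net)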
def train(args): # display torch version logging.info('torch version = ' + torch.__version__) # seed setting nseed = args.seed torch.manual_seed(nseed) logging.info('torch seed = ' + str(nseed)) # debug mode setting # 0 would be fastest, but 1 seems to be reasonable # considering reproducibility # use deterministic computation or not if args.debugmode < 1: torch.backends.cudnn.deterministic = False logging.info('torch cudnn deterministic is disabled') else: torch.backends.cudnn.deterministic = True # check cuda and cudnn availability if not torch.cuda.is_available(): logging.warning('cuda is not available') # get special label ids unk = args.char_list_dict['<unk>'] eos = args.char_list_dict['<eos>'] # read tokens as a sequence of sentences train = read_tokens(args.train_label, args.char_list_dict) val = read_tokens(args.valid_label, args.char_list_dict) # count tokens n_train_tokens, n_train_oovs = count_tokens(train, unk) n_val_tokens, n_val_oovs = count_tokens(val, unk) logging.info('#vocab = ' + str(args.n_vocab)) logging.info('#sentences in the training data = ' + str(len(train))) logging.info('#tokens in the training data = ' + str(n_train_tokens)) logging.info('oov rate in the training data = %.2f %%' % (n_train_oovs / n_train_tokens * 100)) logging.info('#sentences in the validation data = ' + str(len(val))) logging.info('#tokens in the validation data = ' + str(n_val_tokens)) logging.info('oov rate in the validation data = %.2f %%' % (n_val_oovs / n_val_tokens * 100)) # Create the dataset iterators train_iter = ParallelSentenceIterator(train, args.batchsize, max_length=args.maxlen, sos=eos, eos=eos) val_iter = ParallelSentenceIterator(val, args.batchsize, max_length=args.maxlen, sos=eos, eos=eos, repeat=False) logging.info('#iterations per epoch = ' + str(len(train_iter.batch_indices))) logging.info('#total iterations = ' + str(args.epoch * len(train_iter.batch_indices))) # Prepare an RNNLM model rnn = RNNLM(args.n_vocab, args.layer, args.unit) model = ClassifierWithState(rnn) if args.ngpu > 1: logging.warn("currently, multi-gpu is not supported. 
use single gpu.") if args.ngpu > 0: # Make the specified GPU current gpu_id = 0 model.cuda(gpu_id) else: gpu_id = -1 # Save model conf to json model_conf = args.outdir + '/model.json' with open(model_conf, 'wb') as f: logging.info('writing a model config file to ' + model_conf) f.write( json.dumps(vars(args), indent=4, sort_keys=True).encode('utf_8')) # Set up an optimizer if args.opt == 'sgd': optimizer = torch.optim.SGD(model.parameters(), lr=1.0) elif args.opt == 'adam': optimizer = torch.optim.Adam(model.parameters()) # FIXME: TOO DIRTY HACK reporter = model.reporter setattr(optimizer, "target", reporter) setattr(optimizer, "serialize", lambda s: reporter.serialize(s)) updater = BPTTUpdater(train_iter, model, optimizer, gpu_id, gradclip=args.gradclip) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.outdir) trainer.extend(LMEvaluator(val_iter, model, reporter, device=gpu_id)) trainer.extend( extensions.LogReport(postprocess=compute_perplexity, trigger=(REPORT_INTERVAL, 'iteration'))) trainer.extend(extensions.PrintReport( ['epoch', 'iteration', 'perplexity', 'val_perplexity', 'elapsed_time']), trigger=(REPORT_INTERVAL, 'iteration')) trainer.extend(extensions.ProgressBar(update_interval=REPORT_INTERVAL)) # Save best models trainer.extend(torch_snapshot(filename='snapshot.ep.{.updater.epoch}')) trainer.extend( extensions.snapshot_object(model, 'rnnlm.model.{.updater.epoch}', savefun=torch_save)) # T.Hori: MinValueTrigger should be used, but it fails when resuming trainer.extend( MakeSymlinkToBestModel('validation/main/loss', 'rnnlm.model')) if args.resume: logging.info('resumed from %s' % args.resume) torch_resume(args.resume, trainer) trainer.run() # compute perplexity for test set if args.test_label: logging.info('test the best model') torch_load(args.outdir + '/rnnlm.model.best', model) test = read_tokens(args.test_label, args.char_list_dict) n_test_tokens, n_test_oovs = count_tokens(test, unk) logging.info('#sentences in the test data = ' + str(len(test))) logging.info('#tokens in the test data = ' + str(n_test_tokens)) logging.info('oov rate in the test data = %.2f %%' % (n_test_oovs / n_test_tokens * 100)) test_iter = ParallelSentenceIterator(test, args.batchsize, max_length=args.maxlen, sos=eos, eos=eos, repeat=False) evaluator = LMEvaluator(test_iter, model, reporter, device=gpu_id) result = evaluator() logging.info('test perplexity: ' + str(np.exp(float(result['main/loss']))))
def setup(): # Load the config file with open(CONFIG_FILE, "r") as f: config = yaml.load(f) xp = np if not config["use_gpu"] else cuda.cupy # Set up the output directory for training results restart = config["restart_dir"] is not None if restart: result_children_dir = config["restart_dir"] else: result_children_dir = "result_" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") result_dir = os.path.join(config["result_dir"], result_children_dir) result_dir_train = os.path.join(result_dir, MODEL_DIR) result_dir_val = os.path.join(result_dir, VALIDATE_DIR) # Load the training data train_scores = [] with open(os.path.join(config["score_dir"], config["train_list"]), "r") as tr_f: train_info = list(map(lambda x: x.split("\n")[0], tr_f.readlines())) train_paths = list(map(lambda x: os.path.join(config["score_dir"], x.split("\t")[0]), train_info)) train_score_lvs = list(map(lambda x: int(x.split("\t")[1])-1, train_info)) for idx, npy_path in enumerate(train_paths): score = xp.load(npy_path) score[:, 8] /= 100.0 # Split the score into measures score = score.reshape((-1, 1728)) train_scores.append(score) sys.stdout.write("\rtrain score loaded: {0:4d}/{1}".format(idx+1, len(train_paths))) sys.stdout.write("\n") # Load the validation data val_scores = [] val_score_names = [] with open(os.path.join(config["score_dir"], config["validate_list"]), "r") as val_f: val_info = list(map(lambda x: x.split("\n")[0], val_f.readlines())) val_paths = list(map(lambda x: os.path.join(config["score_dir"], x.split("\t")[0]), val_info)) val_score_lvs = list(map(lambda x: int(x.split("\t")[1])-1, val_info)) for idx, npy_path in enumerate(val_paths): score = xp.load(npy_path) score[:, 8] /= 100.0 # Split the score into measures score = score.reshape((-1, 1728)) val_scores.append(score) score_name = os.path.basename(npy_path) val_score_names.append(score_name) sys.stdout.write("\rvalidate score loaded: {0:4d}/{1}".format(idx+1, len(val_paths))) sys.stdout.write("\n") # model and optimizer model = Estimator() if xp is not np: model.to_device("@cupy:0") optimizer = Adam(float(config["lr"])) optimizer.setup(model) # iterator, updater, trainer, extension train_dataset = TupleDataset(train_scores, train_score_lvs) train_iterator = SerialIterator(train_dataset, int(config["batch_size"])) val_dataset = TupleDataset(val_scores, val_score_lvs, val_score_names) val_iterator = SerialIterator(val_dataset, int(config["batch_size"]), repeat=False, shuffle=False) updater = EstimatorUpdater(iterator=train_iterator, optimizer=optimizer) trainer = Trainer(updater, stop_trigger=(config["epochs"], "epoch"), out=result_dir_train) trainer.extend(Validator(val_iterator, result_dir_val), trigger=(1, "epoch")) trainer.extend(extensions.snapshot(filename="snapshot_epoch_{.updater.epoch}")) trainer.extend(extensions.LogReport(trigger=(1, "epoch")), trigger=(1, "epoch")) trainer.extend(extensions.PrintReport(["epoch", "train/loss", "train/acc", "val/loss", "val/acc", "val/rough_acc"])) trainer.extend(extensions.ProgressBar(update_interval=5)) if restart: # Find the snapshot to restart training from snapshot_path_format = os.path.join(result_dir_train, "snapshot_epoch_*") snapshots = [os.path.basename(fname) for fname in glob.glob(snapshot_path_format)] if len(snapshots) == 0: print("No snapshot found to restart training from.") exit() else: pattern = re.compile("snapshot_epoch_([0-9]+)") snapshot_epochs = list(map(lambda x: int(pattern.search(x).group(1)), snapshots)) prev_snapshot_idx = snapshot_epochs.index(max(snapshot_epochs)) prev_snapshot = snapshots[prev_snapshot_idx] load_npz(os.path.join(result_dir_train, prev_snapshot), trainer) shutil.copy2(CONFIG_FILE, result_dir) 
return trainer
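# setup() above finds the newest "snapshot_epoch_*" file with glob + a regex before
# loading it into the trainer. The same logic as a small reusable helper; the file
# naming follows the snapshot filename template used above.
import glob
import os
import re


def find_latest_snapshot(result_dir, pattern=r'snapshot_epoch_([0-9]+)'):
    candidates = []
    for path in glob.glob(os.path.join(result_dir, 'snapshot_epoch_*')):
        match = re.search(pattern, os.path.basename(path))
        if match:
            candidates.append((int(match.group(1)), path))
    return max(candidates)[1] if candidates else None

# usage:
#   path = find_latest_snapshot(result_dir_train)
#   if path is not None:
#       load_npz(path, trainer)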
def main(): parser = argparse.ArgumentParser( description='Chainer example: POS-tagging') parser.add_argument('--batchsize', '-b', type=int, default=30, help='Number of images in each mini batch') parser.add_argument('--epoch', '-e', type=int, default=20, help='Number of sweeps over the dataset to train') parser.add_argument('--device', '-d', type=str, default='-1', help='Device specifier. Either ChainerX device ' 'specifier or an integer. If non-negative integer, ' 'CuPy arrays with specified device id are used. If ' 'negative integer, NumPy arrays are used') parser.add_argument('--out', '-o', default='result', help='Directory to output the result') parser.add_argument('--resume', '-r', default='', help='Resume the training from snapshot') group = parser.add_argument_group('deprecated arguments') group.add_argument('--gpu', '-g', dest='device', type=int, nargs='?', const=0, help='GPU ID (negative value indicates CPU)') args = parser.parse_args() if chainer.get_dtype() == numpy.float16: warnings.warn( 'This example may cause NaN in FP16 mode.', RuntimeWarning) vocab = collections.defaultdict(lambda: len(vocab)) pos_vocab = collections.defaultdict(lambda: len(pos_vocab)) # Convert word sequences and pos sequences to integer sequences. nltk.download('brown') data = [] for sentence in nltk.corpus.brown.tagged_sents(): xs = numpy.array([vocab[lex] for lex, _ in sentence], numpy.int32) ys = numpy.array([pos_vocab[pos] for _, pos in sentence], numpy.int32) data.append((xs, ys)) print('# of sentences: {}'.format(len(data))) print('# of words: {}'.format(len(vocab))) print('# of pos: {}'.format(len(pos_vocab))) device = chainer.get_device(args.device) device.use() model = CRF(len(vocab), len(pos_vocab)) model.to_device(device) optimizer = chainer.optimizers.Adam() optimizer.setup(model) optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001)) test_data, train_data = datasets.split_dataset_random( data, len(data) // 10, seed=0) train_iter = chainer.iterators.SerialIterator(train_data, args.batchsize) test_iter = chainer.iterators.SerialIterator(test_data, args.batchsize, repeat=False, shuffle=False) updater = training.updaters.StandardUpdater( train_iter, optimizer, converter=convert, device=device) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out) evaluator = extensions.Evaluator( test_iter, model, device=device, converter=convert) # Only validate in each 1000 iteration trainer.extend(evaluator, trigger=(1000, 'iteration')) trainer.extend(extensions.LogReport(trigger=(100, 'iteration')), trigger=(100, 'iteration')) trainer.extend( extensions.MicroAverage( 'main/correct', 'main/total', 'main/accuracy')) trainer.extend( extensions.MicroAverage( 'validation/main/correct', 'validation/main/total', 'validation/main/accuracy')) trainer.extend( extensions.PrintReport( ['epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy', 'elapsed_time']), trigger=(100, 'iteration')) trainer.extend(extensions.ProgressBar(update_interval=10)) if args.resume: chainer.serializers.load_npz(args.resume, trainer) trainer.run()
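# The POS-tagging example above aggregates accuracy with extensions.MicroAverage,
# which divides a summed 'correct' count by a summed 'total' count at report time.
# A sketch of the reporting side that such an extension expects: a wrapper link that
# reports both counts (the CRF model above reports analogous values under 'main/').
import chainer
import chainer.functions as F
from chainer import reporter


class CountingClassifier(chainer.Chain):
    def __init__(self, predictor):
        super(CountingClassifier, self).__init__()
        with self.init_scope():
            self.predictor = predictor

    def __call__(self, x, t):
        # t is assumed to be an integer ndarray of labels
        y = self.predictor(x)
        loss = F.softmax_cross_entropy(y, t)
        correct = int((y.array.argmax(axis=1) == t).sum())
        reporter.report({'loss': loss, 'correct': correct, 'total': len(t)}, self)
        return loss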
def main(): parser = argparse.ArgumentParser(description='Chainer example: seq2seq') parser.add_argument('SOURCE', help='source sentence list') parser.add_argument('TARGET', help='target sentence list') parser.add_argument('SOURCE_VOCAB', help='source vocabulary file') parser.add_argument('TARGET_VOCAB', help='target vocabulary file') parser.add_argument('--validation-source', help='source sentence list for validation') parser.add_argument('--validation-target', help='target sentence list for validation') parser.add_argument('--batchsize', '-b', type=int, default=64, help='number of sentence pairs in each mini-batch') parser.add_argument('--epoch', '-e', type=int, default=20, help='number of sweeps over the dataset to train') parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU ID (negative value indicates CPU)') parser.add_argument('--resume', '-r', default='', help='resume the training from snapshot') parser.add_argument('--unit', '-u', type=int, default=1024, help='number of units') parser.add_argument('--layer', '-l', type=int, default=3, help='number of layers') parser.add_argument('--min-source-sentence', type=int, default=1, help='minimum length of source sentence') parser.add_argument('--max-source-sentence', type=int, default=50, help='maximum length of source sentence') parser.add_argument('--min-target-sentence', type=int, default=1, help='minimum length of target sentence') parser.add_argument('--max-target-sentence', type=int, default=50, help='maximum length of target sentence') parser.add_argument('--out', '-o', default='result', help='directory to output the result') args = parser.parse_args() source_ids = load_vocabulary(args.SOURCE_VOCAB) target_ids = load_vocabulary(args.TARGET_VOCAB) train_source = load_data(source_ids, args.SOURCE) train_target = load_data(target_ids, args.TARGET) assert len(train_source) == len(train_target) train_data = [ (s, t) for s, t in six.moves.zip(train_source, train_target) if args.min_source_sentence <= len(s) <= args.max_source_sentence and args.min_target_sentence <= len(t) <= args.max_target_sentence ] train_source_unknown = calculate_unknown_ratio([s for s, _ in train_data]) train_target_unknown = calculate_unknown_ratio([t for _, t in train_data]) print('Source vocabulary size: %d' % len(source_ids)) print('Target vocabulary size: %d' % len(target_ids)) print('Train data size: %d' % len(train_data)) print('Train source unknown ratio: %.2f%%' % (train_source_unknown * 100)) print('Train target unknown ratio: %.2f%%' % (train_target_unknown * 100)) target_words = {i: w for w, i in target_ids.items()} source_words = {i: w for w, i in source_ids.items()} model = Seq2seq(args.layer, len(source_ids), len(target_ids), args.unit) if args.gpu >= 0: chainer.cuda.get_device(args.gpu).use() model.to_gpu(args.gpu) optimizer = chainer.optimizers.Adam() optimizer.setup(model) train_iter = chainer.iterators.SerialIterator(train_data, args.batchsize) updater = training.StandardUpdater(train_iter, optimizer, converter=convert, device=args.gpu) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out) trainer.extend(extensions.LogReport(trigger=(200, 'iteration'))) trainer.extend(extensions.PrintReport([ 'epoch', 'iteration', 'main/loss', 'validation/main/loss', 'main/perp', 'validation/main/perp', 'validation/main/bleu', 'elapsed_time' ]), trigger=(200, 'iteration')) if args.validation_source and args.validation_target: test_source = load_data(source_ids, args.validation_source) test_target = load_data(target_ids, args.validation_target) 
assert len(test_source) == len(test_target) test_data = list(six.moves.zip(test_source, test_target)) test_data = [(s, t) for s, t in test_data if 0 < len(s) and 0 < len(t)] test_source_unknown = calculate_unknown_ratio( [s for s, _ in test_data]) test_target_unknown = calculate_unknown_ratio( [t for _, t in test_data]) print('Validation data: %d' % len(test_data)) print('Validation source unknown ratio: %.2f%%' % (test_source_unknown * 100)) print('Validation target unknown ratio: %.2f%%' % (test_target_unknown * 100)) @chainer.training.make_extension(trigger=(200, 'iteration')) def translate(trainer): source, target = test_data[numpy.random.choice(len(test_data))] result = model.translate([model.xp.array(source)])[0] source_sentence = ' '.join([source_words[x] for x in source]) target_sentence = ' '.join([target_words[y] for y in target]) result_sentence = ' '.join([target_words[y] for y in result]) print('# source : ' + source_sentence) print('# result : ' + result_sentence) print('# expect : ' + target_sentence) trainer.extend(translate, trigger=(4000, 'iteration')) trainer.extend(CalculateBleu(model, test_data, 'validation/main/bleu', device=args.gpu), trigger=(4000, 'iteration')) print('start training') trainer.run()
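# CalculateBleu above is a custom extension that scores greedy translations of the
# validation pairs. A hedged sketch of such an extension with NLTK's corpus_bleu;
# `model.translate` and the (source, target) pair format follow the example above,
# while the one-sentence-at-a-time loop and the smoothing choice are simplifications.
import chainer
from nltk.translate import bleu_score


class CalculateBleuSketch(chainer.training.Extension):
    trigger = (4000, 'iteration')
    priority = chainer.training.PRIORITY_WRITER

    def __init__(self, model, test_data, key):
        self.model = model
        self.test_data = test_data
        self.key = key

    def __call__(self, trainer):
        references, hypotheses = [], []
        with chainer.no_backprop_mode(), chainer.using_config('train', False):
            for source, target in self.test_data:
                references.append([target.tolist()])
                result = self.model.translate([self.model.xp.array(source)])[0]
                hypotheses.append(result.tolist())
        bleu = bleu_score.corpus_bleu(
            references, hypotheses,
            smoothing_function=bleu_score.SmoothingFunction().method1)
        chainer.report({self.key: bleu})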