def setup_mnist_trainer(self, display_log=False):
    """Build an MNIST updater, optimizer, iterators and model on this communicator."""
    batch_size = 100
    hidden_units = 100
    comm = self.communicator

    model = L.Classifier(MLP(hidden_units, 10))
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    # Only the root rank loads MNIST; scatter_dataset distributes shards.
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)

    train_iter = chainer.iterators.SerialIterator(train, batch_size)
    test_iter = chainer.iterators.SerialIterator(
        test, batch_size, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer)
    return updater, optimizer, train_iter, test_iter, model
def objective(trial, comm):
    """Optuna objective with pruning support, run under ChainerMN."""
    # Sample an architecture for this trial.
    model = L.Classifier(create_model(trial))

    # Set up a multi-node optimizer.
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(model)
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)

    # Worker 0 loads and subsamples MNIST; it is then split evenly
    # across all workers by scatter_dataset.
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
        rng = np.random.RandomState(0)
        train = chainer.datasets.SubDataset(
            train, 0, N_TRAIN_EXAMPLES, order=rng.permutation(len(train)))
        test = chainer.datasets.SubDataset(
            test, 0, N_TEST_EXAMPLES, order=rng.permutation(len(test)))
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm)

    train_iter = chainer.iterators.SerialIterator(
        train, BATCHSIZE, shuffle=True)
    test_iter = chainer.iterators.SerialIterator(
        test, BATCHSIZE, repeat=False, shuffle=False)

    updater = chainer.training.StandardUpdater(train_iter, optimizer)
    trainer = chainer.training.Trainer(updater, (EPOCH, 'epoch'))

    # Pruning hook: raises TrialPruned to stop unpromising trials early.
    trainer.extend(
        optuna.integration.ChainerPruningExtension(
            trial, 'validation/main/accuracy', (PRUNER_INTERVAL, 'epoch')))

    evaluator = chainer.training.extensions.Evaluator(test_iter, model)
    trainer.extend(chainermn.create_multi_node_evaluator(evaluator, comm))
    trainer.extend(chainer.training.extensions.LogReport(log_name=None))
    if comm.rank == 0:
        trainer.extend(chainer.training.extensions.ProgressBar())

    # show_loop_exception_msg=False suppresses the messages the trainer
    # would otherwise print each time the pruning hook raises TrialPruned.
    trainer.run(show_loop_exception_msg=False)

    # Final evaluation across all workers.
    evaluator = chainer.training.extensions.Evaluator(test_iter, model)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    report = evaluator()
    return report['main/accuracy']
def objective(trial, comm):
    """Train a sampled architecture and return its validation accuracy."""
    model = L.Classifier(create_model(trial))

    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(model)
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)

    # Worker 0 loads and subsamples MNIST; scatter splits it evenly.
    if comm.rank == 0:
        train, valid = chainer.datasets.get_mnist()
        rng = np.random.RandomState(0)
        train = chainer.datasets.SubDataset(
            train, 0, N_TRAIN_EXAMPLES, order=rng.permutation(len(train)))
        valid = chainer.datasets.SubDataset(
            valid, 0, N_VALID_EXAMPLES, order=rng.permutation(len(valid)))
    else:
        train, valid = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    valid = chainermn.scatter_dataset(valid, comm)

    train_iter = chainer.iterators.SerialIterator(
        train, BATCHSIZE, shuffle=True)
    valid_iter = chainer.iterators.SerialIterator(
        valid, BATCHSIZE, repeat=False, shuffle=False)

    updater = chainer.training.StandardUpdater(train_iter, optimizer)
    trainer = chainer.training.Trainer(updater, (EPOCH, "epoch"))
    if comm.rank == 0:
        trainer.extend(chainer.training.extensions.ProgressBar())
    trainer.run()

    # Multi-node evaluation: every rank receives the aggregated report.
    evaluator = chainer.training.extensions.Evaluator(valid_iter, model)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    report = evaluator()
    return report["main/accuracy"]
def objective(trial, comm):
    """GPU objective: train a sampled architecture and return test accuracy."""
    # Each process drives the GPU matching its intra-node rank.
    device = comm.intra_rank
    chainer.cuda.get_device_from_id(device).use()

    # Sample an architecture and move it onto the GPU.
    model = L.Classifier(create_model(trial))
    model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(model)
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)

    # Worker 0 loads and subsamples MNIST; scatter shards it to all ranks.
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
        rng = np.random.RandomState(0)
        train = chainer.datasets.SubDataset(
            train, 0, N_TRAIN_EXAMPLES, order=rng.permutation(len(train)))
        test = chainer.datasets.SubDataset(
            test, 0, N_TEST_EXAMPLES, order=rng.permutation(len(test)))
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm)

    train_iter = chainer.iterators.SerialIterator(
        train, BATCHSIZE, shuffle=True)
    test_iter = chainer.iterators.SerialIterator(
        test, BATCHSIZE, repeat=False, shuffle=False)

    updater = chainer.training.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = chainer.training.Trainer(updater, (EPOCH, 'epoch'))
    if comm.rank == 0:
        trainer.extend(chainer.training.extensions.ProgressBar())
    trainer.run()

    evaluator = chainer.training.extensions.Evaluator(
        test_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    report = evaluator()
    return report['main/accuracy']
def test_mnist(self, display_log=True):
    """End-to-end MNIST training smoke test on the naive (CPU) communicator."""
    # This test file is intended to be run on Travis-CI and
    # GPU is not used for now.
    epoch = 5
    batchsize = 100
    n_units = 100

    comm = chainermn.create_communicator('naive')
    model = L.Classifier(MLP(n_units, 10))
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    # Only the root loads MNIST; scatter distributes it to all ranks.
    train, test = (chainer.datasets.get_mnist()
                   if comm.rank == 0 else (None, None))
    train = chainermn.scatter_dataset(train, comm)
    test = chainermn.scatter_dataset(test, comm)

    train_iter = chainer.iterators.SerialIterator(train, batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (epoch, 'epoch'))

    # Wrap standard Chainer evaluators by MultiNodeEvaluator.
    evaluator = extensions.Evaluator(test_iter, model)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator)

    # Display/output extensions only on one worker to avoid duplicates.
    if comm.rank == 0 and display_log:
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch')),
                       trigger=(1, 'epoch'))
        trainer.extend(extensions.PrintReport(
            ['epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
             'validation/main/accuracy', 'elapsed_time'],
            out=sys.stderr), trigger=(1, 'epoch'))
    trainer.run()

    accuracy = evaluator()['validation/main/accuracy']
    self.assertGreaterEqual(accuracy, 0.95)
def objective(trial, comm):
    """Train a sampled architecture and return its error rate (1 - accuracy).

    Runs under ChainerMN: only worker 0 loads the dataset, which is then
    split evenly and distributed to all workers.
    """
    # Sample an architecture.
    model = L.Classifier(create_model(trial))

    # Setup optimizer.
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(model)
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)

    # FIX: previously every rank loaded and subsampled the full MNIST
    # dataset even though scatter_dataset only reads the root's argument.
    # Loading on rank 0 only avoids redundant downloads/memory on workers
    # and matches the pattern used by the sibling objectives in this file.
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
        rng = np.random.RandomState(0)
        train = chainer.datasets.SubDataset(
            train, 0, N_TRAIN_EXAMPLES, order=rng.permutation(len(train)))
        test = chainer.datasets.SubDataset(
            test, 0, N_TEST_EXAMPLES, order=rng.permutation(len(test)))
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm)

    train_iter = chainer.iterators.SerialIterator(
        train, BATCHSIZE, shuffle=True)
    test_iter = chainer.iterators.SerialIterator(
        test, BATCHSIZE, repeat=False, shuffle=False)

    # Setup trainer.
    updater = chainer.training.StandardUpdater(train_iter, optimizer)
    trainer = chainer.training.Trainer(updater, (EPOCH, 'epoch'))
    if comm.rank == 0:
        trainer.extend(chainer.training.extensions.ProgressBar())

    # Run training.
    trainer.run()

    # Evaluate across all workers.
    evaluator = chainer.training.extensions.Evaluator(test_iter, model)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    report = evaluator()
    return 1.0 - report['main/accuracy']
def _prepare_multinode_snapshot(n, result):
    """Run `n` updater steps with a multi-node snapshot attached.

    Returns the updater, the snapshot extension, and the trainer.
    """
    n_units = 100
    batchsize = 10

    comm = create_communicator('naive')
    model = L.Classifier(MLP(n_units, 10))
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    # Root loads MNIST (train split only); scatter shards it to every rank.
    train = chainer.datasets.get_mnist()[0] if comm.rank == 0 else None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    train_iter = chainer.iterators.SerialIterator(train, batchsize)

    updater = StandardUpdater(train_iter, optimizer)
    trainer = Trainer(updater, out=result)

    # Snapshot the updater and auto-load any existing checkpoint.
    snapshot = extensions.snapshot(target=updater, autoload=True)
    replica_sets = []
    mn_snapshot = multi_node_snapshot(comm, snapshot, replica_sets)
    mn_snapshot.initialize(trainer)

    for _ in range(n):
        updater.update()
    return updater, mn_snapshot, trainer
def scatter_large_data(self, comm_type):
    """Exercise scatter_dataset with a very large (~2e9 element) list.

    FIXES: the original called the collective scatter separately in each
    branch, mixing `chainermn.scatter_dataset` (root) with a bare
    `scatter_dataset` (non-root), and passed `[]` instead of None on
    non-root ranks. scatter_dataset is collective and only reads its data
    argument on the root, so a single call with None elsewhere is the
    correct, consistent pattern.
    """
    comm = self.communicator
    if comm.rank == 0:
        data = ["test"] * 2000000000
    else:
        # Non-root ranks pass None; scatter_dataset ignores their argument.
        data = None
    data = chainermn.scatter_dataset(data, comm)
def train(x_data, t_data, batchsize=128, layer=1, in_units=1,
          hidden_units=5, out_units=1):
    """Train the LSTM on (x_data, t_data) scattered across MPI workers.

    FIXES: removed the no-op `batchsize = batchsize` self-assignment and
    the dead commented-out ProgressBar line; MyConverter is now passed as
    an explicit `converter=` keyword instead of positionally.
    """
    comm = chainermn.create_communicator('naive')

    # Scatter inputs and targets; the default (no shuffle) keeps the two
    # scattered sequences aligned with each other.
    x_data = chainermn.scatter_dataset(x_data, comm)
    t_data = chainermn.scatter_dataset(t_data, comm)
    train_iter = iterators.SerialIterator(x_data, batchsize)
    test_iter = iterators.SerialIterator(
        t_data, batchsize, repeat=False, shuffle=False)

    # Setup model and multi-node optimizer.
    model = LSTM(in_units, hidden_units, out_units)
    optimizer = chainermn.create_multi_node_optimizer(optimizers.Adam(), comm)
    optimizer.setup(model)

    updater = training.StandardUpdater(
        train_iter, optimizer, converter=MyConverter)
    trainer = training.Trainer(updater, (20, 'epoch'), out='result')

    # Reporting extensions only on the root to avoid duplicated output.
    if comm.rank == 0:
        trainer.extend(extensions.LogReport())
        trainer.extend(extensions.dump_graph('main/loss'))
        trainer.extend(extensions.observe_lr())
        trainer.extend(
            extensions.Evaluator(test_iter, model, converter=MyConverter),
            name='val')
        trainer.extend(
            extensions.PrintReport(
                ['epoch', 'main/loss', 'val/main/loss', 'elapsed_time', 'lr']))
        trainer.extend(
            extensions.PlotReport(['main/loss', 'val/main/loss'],
                                  x_key='epoch', file_name='loss.png'))
    trainer.run()
def get_dataset(args, comm, model):
    """Load, scatter, and wrap the train/val pair datasets.

    Only supports `args.loadtype == 'development'`; anything else raises
    NotImplementedError.
    """
    mean = np.load(args.mean)
    if args.loadtype != 'development':
        raise NotImplementedError('Invalid loadtype: {}'.format(args.loadtype))

    # Only the root rank reads the pair lists from disk.
    if comm.rank == 0:
        train = dlframeworks.chainer.datasets.read_pairs(args.train)
        val = dlframeworks.chainer.datasets.read_pairs(args.val)
    else:
        train, val = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    val = chainermn.scatter_dataset(val, comm)

    # Wrap each shard with mean-subtracting cropping (val: no random crop).
    train = dlframeworks.chainer.datasets.CroppingDataset(
        train, args.train_root, mean, model.insize, model.insize)
    val = dlframeworks.chainer.datasets.CroppingDataset(
        val, args.val_root, mean, model.insize, model.insize, False)
    return train, val
def main():
    """Evaluate a semantic segmentation model across MPI workers.

    Each worker predicts on a scattered slice of the dataset; per-worker
    confusion matrices are summed with allreduce and rank 0 prints the
    aggregate metrics.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', choices=('cityscapes', 'ade20k', 'camvid'))
    parser.add_argument('--model', choices=('pspnet_resnet101', 'segnet'))
    parser.add_argument('--pretrained-model')
    parser.add_argument('--input-size', type=int, default=None)
    args = parser.parse_args()

    comm = chainermn.create_communicator()
    # One GPU per process, indexed by intra-node rank.
    device = comm.intra_rank

    dataset, label_names, model = get_dataset_and_model(
        args.dataset, args.model, args.pretrained_model,
        (args.input_size, args.input_size))
    # Even split is required so every rank evaluates the same count.
    assert len(dataset) % comm.size == 0, \
        "The size of the dataset should be a multiple "\
        "of the number of GPUs"

    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # Rank 0 owns the full index array; scatter_dataset shards it.
    if comm.rank == 0:
        indices = np.arange(len(dataset))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm)
    dataset = dataset.slice[indices]

    it = iterators.SerialIterator(dataset, 1, repeat=False, shuffle=False)
    in_values, out_values, rest_values = apply_to_iterator(
        model.predict, it, hook=ProgressHook(len(dataset)))
    # Delete an iterator of images to save memory usage.
    del in_values
    pred_labels, = out_values
    gt_labels, = rest_values

    confusion = calc_semantic_segmentation_confusion(pred_labels, gt_labels)
    # Sum the per-rank confusion matrices over all workers.
    confusion = comm.allreduce(confusion)

    if comm.rank == 0:
        iou = calc_semantic_segmentation_iou(confusion)
        pixel_accuracy = np.diag(confusion).sum() / confusion.sum()
        class_accuracy = np.diag(confusion) / np.sum(confusion, axis=1)

        for iu, label_name in zip(iou, label_names):
            print('{:>23} : {:.4f}'.format(label_name, iu))
        print('=' * 34)
        print('{:>23} : {:.4f}'.format('mean IoU', np.nanmean(iou)))
        print('{:>23} : {:.4f}'.format(
            'Class average accuracy', np.nanmean(class_accuracy)))
        print('{:>23} : {:.4f}'.format(
            'Global average accuracy', pixel_accuracy))
def get_model_and_data(
        affordance,
        batch_size=1,
        comm=None,
        modal='rgb',
        augmentation=True,
        resolution=30,
):
    """Build the FCN sigmoid train chain plus train/test iterators."""
    # Pick the dataset pair for the requested affordance.
    if affordance == 'suction':
        dataset_train = grasp_fusion.datasets.SuctionDataset(
            'train', augmentation=augmentation)
        dataset_test = grasp_fusion.datasets.SuctionDataset('test')
    else:
        assert affordance == 'pinch'
        dataset_train = grasp_fusion.datasets.PinchDataset(
            'train', augmentation=augmentation, resolution=resolution)
        dataset_test = grasp_fusion.datasets.PinchDataset(
            'test', resolution=resolution)
    channel_names = dataset_train.channel_names
    out_channels = len(channel_names)

    predictor = grasp_fusion.models.FCN8sVGG16Sigmoid(
        out_channels=out_channels, modal=modal)
    model = grasp_fusion.models.FCNSigmoidTrainChain(predictor)

    # Under ChainerMN only rank 0 keeps the training data; scatter shards it.
    if comm:
        import chainermn
        if comm.rank != 0:
            dataset_train = None
        dataset_train = chainermn.scatter_dataset(
            dataset_train, comm, shuffle=True)

    iter_train = chainer.iterators.SerialIterator(
        chainer.datasets.TransformDataset(
            dataset_train,
            lambda x: transform(x, model=predictor, train=True)),
        batch_size=batch_size)
    iter_test = chainer.iterators.SerialIterator(
        chainer.datasets.TransformDataset(
            dataset_test,
            lambda x: transform(x, model=predictor, train=False)),
        batch_size=1, repeat=False, shuffle=False)

    return model, iter_train, iter_test, channel_names
def scatter_train_and_val_data(train_data, val_data, comm):
    """Shard train data via scatter_dataset and val data via rank striding."""
    # Root builds the full training index array; scatter shuffles & shards it.
    train_indices = np.arange(len(train_data)) if comm.rank == 0 else None
    train_indices = chainermn.scatter_dataset(
        train_indices, comm, shuffle=True)
    train_data = train_data.slice[train_indices]

    # Validation: each rank takes a strided slice (rank, rank+size, ...).
    val_indices = np.arange(len(val_data))[comm.rank::comm.size]
    val_data = val_data.slice[val_indices]

    return train_data, val_data
def _setup_datasets(config, comm, is_master):
    """Create train/validation/test datasets on the master and scatter them."""
    train_data = None
    validation_data = None
    test_data = None

    if is_master:
        if config['dataset_name'] != 'msd_bound':
            raise ValueError('Unknown dataset_name: {}'.format(
                config['dataset_name']))
        train_data = MSDBoundDataset(config, config['train_list_path'])
        validation_data = MSDBoundDataset(
            config, config['validation_list_path'])
        test_data = MSDBoundDataset(config, config['test_list_path'])

        # Evaluation datasets must not be augmented or duplicated.
        for eval_data in (validation_data, test_data):
            eval_data.random_scale = False
            eval_data.shift_intensity = 0
            eval_data.random_flip = False
            eval_data.nb_copies = 1
            eval_data.training = False

        print('Training dataset size: {}'.format(len(train_data)))
        print('Validation dataset size: {}'.format(len(validation_data)))
        print('Test dataset size: {}'.format(len(test_data)))

    # Scatter dataset shards to every rank when running distributed.
    if comm is not None:
        train_data = chainermn.scatter_dataset(train_data, comm, shuffle=True)
        validation_data = chainermn.scatter_dataset(
            validation_data, comm, shuffle=True)
        test_data = chainermn.scatter_dataset(test_data, comm, shuffle=True)

    return train_data, validation_data, test_data
def check_scatter_dataset(self, original_dataset, shuffle=False, root=0):
    """Scatter `original_dataset` from `root` and verify shard sizes/content.

    Non-root ranks pass None (scatter_dataset only reads the root's
    argument); every rank's shard is gathered back to the root, which
    checks that the shards are equal-sized, tightly cover the original
    length, and together contain exactly the original elements.
    """
    if self.communicator.rank != root:
        original_dataset = None
    my_dataset = chainermn.scatter_dataset(original_dataset,
                                           self.communicator,
                                           shuffle=shuffle,
                                           root=root)
    sub_datasets = self.communicator.gather_obj(my_dataset, root=root)

    if self.communicator.rank == root:
        # Test the sizes
        sub_sizes = [len(sub_dataset) for sub_dataset in sub_datasets]
        self.assertEqual(len(set(sub_sizes)), 1)
        sub_size = sub_sizes[0]
        # Equal shard size must tightly cover the original dataset length.
        self.assertLessEqual(len(original_dataset),
                             sub_size * self.mpi_comm.size)
        self.assertGreater(len(original_dataset),
                           (sub_size - 1) * self.mpi_comm.size)

        # Test the content of scattered datasets
        joined_dataset = sum(
            (sub_dataset[:] for sub_dataset in sub_datasets), [])

        # NOTE: The values in `original_dataset` and
        # `joined_dataset` must be casted to int to compare.
        # There are 2 backgrounds on this issue.
        #
        # (1) numpy and cupy/chainerx have different behaviours on
        # 1-element array. Numpy implicitly converts a 1-element array to
        # a scalar value.
        # type(numpy.array([1])[0])
        # => <class 'numpy.int64'>   # Scalar
        # type(chainerx.array([1])[0])
        # => <class 'chainerx.ndarray'>  # array of one element
        #
        # (2) Two different ChainerX arrays are never identical in the
        # context of `set()`.
        # set([chainerx.array([0]), chainerx.array([0])])
        # => {array([0], shape=(1,), dtype=int64, device='native:0'),
        #     array([0], shape=(1,), dtype=int64, device='native:0')}
        joined_dataset = [int(e) for e in joined_dataset]
        original_dataset = [int(e) for e in original_dataset]
        self.assertEqual(set(joined_dataset), set(original_dataset))
def check_scatter_dataset(self, original_dataset):
    """Scatter from rank 0 and verify shard sizes and combined content."""
    my_dataset = chainermn.scatter_dataset(
        original_dataset, self.communicator)
    sub_datasets = self.mpi_comm.gather(my_dataset)

    if self.mpi_comm.rank == 0:
        # Every rank must receive a shard of the same length...
        shard_sizes = [len(sub) for sub in sub_datasets]
        self.assertEqual(len(set(shard_sizes)), 1)
        shard_size = shard_sizes[0]
        # ...and that common length must tightly cover the original size.
        self.assertLessEqual(len(original_dataset),
                             shard_size * self.mpi_comm.size)
        self.assertGreater(len(original_dataset),
                           (shard_size - 1) * self.mpi_comm.size)

        # Concatenated shards must contain exactly the original elements.
        joined = sum((sub[:] for sub in sub_datasets), [])
        self.assertEqual(set(joined), set(original_dataset))
def get_data(name, batch_size=1, comm=None, extractor='res'):
    """Create train/valid iterators for VOC or occlusion segmentation."""
    if name == 'voc':
        dataset_train = fcn.datasets.SBDClassSeg(split='train')
        dataset_valid = fcn.datasets.VOC2011ClassSeg(split='seg11valid')
    else:
        assert name == 'occlusion'
        dataset_train = OcclusionSegmentationDataset(split='train')
        dataset_valid = OcclusionSegmentationDataset(split='test')
    class_names = dataset_train.class_names

    # Under ChainerMN only rank 0 keeps the training data; scatter shards it.
    if comm:
        import chainermn
        if comm.rank != 0:
            dataset_train = None
        dataset_train = chainermn.scatter_dataset(
            dataset_train, comm, shuffle=True)

    iter_train = chainer.iterators.SerialIterator(
        chainer.datasets.TransformDataset(
            dataset_train,
            lambda x: transform(x, extractor=extractor)),
        batch_size=batch_size)
    # Raw iterator applies only the size transform (kept for visualization).
    iter_valid_raw = chainer.iterators.SerialIterator(
        chainer.datasets.TransformDataset(dataset_valid, transform_size),
        batch_size=1, repeat=False, shuffle=False)
    iter_valid = chainer.iterators.SerialIterator(
        chainer.datasets.TransformDataset(
            dataset_valid,
            lambda x: transform(x, extractor=extractor)),
        batch_size=1, repeat=False, shuffle=False)

    return class_names, iter_train, iter_valid, iter_valid_raw
def check_scatter_dataset(self, original_dataset, shuffle=False, root=0):
    """Scatter from `root` and check shard sizes plus combined content."""
    my_dataset = chainermn.scatter_dataset(
        original_dataset, self.communicator, shuffle=shuffle, root=root)
    sub_datasets = self.communicator.gather_obj(my_dataset, root=root)

    if self.communicator.rank == root:
        # Every rank must receive a shard of the same length.
        shard_sizes = [len(sub) for sub in sub_datasets]
        self.assertEqual(len(set(shard_sizes)), 1)
        shard_size = shard_sizes[0]
        # The common shard length must tightly cover the dataset size.
        self.assertLessEqual(
            len(original_dataset), shard_size * self.mpi_comm.size)
        self.assertGreater(
            len(original_dataset), (shard_size - 1) * self.mpi_comm.size)

        # Reassembled shards must hold exactly the original elements.
        combined = sum((sub[:] for sub in sub_datasets), [])
        self.assertEqual(set(combined), set(original_dataset))
def make_dataset(self, stage_int):
    """Build the dataset for the given stage; scatter it when using MPI."""
    if self.is_master:
        # Image size doubles every second stage: 4, 8, 8, 16, 16, ...
        size = 4 * (2 ** ((stage_int + 1) // 2))
        resize_ops = [[
            "resize",
            {
                "probability": 1,
                "width": size,
                "height": size,
                "resample_filter": "ANTIALIAS"
            }
        ]]
        _dataset = MultiDataset(
            json.load(open(FLAGS.dataset_config, 'r')),
            '%dx%d' % (size, size),
            resize_ops)
        self.print_log('Add (master) dataset for size {}'.format(size))
    else:
        # Workers receive their shard via scatter below.
        _dataset = None
        self.print_log('Add (slave) dataset')

    if self.use_mpi:
        _dataset = chainermn.scatter_dataset(_dataset, self.comm)
    return _dataset
def check_scatter_dataset(self, original_dataset, shuffle=False, root=0):
    """Scatter from `root` (non-root ranks pass None) and verify the result."""
    if self.communicator.rank != root:
        original_dataset = None
    my_dataset = chainermn.scatter_dataset(
        original_dataset, self.communicator, shuffle=shuffle, root=root)
    sub_datasets = self.communicator.gather_obj(my_dataset, root=root)

    if self.communicator.rank == root:
        # All shards share one length.
        shard_sizes = [len(sub) for sub in sub_datasets]
        self.assertEqual(len(set(shard_sizes)), 1)
        shard_size = shard_sizes[0]
        # That length tightly covers the original dataset size.
        self.assertLessEqual(
            len(original_dataset), shard_size * self.mpi_comm.size)
        self.assertGreater(
            len(original_dataset), (shard_size - 1) * self.mpi_comm.size)

        # The union of shards equals the original content.
        joined = sum((sub[:] for sub in sub_datasets), [])
        self.assertEqual(set(joined), set(original_dataset))
def read_by_list(comm, files, root, cnt_classes, size, random_crop):
    """Read image locations on rank 0, scatter them, and load local images.

    Returns a PreprocessedDataset over this rank's shard of the data.
    FIXES: removed dead commented-out debug code and replaced the manual
    append loop with a list comprehension; behavior is unchanged.
    """
    if comm.rank == 0:
        print("reading file list")
        locations = read_locations(files, root, cnt_classes)
        print("cnt samples global:", len(locations))
    else:
        locations = None
    locations = chainermn.scatter_dataset(locations, comm, shuffle=True)

    # Mean image is a constant 128 for every channel and pixel.
    mean = np.ones((3, 256, 256), dtype=np.float32) * 128.0

    # Eagerly load this rank's shard of (image, label) pairs into memory.
    images = [(read_image(loc[0]), loc[1]) for loc in locations]

    return PreprocessedDataset(
        base=images, mean=mean, crop_size=224, random_crop=random_crop)
def train(args, train_data, test_data, evaluator_type):
    """Train Mask R-CNN on `train_data`, validating with `evaluator_type`.

    NOTE(review): `args` is mutated in place — derived fields (n_node,
    n_gpu, seed, timestamp, out, batch_size, lr, weight_decay, step_size,
    git_hash, hostname) are attached and later dumped via ParamsReport.
    """
    required_args = [
        'dataset',
        'class_names',
        'logs_dir',
        'min_size',
        'max_size',
        'anchor_scales',
    ]
    for arg_key in required_args:
        if not hasattr(args, arg_key):
            raise ValueError(
                'args must contain required key: {}'.format(arg_key)
            )

    assert evaluator_type in ['voc', 'coco'], \
        'Unsupported evaluator_type: {}'.format(evaluator_type)

    # Device selection: one GPU per process under ChainerMN, otherwise
    # the single GPU given by --gpu (mandatory without --multi-node).
    if args.multi_node:
        import chainermn
        comm = chainermn.create_communicator('hierarchical')
        device = comm.intra_rank
        args.n_node = comm.inter_size
        args.n_gpu = comm.size
        chainer.cuda.get_device_from_id(device).use()
    else:
        if args.gpu is None:
            print(
                'Option --gpu is required without --multi-node.',
                file=sys.stderr,
            )
            sys.exit(1)
        args.n_node = 1
        args.n_gpu = 1
        chainer.cuda.get_device_from_id(args.gpu).use()
        device = args.gpu

    args.seed = 0
    now = datetime.datetime.now()
    args.timestamp = now.isoformat()
    args.out = osp.join(args.logs_dir, now.strftime('%Y%m%d_%H%M%S'))
    args.batch_size = args.batch_size_per_gpu * args.n_gpu

    # lr: 0.00125 * 8 = 0.01 in original
    args.lr = 0.00125 * args.batch_size
    args.weight_decay = 0.0001

    # lr / 10 at 120k iteration with
    # 160k iteration * 16 batchsize in original
    args.step_size = [
        (120e3 / 180e3) * args.max_epoch,
        (160e3 / 180e3) * args.max_epoch,
    ]

    random.seed(args.seed)
    np.random.seed(args.seed)

    if args.pooling_func == 'align':
        pooling_func = cmr.functions.roi_align_2d
    elif args.pooling_func == 'pooling':
        pooling_func = cmr.functions.roi_pooling_2d
    elif args.pooling_func == 'resize':
        pooling_func = cmr.functions.crop_and_resize
    else:
        raise ValueError(
            'Unsupported pooling_func: {}'.format(args.pooling_func)
        )

    if args.initializer == 'normal':
        mask_initialW = chainer.initializers.Normal(0.01)
    elif args.initializer == 'he_normal':
        mask_initialW = chainer.initializers.HeNormal(fan_option='fan_out')
    else:
        raise ValueError(
            'Unsupported initializer: {}'.format(args.initializer)
        )

    if args.model in ['resnet50', 'resnet101']:
        # NOTE(review): str.lstrip strips a *character set*, not a prefix;
        # this works here only because the remainder is all digits.
        n_layers = int(args.model.lstrip('resnet'))
        mask_rcnn = cmr.models.MaskRCNNResNet(
            n_layers=n_layers,
            n_fg_class=len(args.class_names),
            pooling_func=pooling_func,
            anchor_scales=args.anchor_scales,
            roi_size=args.roi_size,
            min_size=args.min_size,
            max_size=args.max_size,
            mask_initialW=mask_initialW,
        )
    else:
        raise ValueError('Unsupported model: {}'.format(args.model))
    model = cmr.models.MaskRCNNTrainChain(mask_rcnn)
    if args.multi_node or args.gpu >= 0:
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    if args.multi_node:
        optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.weight_decay))

    if args.model in ['resnet50', 'resnet101']:
        # ResNetExtractor.freeze_at is not enough to freeze params
        # since WeightDecay updates the param little by little.
        mask_rcnn.extractor.conv1.disable_update()
        mask_rcnn.extractor.bn1.disable_update()
        mask_rcnn.extractor.res2.disable_update()
        for link in mask_rcnn.links():
            if isinstance(link, cmr.links.AffineChannel2D):
                link.disable_update()

    train_data = chainer.datasets.TransformDataset(
        train_data,
        cmr.datasets.MaskRCNNTransform(mask_rcnn),
    )
    test_data = chainer.datasets.TransformDataset(
        test_data,
        cmr.datasets.MaskRCNNTransform(mask_rcnn, train=False),
    )
    if args.multi_node:
        # Only rank 0 keeps the data; scatter shards it to every rank.
        if comm.rank != 0:
            train_data = None
            test_data = None
        train_data = chainermn.scatter_dataset(train_data, comm, shuffle=True)
        test_data = chainermn.scatter_dataset(test_data, comm)

    # FIXME: MultiProcessIterator sometimes hangs
    train_iter = chainer.iterators.SerialIterator(
        train_data, batch_size=args.batch_size_per_gpu,
    )
    test_iter = chainer.iterators.SerialIterator(
        test_data,
        batch_size=args.batch_size_per_gpu,
        repeat=False,
        shuffle=False,
    )

    converter = functools.partial(
        cmr.datasets.concat_examples,
        padding=0,
        # img, bboxes, labels, masks, scales
        indices_concat=[0, 2, 3, 4],  # img, _, labels, masks, scales
        indices_to_device=[0, 1],  # img, bbox
    )
    updater = chainer.training.updater.StandardUpdater(
        train_iter,
        optimizer,
        device=device,
        converter=converter,
    )

    trainer = training.Trainer(
        updater, (args.max_epoch, 'epoch'), out=args.out,
    )

    trainer.extend(
        extensions.ExponentialShift('lr', 0.1),
        trigger=training.triggers.ManualScheduleTrigger(
            args.step_size, 'epoch',
        ),
    )

    eval_interval = 1, 'epoch'
    log_interval = 20, 'iteration'
    plot_interval = 0.1, 'epoch'
    print_interval = 20, 'iteration'

    if evaluator_type == 'voc':
        evaluator = cmr.extensions.InstanceSegmentationVOCEvaluator(
            test_iter,
            model.mask_rcnn,
            device=device,
            use_07_metric=True,
            label_names=args.class_names,
        )
    elif evaluator_type == 'coco':
        evaluator = cmr.extensions.InstanceSegmentationCOCOEvaluator(
            test_iter,
            model.mask_rcnn,
            device=device,
            label_names=args.class_names,
        )
    else:
        raise ValueError(
            'Unsupported evaluator_type: {}'.format(evaluator_type)
        )
    if args.multi_node:
        evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=eval_interval)

    # Output-producing extensions only on a single process.
    if not args.multi_node or comm.rank == 0:
        # Save snapshot.
        trainer.extend(
            extensions.snapshot_object(model.mask_rcnn, 'snapshot_model.npz'),
            trigger=training.triggers.MaxValueTrigger(
                'validation/main/map',
                eval_interval,
            ),
        )

        # Dump params.yaml.
        args.git_hash = cmr.utils.git_hash()
        args.hostname = socket.gethostname()
        trainer.extend(fcn.extensions.ParamsReport(args.__dict__))

        # Visualization.
        trainer.extend(
            cmr.extensions.InstanceSegmentationVisReport(
                test_iter,
                model.mask_rcnn,
                label_names=args.class_names,
            ),
            trigger=eval_interval,
        )

        # Logging.
        trainer.extend(
            chainer.training.extensions.observe_lr(),
            trigger=log_interval,
        )
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(
            extensions.PrintReport(
                [
                    'iteration',
                    'epoch',
                    'elapsed_time',
                    'lr',
                    'main/loss',
                    'main/roi_loc_loss',
                    'main/roi_cls_loss',
                    'main/roi_mask_loss',
                    'main/rpn_loc_loss',
                    'main/rpn_cls_loss',
                    'validation/main/map',
                ],
            ),
            trigger=print_interval,
        )
        trainer.extend(extensions.ProgressBar(update_interval=10))

        # Plot.
        assert extensions.PlotReport.available()
        trainer.extend(
            extensions.PlotReport(
                [
                    'main/loss',
                    'main/roi_loc_loss',
                    'main/roi_cls_loss',
                    'main/roi_mask_loss',
                    'main/rpn_loc_loss',
                    'main/rpn_cls_loss',
                ],
                file_name='loss.png',
                trigger=plot_interval,
            ),
            trigger=plot_interval,
        )
        trainer.extend(
            extensions.PlotReport(
                ['validation/main/map'],
                file_name='accuracy.png',
                trigger=plot_interval,
            ),
            trigger=eval_interval,
        )

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """ChainerMN MNIST training example with automatic checkpoint/restart."""
    parser = argparse.ArgumentParser(description='''\
ChainerMN example: MNIST with automatic checkpoints enabled''')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--communicator', type=str,
                        default='hierarchical', help='Type of communicator')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    parser.add_argument('--run-id', type=str, default='train-mnist-example',
                        help='ID of the task name')
    args = parser.parse_args()

    # Prepare ChainerMN communicator.
    if args.gpu:
        if args.communicator == 'naive':
            print("Error: 'naive' communicator does not support GPU.\n")
            exit(-1)
        comm = chainermn.create_communicator(args.communicator)
        device = comm.intra_rank
    else:
        if args.communicator != 'naive':
            print('Warning: using naive communicator '
                  'because only naive supports CPU-only execution')
        comm = chainermn.create_communicator('naive')
        device = -1

    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    model = L.Classifier(MLP(args.unit, 10))
    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()

    # Create a multi node optimizer from a standard Chainer optimizer.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    # Split and distribute the dataset. Only worker 0 loads the whole dataset.
    # Datasets of worker 0 are evenly split and distributed to all workers.
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Enable checkpointer and recover from checkpoint if any checkpoint exists
    checkpointer = create_multi_node_checkpointer(name=args.run_id, comm=comm)
    checkpointer.maybe_load(trainer, optimizer)
    print("Rank", comm.rank, ": (Re)Starting from (epoch, iter) =",
          (trainer.updater.epoch, trainer.updater.iteration))
    trainer.extend(checkpointer, trigger=(1000, 'iteration'))

    # Create a multi node evaluator from a standard Chainer evaluator.
    evaluator = extensions.Evaluator(test_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        trainer.extend(extensions.DumpGraph('main/loss'))
        trainer.extend(extensions.LogReport())
        trainer.extend(
            extensions.PrintReport([
                'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
                'validation/main/accuracy', 'elapsed_time'
            ]))
        trainer.extend(extensions.ProgressBar())

    trainer.run()
def main():
    """Distributed EN→FR seq2seq (WMT) training with ChainerMN.

    Rank 0 reads/filters the corpora and builds the vocabularies; the
    id dictionaries are broadcast and the datasets scattered so every
    rank trains on its own shard.  BLEU is reported by a multi-node
    evaluator.

    Fix: the communicator type was hard-coded to ``'hierarchical'`` on
    the GPU path, silently ignoring ``--communicator`` even though the
    flag was parsed and echoed in the startup banner.  It now honors
    ``args.communicator`` (default unchanged: 'hierarchical'); CPU runs
    still use 'naive', the only communicator supporting CPU execution.
    """
    parser = argparse.ArgumentParser(description='Chainer example: seq2seq')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--bleu', action="store_true", default=False,
                        help='Report BLEU score')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--cache', '-c', default=None,
                        help='Directory to cache pre-processed dataset')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1024,
                        help='Number of units')
    parser.add_argument('--communicator', default='hierarchical',
                        help="Type of communicator")
    parser.add_argument('--stop', '-s', type=str, default="15e",
                        help='Stop trigger (ex. "500i", "15e")')
    parser.add_argument('--input', '-i', type=str, default='wmt',
                        help='Input directory')
    parser.add_argument('--optimizer', type=str, default="adam()",
                        help="Optimizer and its argument")
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    args = parser.parse_args()

    # Prepare ChainerMN communicator.
    # Honor --communicator instead of hard-coding 'hierarchical' (bug fix).
    if args.gpu:
        comm = chainermn.create_communicator(args.communicator)
        dev = comm.intra_rank
    else:
        comm = chainermn.create_communicator('naive')
        dev = -1

    if comm.mpi_comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(MPI.COMM_WORLD.Get_size()))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('==========================================')

    # Rank 0 prepares all data; other ranks receive their shard later.
    if comm.rank == 0:
        if args.cache and not os.path.exists(args.cache):
            os.mkdir(args.cache)

        # Read source data (optionally from a pickle cache).
        bt = time.time()
        if args.cache:
            cache_file = os.path.join(args.cache, 'source.pickle')
            source_vocab, source_data = cached_call(cache_file,
                                                    read_source,
                                                    args.input, args.cache)
        else:
            source_vocab, source_data = read_source(args.input, args.cache)
        et = time.time()
        print("RD source done. {:.3f} [s]".format(et - bt))
        sys.stdout.flush()

        # Read target data (optionally from a pickle cache).
        bt = time.time()
        if args.cache:
            cache_file = os.path.join(args.cache, 'target.pickle')
            target_vocab, target_data = cached_call(cache_file,
                                                    read_target,
                                                    args.input, args.cache)
        else:
            target_vocab, target_data = read_target(args.input, args.cache)
        et = time.time()
        print("RD target done. {:.3f} [s]".format(et - bt))
        sys.stdout.flush()

        print('Original training data size: %d' % len(source_data))
        # Keep only pairs where both sides are non-empty and < 50 tokens.
        train_data = [(s, t)
                      for s, t in six.moves.zip(source_data, target_data)
                      if 0 < len(s) < 50 and 0 < len(t) < 50]
        print('Filtered training data size: %d' % len(train_data))

        en_path = os.path.join(args.input, 'dev', 'newstest2013.en')
        source_data = europal.make_dataset(en_path, source_vocab)
        fr_path = os.path.join(args.input, 'dev', 'newstest2013.fr')
        target_data = europal.make_dataset(fr_path, target_vocab)
        assert(len(source_data) == len(target_data))
        test_data = [(s, t) for s, t
                     in six.moves.zip(source_data, target_data)
                     if 0 < len(s) and 0 < len(t)]

        source_ids = {word: index
                      for index, word in enumerate(source_vocab)}
        target_ids = {word: index
                      for index, word in enumerate(target_vocab)}
    else:
        # target_data, source_data = None, None
        train_data, test_data = None, None
        target_ids, source_ids = None, None

    # Print GPU id, one rank at a time (barrier keeps output ordered).
    for i in range(0, comm.size):
        if comm.rank == i:
            print("Rank {} GPU: {}".format(comm.rank, dev))
        sys.stdout.flush()
        comm.mpi_comm.Barrier()

    # broadcast id -> word dictionary from rank 0 to everyone.
    source_ids = comm.mpi_comm.bcast(source_ids, root=0)
    target_ids = comm.mpi_comm.bcast(target_ids, root=0)

    target_words = {i: w for w, i in target_ids.items()}
    source_words = {i: w for w, i in source_ids.items()}

    if comm.rank == 0:
        print("target_words : {}".format(len(target_words)))
        print("source_words : {}".format(len(source_words)))

    model = Seq2seq(3, len(source_ids), len(target_ids), args.unit)

    if dev >= 0:
        chainer.cuda.get_device(dev).use()
        model.to_gpu(dev)

    # Determine the stop trigger: "<n>e" = epochs, "<n>i" = iterations.
    m = re.match(r'^(\d+)e$', args.stop)
    if m:
        trigger = (int(m.group(1)), 'epoch')
    else:
        m = re.match(r'^(\d+)i$', args.stop)
        if m:
            trigger = (int(m.group(1)), 'iteration')
        else:
            if comm.rank == 0:
                sys.stderr.write("Error: unknown stop trigger: {}".format(
                    args.stop))
            exit(-1)

    if comm.rank == 0:
        print("Trigger: {}".format(trigger))

    optimizer = chainermn.create_multi_node_optimizer(
        create_optimizer(args.optimizer), comm)
    optimizer.setup(model)

    # Broadcast dataset
    # Sanity check of train_data
    train_data = chainermn.scatter_dataset(train_data, comm)
    test_data = chainermn.scatter_dataset(test_data, comm)

    train_iter = chainer.iterators.SerialIterator(train_data,
                                                  args.batchsize,
                                                  shuffle=False)
    updater = training.StandardUpdater(
        train_iter, optimizer, converter=convert, device=dev)
    trainer = training.Trainer(updater, trigger, out=args.out)
    trainer.extend(chainermn.create_multi_node_evaluator(
        BleuEvaluator(model, test_data, device=dev, comm=comm), comm))

    def translate_one(source, target):
        # Translate one raw sentence and print source/result/expected.
        words = europal.split_sentence(source)
        print('# source : ' + ' '.join(words))
        # Unknown words map to id 1 (assumed <unk> — TODO confirm).
        x = model.xp.array(
            [source_ids.get(w, 1) for w in words], 'i')
        ys = model.translate([x])[0]
        words = [target_words[y] for y in ys]
        print('# result : ' + ' '.join(words))
        print('# expect : ' + target)

    # @chainer.training.make_extension(trigger=(200, 'iteration'))
    def translate(trainer):
        # Debug extension (currently not registered): translate two fixed
        # sentences plus one random test pair.
        translate_one(
            'Who are we ?',
            'Qui sommes-nous?')
        translate_one(
            'And it often costs over a hundred dollars ' +
            'to obtain the required identity card .',
            'Or, il en coûte souvent plus de cent dollars ' +
            'pour obtenir la carte d\'identité requise.')

        source, target = test_data[numpy.random.choice(len(test_data))]
        source = ' '.join([source_words.get(i, '') for i in source])
        target = ' '.join([target_words.get(i, '') for i in target])
        translate_one(source, target)

    if comm.rank == 0:
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch')),
                       trigger=(1, 'epoch'))
        report = extensions.PrintReport(['epoch',
                                         'iteration',
                                         'main/loss',
                                         'main/perp',
                                         'validation/main/bleu',
                                         'elapsed_time'])
        trainer.extend(report, trigger=(1, 'epoch'))

    comm.mpi_comm.Barrier()
    if comm.rank == 0:
        print('start training')
        sys.stdout.flush()

    trainer.run()
def main():
    """Data-parallel MNIST training example for ChainerMN.

    Each MPI process trains a replica of the MLP classifier on its own
    shard of MNIST; a multi-node optimizer averages gradients and a
    multi-node evaluator aggregates validation scores.

    NOTE(review): all ranks must execute the collective calls
    (``scatter_dataset``, evaluator) in the same order, or the job hangs.
    """
    parser = argparse.ArgumentParser(description='ChainerMN example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--communicator', type=str,
                        default='hierarchical', help='Type of communicator')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    # Prepare ChainerMN communicator.  'naive' is the only communicator
    # supporting CPU-only execution; it does not support GPUs.
    if args.gpu:
        if args.communicator == 'naive':
            print("Error: 'naive' communicator does not support GPU.\n")
            exit(-1)
        comm = chainermn.create_communicator(args.communicator)
        # One GPU per process, indexed by rank within the node.
        device = comm.intra_rank
    else:
        if args.communicator != 'naive':
            print('Warning: using naive communicator '
                  'because only naive supports CPU-only execution')
        comm = chainermn.create_communicator('naive')
        device = -1

    # Only rank 0 prints the configuration banner.
    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    model = L.Classifier(MLP(args.unit, 10))
    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()

    # Create a multi node optimizer from a standard Chainer optimizer.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    # Split and distribute the dataset.  Only worker 0 loads the whole
    # dataset; it is evenly split and distributed to all workers
    # (non-root ranks pass None to scatter_dataset).
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Create a multi node evaluator from a standard Chainer evaluator.
    evaluator = extensions.Evaluator(test_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        trainer.extend(extensions.dump_graph('main/loss'))
        trainer.extend(extensions.LogReport())
        trainer.extend(extensions.PrintReport(
            ['epoch', 'main/loss', 'validation/main/loss',
             'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
        trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def scatter_large_data(communicator):
    """Smoke-test scattering a very large dataset across MPI workers.

    Rank 0 materializes a two-billion-element list of short strings;
    after ``chainermn.scatter_dataset`` every rank must hold a
    non-empty shard.
    """
    payload = ["test"] * 2000000000 if communicator.rank == 0 else []
    shard = chainermn.scatter_dataset(payload, communicator)
    assert len(shard) > 0
def main():
    """Multi-GPU SSD training on the EPIC-Kitchens bounding-box dataset.

    Index-based scatter: rank 0 builds the index array, ChainerMN
    scatters the indices, and every rank slices the (locally
    constructed) transformed dataset by its own indices.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    comm = chainermn.create_communicator()
    # One GPU per process, indexed by rank within the node.
    device = comm.intra_rank

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(epic_kitchens_bbox_category_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(epic_kitchens_bbox_category_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    train = EpicKitchensBboxDataset(year='2018', split='train')
    # Scatter only the index array; each rank slices its own copy of the
    # transformed dataset by the received indices.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    train = TransformDataset(train, ('img', 'mb_loc', 'mb_label'),
                             Transform(model.coder, model.insize, model.mean))
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # http://chainermn.readthedocs.io/en/latest/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
    # NOTE(review): batchsize here is per worker (not divided by
    # comm.size as in the VOC example) — confirm this is intended.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=2)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    # Standard SSD hooks: biases get gradient scaling, everything else
    # gets weight decay.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=device)
    trainer = training.Trainer(updater, (18, 'epoch'), args.out)
    # Decay lr by 10x at epochs 12 and 15.
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=args.lr),
                   trigger=triggers.ManualScheduleTrigger([12, 15], 'epoch'))

    # Reporting and snapshots only on rank 0 to avoid duplication.
    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
            'main/loss/conf'
        ]), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=1))

        trainer.extend(extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}.npz'),
            trigger=(1, 'epoch'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Multi-GPU SSD training on PASCAL VOC 2007+2012 trainval.

    Uses index-based scatter (rank 0 builds indices, every rank slices
    its local transformed dataset), a per-worker batch of
    ``batchsize // comm.size``, and rank-0-only evaluation on the VOC
    2007 test set at the lr-decay milestones.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--test-batchsize', type=int, default=16)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--step', type=int, nargs='*',
                        default=[80000, 100000])
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    comm = chainermn.create_communicator()
    # One GPU per process, indexed by rank within the node.
    device = comm.intra_rank

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        ('img', 'mb_loc', 'mb_label'),
        Transform(model.coder, model.insize, model.mean))
    # Scatter only indices; each rank slices its local dataset copy.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # http://chainermn.readthedocs.io/en/latest/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
    # Global batch of args.batchsize, split evenly across workers.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize // comm.size, n_processes=2)

    # Only rank 0 evaluates on the test set.
    if comm.rank == 0:
        test = VOCBboxDataset(
            year='2007', split='test',
            use_difficult=True, return_difficult=True)
        test_iter = chainer.iterators.SerialIterator(
            test, args.test_batchsize, repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    # Standard SSD hooks: biases get gradient scaling, everything else
    # gets weight decay.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=device)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               args.out)
    # Decay lr by 10x at each milestone in args.step.
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step, 'iteration'))

    if comm.rank == 0:
        # Evaluate at every lr milestone and at the final iteration.
        trainer.extend(DetectionVOCEvaluator(test_iter, model,
                                             use_07_metric=True,
                                             label_names=voc_bbox_label_names),
                       trigger=triggers.ManualScheduleTrigger(
                           args.step + [args.iteration], 'iteration'))

        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
            'main/loss/conf', 'validation/main/map'
        ]), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(extensions.snapshot(),
                       trigger=triggers.ManualScheduleTrigger(
                           args.step + [args.iteration], 'iteration'))
        trainer.extend(extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}'),
            trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def main(argv):
    """Train an attention-based NStepLSTM EN→JA model with ChainerMN.

    Usage: ``python <script> data_path out_path layer epoch``.  Rank 0
    builds the token-id sequences and per-epoch loss log; the sequence
    lists are scattered to all workers and trained with a manual
    (non-Trainer) loop.
    """
    if len(argv) < 5:
        print("python " + argv[0] + " data_path out_path layer epoch")
        sys.exit(0)
    data_path = argv[1]
    out_path = argv[2]
    layer = int(argv[3])
    epoch = int(argv[4])
    x_file = os.path.join(data_path, "en.txt")
    y_file = os.path.join(data_path, "ja.txt")
    vocab_path = os.path.join(data_path, "vocab.dump")
    # Dictionary mapping words to ids.
    with open(vocab_path, "rb") as f:
        vocab = pickle.load(f)
    train_data1 = load_data(x_file, vocab)
    train_data2 = load_data(y_file, vocab)
    eos_id = vocab["<eos>"]
    batch_size = 256
    demb = 256  # embedding / hidden dimension
    drop_out = 0.5
    model = gAtt(layer, len(vocab) + 1, demb, drop_out)
    comm = chainermn.create_communicator("single_node")
    device = comm.intra_rank
    chainer.cuda.get_device(device).use()
    model.to_gpu(device)
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)
    if comm.rank == 0:
        # Create the output directory and the per-epoch loss CSV.
        date = datetime.datetime.today()
        folder_name = "_".join(
            [str(date.year), str(date.month), str(date.day)])
        out_path = (os.path.join(out_path, folder_name, "".join(
            ["layer", str(layer)])) + os.sep)
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        loss_out_path = os.path.join(
            out_path, "".join(["loss_", str(epoch), "_", str(layer), ".csv"]))
        # NOTE(review): loss_out is never explicitly closed; it lives for
        # the whole run and is flushed on interpreter exit.
        loss_out = open(loss_out_path, "w")
        print(
            "epoch:", epoch,
            " batch:", batch_size,
            " drop:", drop_out,
            " demb:", demb,
            " layer:", layer,
            end="\n",
            file=loss_out,
        )
    xs = []
    ys = []
    s = []
    # Build the source (x) sequences: split the flat id stream on <eos>.
    # NOTE(review): both build loops run only on rank 0; other ranks
    # scatter empty lists and receive their shard below.
    if comm.rank == 0:
        for pos in range(len(train_data1)):
            id = train_data1[pos]
            if id != eos_id:
                s += [id]
            else:
                xs += [xp.asarray(s, dtype=xp.int32)]
                s = []
        # Build the target (y) sequences the same way.
        for pos in range(len(train_data2)):
            id = train_data2[pos]
            if id != eos_id:
                s += [id]
            else:
                ys += [xp.asarray(s, dtype=xp.int32)]
                s = []
    # Distribute the data to all workers.
    xs = chainermn.scatter_dataset(xs, comm)
    ys = chainermn.scatter_dataset(ys, comm)
    loss = None
    for cnt in range(epoch):
        # Shuffle sample order each epoch (per-rank permutation).
        index = np.random.permutation(len(xs))
        for pos in range(0, len(xs), batch_size):
            # Assemble one mini-batch of variable-length sequences.
            batch_xs = []
            batch_ys = []
            for idx in index[pos:pos + (batch_size)]:
                batch_xs.append(xs[idx])
                batch_ys.append(ys[idx])
            model.cleargrads()
            # Zero-initialized hidden and cell states for the
            # bidirectional (2 * layer) LSTM stack.
            hx = chainer.Variable(
                xp.zeros((2 * layer, len(batch_xs), demb), dtype=xp.float32))
            cx = chainer.Variable(
                xp.zeros((2 * layer, len(batch_xs), demb), dtype=xp.float32))
            # Forward, backward, and multi-node update.
            loss = model(hx, cx, batch_xs, batch_ys, len(batch_xs), vocab)
            loss.backward()
            optimizer.update()
            print(cnt + 1, " : ", pos + len(batch_xs), "/", len(xs),
                  " finished")
        if comm.rank == 0:
            # Log the last batch loss and snapshot the model each epoch.
            print(loss.array, end="\n", file=loss_out)
            out_file = out_path + "nsteplstm-" + str(layer) + "-" + str(
                cnt) + ".model"
            # Move to CPU so the snapshot is device-independent, then
            # back to the GPU (rank 0's intra_rank is 0 here).
            model.to_cpu()
            serializers.save_npz(out_file, model)
            model.to_gpu(0)
def check_mnist(gpu, display_log=True):
    """End-to-end smoke test: distributed MNIST training + checkpointer.

    Trains a small MLP for 5 epochs across all ranks, asserts the final
    validation accuracy exceeds 0.95, and checks that the multi-node
    checkpointer cleaned up its snapshot directory on completion.

    Args:
        gpu (bool): Run on GPUs (one per process) instead of CPU.
        display_log (bool): Print per-epoch reports on rank 0.
    """
    epoch = 5
    batchsize = 100
    n_units = 100

    comm = chainermn.create_communicator('naive')
    if gpu:
        device = comm.intra_rank
        chainer.cuda.get_device(device).use()
    else:
        device = -1

    model = L.Classifier(MLP(n_units, 10))
    if gpu:
        model.to_gpu()

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    # Only rank 0 loads MNIST; shards are scattered to every rank.
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
    else:
        train, test = None, None

    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)

    train_iter = chainer.iterators.SerialIterator(train, batchsize)
    test_iter = chainer.iterators.SerialIterator(test, batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (epoch, 'epoch'))

    # Wrap standard Chainer evaluators by MultiNodeEvaluator.
    evaluator = extensions.Evaluator(test_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator)

    # Add checkpointer. This is just to check checkpointing runs
    # without errors.
    path = tempfile.mkdtemp(dir='/tmp', prefix=__name__ + "-tmp-")
    checkpointer = create_multi_node_checkpointer(name=__name__, comm=comm,
                                                  path=path)
    trainer.extend(checkpointer, trigger=(1, 'epoch'))

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0 and display_log:
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch')),
                       trigger=(1, 'epoch'))
        trainer.extend(extensions.PrintReport(['epoch',
                                               'main/loss',
                                               'validation/main/loss',
                                               'main/accuracy',
                                               'validation/main/accuracy',
                                               'elapsed_time'],
                                              out=sys.stderr),
                       trigger=(1, 'epoch'))
    trainer.run()

    # NOTE(review): despite the name, `err` holds the validation
    # *accuracy*, which must exceed 0.95.
    err = evaluator()['validation/main/accuracy']
    assert err > 0.95

    # Check checkpointer successfully finalized snapshot directory
    # (it must be empty when training completes normally).
    assert [] == os.listdir(path)
    os.removedirs(path)
def main():
    """Combined model- and data-parallel (pipelined) MNIST example.

    The world communicator is split along two axes: ``data_axis``
    (``rank % 2``) pairs each process with its pipeline partner via
    ``model_comm``, and ``model_axis`` (``rank // 2``) groups the
    processes doing data parallelism via ``data_comm``.  Processes with
    ``data_axis == 0`` run the first model half (MLP0), those with
    ``data_axis == 1`` run the second half (MLP1).
    """
    parser = argparse.ArgumentParser(
        description='ChainerMN example: pipelined neural network')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    # Prepare ChainerMN communicator and split it into the pipeline
    # (model_comm) and data-parallel (data_comm) sub-communicators.
    if args.gpu:
        comm = chainermn.create_communicator('hierarchical')
        data_axis, model_axis = comm.rank % 2, comm.rank // 2
        data_comm = comm.split(data_axis, comm.rank)
        model_comm = comm.split(model_axis, comm.rank)
        device = comm.intra_rank
    else:
        comm = chainermn.create_communicator('naive')
        data_axis, model_axis = comm.rank % 2, comm.rank // 2
        data_comm = comm.split(data_axis, comm.rank)
        model_comm = comm.split(model_axis, comm.rank)
        device = -1

    # Each pipeline pair must be exactly two processes.
    if model_comm.size != 2:
        raise ValueError(
            'This example can only be executed on the even number'
            'of processes.')

    if comm.rank == 0:
        print('==========================================')
        if args.gpu:
            print('Using GPUs')
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    # First pipeline stage holds MLP0 (with classifier head attached to
    # the pair via model_comm); second stage holds MLP1.
    if data_axis == 0:
        model = L.Classifier(MLP0(model_comm, args.unit))
    elif data_axis == 1:
        model = MLP1(model_comm, args.unit, 10)

    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), data_comm)
    optimizer.setup(model)

    # Original dataset on worker 0 and 1.
    # Datasets of worker 0 and 1 are split and distributed to all workers.
    # Second-stage ranks use empty datasets: they receive activations,
    # not raw examples.
    if model_axis == 0:
        train, test = chainer.datasets.get_mnist()
        if data_axis == 1:
            train = chainermn.datasets.create_empty_dataset(train)
            test = chainermn.datasets.create_empty_dataset(test)
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, data_comm, shuffle=True)
    test = chainermn.scatter_dataset(test, data_comm, shuffle=True)

    # shuffle=False keeps both pipeline stages in lock-step on the same
    # sample order.
    train_iter = chainer.iterators.SerialIterator(
        train, args.batchsize, shuffle=False)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    evaluator = extensions.Evaluator(test_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, data_comm)
    trainer.extend(evaluator)

    # Some display and output extensions are necessary only for worker 0.
    if comm.rank == 0:
        trainer.extend(extensions.DumpGraph('main/loss'))
        trainer.extend(extensions.LogReport())
        trainer.extend(extensions.PrintReport(
            ['epoch', 'main/loss', 'validation/main/loss',
             'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
        trainer.extend(extensions.ProgressBar())

    trainer.run()
def main():
    """Distributed ImageNet (ILSVRC2012) classification training.

    Trains a ResNet-50/101/152 across MPI workers with linear-scaled
    learning rate, gradual warmup, and step decay following
    https://arxiv.org/pdf/1706.02677.pdf ("Accurate, Large Minibatch
    SGD").  Uses index-based dataset scatter and a multi-node optimizer
    and evaluator.
    """
    model_cfgs = {
        'resnet50': {'class': ResNet50, 'score_layer_name': 'fc6',
                     'kwargs': {'arch': 'fb'}},
        'resnet101': {'class': ResNet101, 'score_layer_name': 'fc6',
                      'kwargs': {'arch': 'fb'}},
        'resnet152': {'class': ResNet152, 'score_layer_name': 'fc6',
                      'kwargs': {'arch': 'fb'}}
    }
    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to root of the train dataset')
    parser.add_argument('val', help='Path to root of the validation dataset')
    parser.add_argument('--model', '-m',
                        choices=model_cfgs.keys(), default='resnet50',
                        help='Convnet models')
    parser.add_argument('--communicator', type=str,
                        default='pure_nccl', help='Type of communicator')
    parser.add_argument('--loaderjob', type=int, default=4)
    parser.add_argument('--batchsize', type=int, default=32,
                        help='Batch size for each worker')
    parser.add_argument('--lr', type=float)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--weight_decay', type=float, default=0.0001)
    parser.add_argument('--out', type=str, default='result')
    parser.add_argument('--epoch', type=int, default=90)
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    # Spawning one child here forces the 'forkserver' start method to
    # initialize before CUDA/MPI state exists in the parent.
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator(args.communicator)
    # One GPU per process, indexed by rank within the node.
    device = comm.intra_rank

    # Linear scaling rule: lr ∝ global batch size (0.1 per 256 images).
    if args.lr is not None:
        lr = args.lr
    else:
        lr = 0.1 * (args.batchsize * comm.size) / 256
        if comm.rank == 0:
            print('lr={}: lr is selected based on the linear '
                  'scaling rule'.format(lr))

    label_names = directory_parsing_label_names(args.train)

    model_cfg = model_cfgs[args.model]
    extractor = model_cfg['class'](n_class=len(label_names),
                                   **model_cfg['kwargs'])
    extractor.pick = model_cfg['score_layer_name']
    model = Classifier(extractor)
    # Following https://arxiv.org/pdf/1706.02677.pdf,
    # the gamma of the last BN of each resblock is initialized by zeros.
    for l in model.links():
        if isinstance(l, Bottleneck):
            l.conv3.bn.gamma.data[:] = 0

    train_data = DirectoryParsingLabelDataset(args.train)
    val_data = DirectoryParsingLabelDataset(args.val)
    train_data = TransformDataset(train_data, ('img', 'label'),
                                  TrainTransform(extractor.mean))
    val_data = TransformDataset(val_data, ('img', 'label'),
                                ValTransform(extractor.mean))
    print('finished loading dataset')

    # Index-based scatter: rank 0 builds the index arrays and ChainerMN
    # scatters them; each rank slices its local dataset copy.
    if comm.rank == 0:
        train_indices = np.arange(len(train_data))
        val_indices = np.arange(len(val_data))
    else:
        train_indices = None
        val_indices = None

    train_indices = chainermn.scatter_dataset(
        train_indices, comm, shuffle=True)
    val_indices = chainermn.scatter_dataset(val_indices, comm, shuffle=True)
    train_data = train_data.slice[train_indices]
    val_data = val_data.slice[val_indices]
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, args.batchsize, n_processes=args.loaderjob)
    val_iter = iterators.MultiprocessIterator(
        val_data, args.batchsize,
        repeat=False, shuffle=False, n_processes=args.loaderjob)

    optimizer = chainermn.create_multi_node_optimizer(
        CorrectedMomentumSGD(lr=lr, momentum=args.momentum), comm)
    optimizer.setup(model)
    # Weight decay on everything except BN scale/shift parameters.
    for param in model.params():
        if param.name not in ('beta', 'gamma'):
            param.update_rule.add_hook(WeightDecay(args.weight_decay))

    if device >= 0:
        chainer.cuda.get_device(device).use()
        model.to_gpu()

    updater = chainer.training.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    @make_shift('lr')
    def warmup_and_exponential_shift(trainer):
        # Gradual warmup over the first 5 epochs (when lr > 0.1), then
        # 10x step decay at epochs 30, 60, and 80.
        epoch = trainer.updater.epoch_detail
        warmup_epoch = 5
        if epoch < warmup_epoch:
            if lr > 0.1:
                warmup_rate = 0.1 / lr
                rate = warmup_rate \
                    + (1 - warmup_rate) * epoch / warmup_epoch
            else:
                rate = 1
        elif epoch < 30:
            rate = 1
        elif epoch < 60:
            rate = 0.1
        elif epoch < 80:
            rate = 0.01
        else:
            rate = 0.001
        return rate * lr

    trainer.extend(warmup_and_exponential_shift)
    evaluator = chainermn.create_multi_node_evaluator(
        extensions.Evaluator(val_iter, model, device=device), comm)
    trainer.extend(evaluator, trigger=(1, 'epoch'))

    log_interval = 0.1, 'epoch'
    print_interval = 0.1, 'epoch'

    # Reporting and snapshots only on rank 0 to avoid duplication.
    if comm.rank == 0:
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
        trainer.extend(extensions.snapshot_object(
            extractor, 'snapshot_model_{.updater.epoch}.npz'),
            trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy'
        ]), trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
def main():
    """Entry point of the ChainerMN seq2seq (WMT en->fr) example.

    Rank 0 reads and filters the corpora, builds the vocabularies, and the
    data are then scattered/broadcast to all MPI workers.  Each worker
    trains a shard of the data with an all-reducing multi-node optimizer
    and evaluates BLEU with a multi-node evaluator.
    """
    parser = argparse.ArgumentParser(description='Chainer example: seq2seq')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--bleu', action='store_true', default=False,
                        help='Report BLEU score')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--cache', '-c', default=None,
                        help='Directory to cache pre-processed dataset')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1024,
                        help='Number of units')
    parser.add_argument('--communicator', default='hierarchical',
                        help='Type of communicator')
    parser.add_argument('--stop', '-s', type=str, default='15e',
                        help='Stop trigger (ex. "500i", "15e")')
    parser.add_argument('--input', '-i', type=str, default='wmt',
                        help='Input directory')
    parser.add_argument('--optimizer', type=str, default='adam()',
                        help='Optimizer and its argument')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    args = parser.parse_args()

    # Prepare ChainerMN communicator.
    if args.gpu:
        # BUGFIX: honor --communicator.  Previously this was hardcoded to
        # 'hierarchical', silently ignoring the flag while the banner below
        # still printed the user-supplied value.  The default is unchanged.
        comm = chainermn.create_communicator(args.communicator)
        dev = comm.intra_rank
    else:
        # CPU-only execution supports only the 'naive' communicator.
        comm = chainermn.create_communicator('naive')
        dev = -1

    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('==========================================')

    # Rank 0 prepares all data; other ranks keep None until scattering.
    if comm.rank == 0:
        if args.cache and not os.path.exists(args.cache):
            os.mkdir(args.cache)

        # Read source data (optionally via on-disk pickle cache).
        bt = time.time()
        if args.cache:
            cache_file = os.path.join(args.cache, 'source.pickle')
            source_vocab, source_data = cached_call(cache_file,
                                                    read_source,
                                                    args.input, args.cache)
        else:
            source_vocab, source_data = read_source(args.input, args.cache)
        et = time.time()
        print('RD source done. {:.3f} [s]'.format(et - bt))
        sys.stdout.flush()

        # Read target data (same caching scheme as the source side).
        bt = time.time()
        if args.cache:
            cache_file = os.path.join(args.cache, 'target.pickle')
            target_vocab, target_data = cached_call(cache_file,
                                                    read_target,
                                                    args.input, args.cache)
        else:
            target_vocab, target_data = read_target(args.input, args.cache)
        et = time.time()
        print('RD target done. {:.3f} [s]'.format(et - bt))
        sys.stdout.flush()

        # Keep only sentence pairs where both sides have 1..49 tokens.
        print('Original training data size: %d' % len(source_data))
        train_data = [(s, t)
                      for s, t in six.moves.zip(source_data, target_data)
                      if 0 < len(s) < 50 and 0 < len(t) < 50]
        print('Filtered training data size: %d' % len(train_data))

        # Development set (newstest2013) for BLEU evaluation.
        en_path = os.path.join(args.input, 'dev', 'newstest2013.en')
        source_data = europal.make_dataset(en_path, source_vocab)
        fr_path = os.path.join(args.input, 'dev', 'newstest2013.fr')
        target_data = europal.make_dataset(fr_path, target_vocab)
        assert(len(source_data) == len(target_data))
        test_data = [(s, t) for s, t
                     in six.moves.zip(source_data, target_data)
                     if 0 < len(s) and 0 < len(t)]

        # word -> id lookup tables.
        source_ids = {word: index
                      for index, word in enumerate(source_vocab)}
        target_ids = {word: index
                      for index, word in enumerate(target_vocab)}
    else:
        # target_data, source_data = None, None
        train_data, test_data = None, None
        target_ids, source_ids = None, None

    # Print GPU id of every rank, one rank at a time.
    for i in range(0, comm.size):
        if comm.rank == i:
            print('Rank {} GPU: {}'.format(comm.rank, dev))
        sys.stdout.flush()
        comm.mpi_comm.Barrier()

    # Broadcast the word <-> id dictionaries from rank 0.
    source_ids = comm.bcast_obj(source_ids, root=0)
    target_ids = comm.bcast_obj(target_ids, root=0)

    # Inverse tables: id -> word, used when printing translations.
    target_words = {i: w for w, i in target_ids.items()}
    source_words = {i: w for w, i in source_ids.items()}

    if comm.rank == 0:
        print('target_words : {}'.format(len(target_words)))
        print('source_words : {}'.format(len(source_words)))

    model = Seq2seq(3, len(source_ids), len(target_ids), args.unit)

    if dev >= 0:
        chainer.cuda.get_device_from_id(dev).use()
        model.to_gpu(dev)

    # Determine the stop trigger from --stop ("<n>e" epochs / "<n>i" iters).
    m = re.match(r'^(\d+)e$', args.stop)
    if m:
        trigger = (int(m.group(1)), 'epoch')
    else:
        m = re.match(r'^(\d+)i$', args.stop)
        if m:
            trigger = (int(m.group(1)), 'iteration')
        else:
            if comm.rank == 0:
                # BUGFIX: add the missing trailing newline to the message.
                sys.stderr.write('Error: unknown stop trigger: {}\n'.format(
                    args.stop))
            exit(-1)

    if comm.rank == 0:
        print('Trigger: {}'.format(trigger))

    optimizer = chainermn.create_multi_node_optimizer(
        create_optimizer(args.optimizer), comm)
    optimizer.setup(model)

    # Broadcast dataset
    # Sanity check of train_data
    train_data = chainermn.scatter_dataset(train_data, comm)
    test_data = chainermn.scatter_dataset(test_data, comm)

    train_iter = chainer.iterators.SerialIterator(train_data,
                                                  args.batchsize,
                                                  shuffle=False)
    updater = training.StandardUpdater(
        train_iter, optimizer, converter=convert, device=dev)
    trainer = training.Trainer(updater, trigger, out=args.out)
    trainer.extend(chainermn.create_multi_node_evaluator(
        BleuEvaluator(model, test_data, device=dev, comm=comm), comm))

    def translate_one(source, target):
        # Translate a single raw sentence and print source/result/expect.
        words = europal.split_sentence(source)
        print('# source : ' + ' '.join(words))
        x = model.xp.array(
            [source_ids.get(w, 1) for w in words], numpy.int32)
        ys = model.translate([x])[0]
        words = [target_words[y] for y in ys]
        print('# result : ' + ' '.join(words))
        print('# expect : ' + target)

    # NOTE(review): registration of this extension is commented out below,
    # so `translate` is currently defined but never invoked by the trainer.
    # @chainer.training.make_extension(trigger=(200, 'iteration'))
    def translate(trainer):
        translate_one(
            'Who are we ?',
            'Qui sommes-nous?')
        translate_one(
            'And it often costs over a hundred dollars ' +
            'to obtain the required identity card .',
            'Or, il en coûte souvent plus de cent dollars ' +
            'pour obtenir la carte d\'identité requise.')

        source, target = test_data[numpy.random.choice(len(test_data))]
        source = ' '.join([source_words.get(i, '') for i in source])
        target = ' '.join([target_words.get(i, '') for i in target])
        translate_one(source, target)

    # Reporting extensions only on rank 0 to avoid duplicated output.
    if comm.rank == 0:
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch')),
                       trigger=(1, 'epoch'))
        report = extensions.PrintReport(['epoch',
                                         'iteration',
                                         'main/loss',
                                         'main/perp',
                                         'validation/main/bleu',
                                         'elapsed_time'])
        trainer.extend(report, trigger=(1, 'epoch'))

    comm.mpi_comm.Barrier()
    if comm.rank == 0:
        print('start training')
        sys.stdout.flush()

    trainer.run()
def train(args, dataset_train, dataset_test):
    """Train a CycleGAN (generators G_A/G_B, discriminators D_A/D_B).

    In multi-node mode (``args.multi_node``) the training is data-parallel
    via ChainerMN; rank 0 supplies the datasets, which are scattered to all
    workers.  ``args`` is mutated in place to record derived settings
    (n_gpu, batch_size_total, lr, beta1, beta2, ...), which are later
    dumped by the ParamsReport extension.
    """
    # Fixed seeds for reproducible data order / numpy-side randomness.
    random.seed(0)
    np.random.seed(0)

    if args.multi_node:
        import chainermn
        comm = chainermn.create_communicator('hierarchical')
        device = comm.intra_rank

        # Record cluster geometry on args for logging.
        args.n_gpu = comm.size
        args.inter_size = comm.inter_size
        args.intra_size = comm.intra_size
        args.batch_size_total = args.batch_size * args.n_gpu

        chainer.cuda.get_device(device).use()
    else:
        args.batch_size_total = args.batch_size
        chainer.cuda.get_device_from_id(args.gpu).use()
        device = args.gpu

    # Model: two generators (A->B and B->A) and two discriminators.
    G_A = chainer_cyclegan.models.ResnetGenerator()
    G_B = chainer_cyclegan.models.ResnetGenerator()
    D_A = chainer_cyclegan.models.NLayerDiscriminator()
    D_B = chainer_cyclegan.models.NLayerDiscriminator()

    if args.multi_node or args.gpu >= 0:
        G_A.to_gpu()
        G_B.to_gpu()
        D_A.to_gpu()
        D_B.to_gpu()

    # Optimizer: one Adam per network, all with the same hyperparameters.
    args.lr = 0.0002
    args.beta1 = 0.5
    args.beta2 = 0.999
    optimizer_G_A = chainer.optimizers.Adam(alpha=args.lr,
                                            beta1=args.beta1,
                                            beta2=args.beta2)
    optimizer_G_B = chainer.optimizers.Adam(alpha=args.lr,
                                            beta1=args.beta1,
                                            beta2=args.beta2)
    optimizer_D_A = chainer.optimizers.Adam(alpha=args.lr,
                                            beta1=args.beta1,
                                            beta2=args.beta2)
    optimizer_D_B = chainer.optimizers.Adam(alpha=args.lr,
                                            beta1=args.beta1,
                                            beta2=args.beta2)

    if args.multi_node:
        # Wrap each optimizer so gradients are all-reduced across workers.
        optimizer_G_A = chainermn.create_multi_node_optimizer(
            optimizer_G_A, comm)
        optimizer_G_B = chainermn.create_multi_node_optimizer(
            optimizer_G_B, comm)
        optimizer_D_A = chainermn.create_multi_node_optimizer(
            optimizer_D_A, comm)
        optimizer_D_B = chainermn.create_multi_node_optimizer(
            optimizer_D_B, comm)

    optimizer_G_A.setup(G_A)
    optimizer_G_B.setup(G_B)
    optimizer_D_A.setup(D_A)
    optimizer_D_B.setup(D_B)

    # Dataset: in multi-node mode only rank 0's datasets are kept and then
    # split evenly to every worker by scatter_dataset.
    if args.multi_node:
        if comm.rank != 0:
            dataset_train = None
            dataset_test = None
        dataset_train = chainermn.scatter_dataset(dataset_train, comm,
                                                  shuffle=True)
        dataset_test = chainermn.scatter_dataset(dataset_test, comm)

    iter_train = chainer.iterators.MultiprocessIterator(
        dataset_train, batch_size=args.batch_size,
        n_processes=4, shared_mem=10**7)
    iter_test = chainer.iterators.SerialIterator(dataset_test,
                                                 batch_size=args.batch_size,
                                                 repeat=False,
                                                 shuffle=False)

    # Updater
    epoch_count = 1
    niter = 100        # epochs trained at the initial learning rate
    niter_decay = 100  # additional epochs over which the rate decays
    updater = chainer_cyclegan.updaters.CycleGANUpdater(
        iterator=iter_train,
        optimizer=dict(
            G_A=optimizer_G_A,
            G_B=optimizer_G_B,
            D_A=optimizer_D_A,
            D_B=optimizer_D_B,
        ),
        device=device,
    )

    # Trainer: logs go to a timestamped directory.
    out = osp.join('logs/train_cyclegan',
                   datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))
    trainer = training.Trainer(
        updater, (niter + niter_decay, 'epoch'), out=out)

    @training.make_extension(trigger=(1, 'epoch'))
    def tune_learning_rate(trainer):
        # NOTE(review): this multiplies alpha by the decay factor every
        # epoch (cumulative), rather than setting alpha = base_lr * factor;
        # confirm this compounding schedule is intended.
        epoch = trainer.updater.epoch
        lr_rate = 1.0 - (max(0, epoch + 1 + epoch_count - niter) /
                         float(niter_decay + 1))
        trainer.updater.get_optimizer('G_A').alpha *= lr_rate
        trainer.updater.get_optimizer('G_B').alpha *= lr_rate
        trainer.updater.get_optimizer('D_A').alpha *= lr_rate
        trainer.updater.get_optimizer('D_B').alpha *= lr_rate

    trainer.extend(tune_learning_rate)

    # Evaluation, snapshots, and reporting only on rank 0 (or single-node),
    # to avoid duplicated output/files from every worker.
    if not args.multi_node or comm.rank == 0:
        trainer.extend(
            chainer_cyclegan.extensions.CycleGANEvaluator(iter_test,
                                                          device=device))

        trainer.extend(extensions.snapshot_object(
            target=G_A, filename='G_A_{.updater.epoch:08}.npz'),
            trigger=(1, 'epoch'))
        trainer.extend(extensions.snapshot_object(
            target=G_B, filename='G_B_{.updater.epoch:08}.npz'),
            trigger=(1, 'epoch'))
        trainer.extend(extensions.snapshot_object(
            target=D_A, filename='D_A_{.updater.epoch:08}.npz'),
            trigger=(1, 'epoch'))
        trainer.extend(extensions.snapshot_object(
            target=D_B, filename='D_B_{.updater.epoch:08}.npz'),
            trigger=(1, 'epoch'))

        trainer.extend(extensions.LogReport(trigger=(20, 'iteration')))
        trainer.extend(
            extensions.PrintReport([
                'epoch',
                'iteration',
                'elapsed_time',
                'loss_gen_A',
                'loss_gen_B',
                'loss_dis_A',
                'loss_dis_B',
                'loss_cyc_A',
                'loss_cyc_B',
                'loss_idt_A',
                'loss_idt_B',
            ]))
        trainer.extend(contrib.extensions.ParamsReport(args.__dict__))
        trainer.extend(extensions.ProgressBar(update_interval=10))

        # NOTE(review): an assert is stripped under `python -O`; consider
        # raising instead if plot availability is mandatory.
        assert extensions.PlotReport.available()
        trainer.extend(
            extensions.PlotReport(y_keys=['loss_gen_A', 'loss_gen_B'],
                                  x_key='iteration',
                                  file_name='loss_gen.png',
                                  trigger=(100, 'iteration')))
        trainer.extend(
            extensions.PlotReport(y_keys=['loss_dis_A', 'loss_dis_B'],
                                  x_key='iteration',
                                  file_name='loss_dis.png',
                                  trigger=(100, 'iteration')))
        trainer.extend(
            extensions.PlotReport(y_keys=['loss_cyc_A', 'loss_cyc_B'],
                                  x_key='iteration',
                                  file_name='loss_cyc.png',
                                  trigger=(100, 'iteration')))
        trainer.extend(
            extensions.PlotReport(y_keys=['loss_idt_A', 'loss_idt_B'],
                                  x_key='iteration',
                                  file_name='loss_idt.png',
                                  trigger=(100, 'iteration')))

    trainer.run()
def main():
    """ChainerMN ImageNet training entry point.

    One GPU per MPI process (device = comm.intra_rank).  Only rank 0 builds
    the datasets; they are then split evenly to all workers.
    """
    # Check if GPU is available
    # (ImageNet example does not support CPU execution)
    if not chainer.cuda.available:
        raise RuntimeError("ImageNet requires GPU support.")

    # Selectable network architectures.
    archs = {
        'alex': alex.Alex,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50,
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch', '-a', choices=archs.keys(), default='nin',
                        help='Convnet architecture')
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch', '-E', type=int, default=10,
                        help='Number of epochs to train')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean', '-m', default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--root', '-R', default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize', '-b', type=int, default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.add_argument('--communicator', default='hierarchical')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Prepare ChainerMN communicator.
    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank

    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        print('Using {} communicator'.format(args.communicator))
        print('Using {} arch'.format(args.arch))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    chainer.cuda.get_device_from_id(device).use()  # Make the GPU current
    model.to_gpu()

    # Split and distribute the dataset. Only worker 0 loads the whole dataset.
    # Datasets of worker 0 are evenly split and distributed to all workers.
    mean = np.load(args.mean)
    if comm.rank == 0:
        train = PreprocessedDataset(args.train, args.root, mean, model.insize)
        val = PreprocessedDataset(
            args.val, args.root, mean, model.insize, False)
    else:
        train = None
        val = None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    val = chainermn.scatter_dataset(val, comm)

    # We need to change the start method of multiprocessing module if we are
    # using InfiniBand and MultiprocessIterator. This is because processes
    # often crash when calling fork if they are using Infiniband.
    # (c.f., https://www.open-mpi.org/faq/?category=tuning#fork-warning )
    multiprocessing.set_start_method('forkserver')
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=args.loaderjob)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)

    # Create a multi node optimizer from a standard Chainer optimizer.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9), comm)
    optimizer.setup(model)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # In --test mode use short iteration-based intervals so a smoke run
    # exercises checkpointing, validation and logging quickly.
    checkpoint_interval = (10, 'iteration') if args.test else (1, 'epoch')
    val_interval = (10, 'iteration') if args.test else (1, 'epoch')
    log_interval = (10, 'iteration') if args.test else (1, 'epoch')

    # Multi-node checkpointer; maybe_load resumes from an existing snapshot.
    checkpointer = chainermn.create_multi_node_checkpointer(
        name='imagenet-example', comm=comm)
    checkpointer.maybe_load(trainer, optimizer)
    trainer.extend(checkpointer, trigger=checkpoint_interval)

    # Create a multi node evaluator from an evaluator.
    evaluator = TestModeEvaluator(val_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=val_interval)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        trainer.extend(extensions.dump_graph('main/loss'))
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy', 'lr'
        ]), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train FPN Faster R-CNN on COCO with ChainerMN (one GPU per rank)."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', choices=('resnet50', 'resnet101'))
    parser.add_argument('--batchsize', type=int, default=16)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    comm = chainermn.create_communicator()
    device = comm.intra_rank

    if args.model == 'resnet50':
        model = FasterRCNNFPNResNet50(
            n_fg_class=len(coco_bbox_label_names),
            mean='chainercv')
        # Initialize the backbone from ImageNet-pretrained weights.
        copyparams(model.extractor.base,
                   ResNet50(pretrained_model='imagenet', arch='he'))
    elif args.model == 'resnet101':
        model = FasterRCNNFPNResNet101(
            n_fg_class=len(coco_bbox_label_names),
            mean='chainercv')
        copyparams(model.extractor.base,
                   ResNet101(pretrained_model='imagenet', arch='he'))

    model.use_preset('evaluate')
    train_chain = TrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    train_chain.to_gpu()

    # train2017-equivalent split: train + valminusminival.
    train = TransformDataset(
        ConcatenatedDataset(
            COCOBboxDataset(split='train'),
            COCOBboxDataset(split='valminusminival'),
        ), ('img', 'bbox', 'label'), transform)

    # Scatter only the index array; each rank slices its own shard locally,
    # avoiding sending image data over MPI.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # Per-rank minibatch = global batchsize / number of workers.
    train_iter = chainer.iterators.MultithreadIterator(
        train, args.batchsize // comm.size)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    optimizer.add_hook(WeightDecay(0.0001))

    # Freeze the stem (conv1), res2, and every BatchNormalization layer.
    model.extractor.base.conv1.disable_update()
    model.extractor.base.res2.disable_update()
    for link in model.links():
        if isinstance(link, L.BatchNormalization):
            link.disable_update()

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)
    # NOTE(review): 90000 * 16 / args.batchsize is true division, so the
    # trigger period is a float — confirm integer division (//) is not
    # intended here and in the snapshot_object trigger below.
    trainer = training.Trainer(
        updater, (90000 * 16 / args.batchsize, 'iteration'), args.out)

    def lr_schedule(updater):
        # Base rate scales linearly with batch size; linear warmup for the
        # first 500 iterations, then step decays at the scaled milestones.
        base_lr = 0.02 * args.batchsize / 16
        warm_up_duration = 500
        warm_up_rate = 1 / 3

        iteration = updater.iteration
        if iteration < warm_up_duration:
            rate = warm_up_rate \
                + (1 - warm_up_rate) * iteration / warm_up_duration
        elif iteration < 60000 * 16 / args.batchsize:
            rate = 1
        elif iteration < 80000 * 16 / args.batchsize:
            rate = 0.1
        else:
            rate = 0.01

        return base_lr * rate

    trainer.extend(ManualScheduler('lr', lr_schedule))

    # Reporting and snapshots only on rank 0 to avoid duplicated output.
    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport(
            ['epoch', 'iteration', 'lr', 'main/loss',
             'main/loss/rpn/loc', 'main/loss/rpn/conf',
             'main/loss/head/loc', 'main/loss/head/conf']),
            trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
        trainer.extend(
            extensions.snapshot_object(
                model, 'model_iter_{.updater.iteration}'),
            trigger=(90000 * 16 / args.batchsize, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer, strict=False)

    trainer.run()
def main():
    """ChainerMN DCGAN example entry point.

    Trains a Generator/Discriminator pair on CIFAR-10 (default) or on a
    directory of png/jpg files; rank 0 loads the data and scatter_dataset
    splits it evenly across workers.
    """
    parser = argparse.ArgumentParser(description='ChainerMN example: DCGAN')
    parser.add_argument('--batchsize', '-b', type=int, default=50,
                        help='Number of images in each mini-batch')
    parser.add_argument('--communicator', type=str,
                        default='hierarchical', help='Type of communicator')
    parser.add_argument('--epoch', '-e', type=int, default=1000,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--dataset', '-i', default='',
                        help='Directory of image files. Default is cifar-10.')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--gen_model', '-r', default='',
                        help='Use pre-trained generator for training')
    parser.add_argument('--dis_model', '-d', default='',
                        help='Use pre-trained discriminator for training')
    parser.add_argument('--n_hidden', '-n', type=int, default=100,
                        help='Number of hidden units (z)')
    parser.add_argument('--seed', type=int, default=0,
                        help='Random seed of z at visualization stage')
    parser.add_argument('--snapshot_interval', type=int, default=1000,
                        help='Interval of snapshot')
    parser.add_argument('--display_interval', type=int, default=100,
                        help='Interval of displaying log to console')
    args = parser.parse_args()

    # Prepare ChainerMN communicator.
    if args.gpu:
        # 'naive' cannot drive GPUs; reject it explicitly.
        if args.communicator == 'naive':
            print("Error: 'naive' communicator does not support GPU.\n")
            exit(-1)
        comm = chainermn.create_communicator(args.communicator)
        device = comm.intra_rank
    else:
        # CPU-only execution forces the 'naive' communicator.
        if args.communicator != 'naive':
            print('Warning: using naive communicator '
                  'because only naive supports CPU-only execution')
        comm = chainermn.create_communicator('naive')
        device = -1

    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num hidden unit: {}'.format(args.n_hidden))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    # Set up a neural network to train
    gen = Generator(n_hidden=args.n_hidden)
    dis = Discriminator()

    if device >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(device).use()
        gen.to_gpu()  # Copy the model to the GPU
        dis.to_gpu()

    # Setup an optimizer
    def make_optimizer(model, comm, alpha=0.0002, beta1=0.5):
        # Create a multi node optimizer from a standard Chainer optimizer.
        optimizer = chainermn.create_multi_node_optimizer(
            chainer.optimizers.Adam(alpha=alpha, beta1=beta1), comm)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001), 'hook_dec')
        return optimizer

    opt_gen = make_optimizer(gen, comm)
    opt_dis = make_optimizer(dis, comm)

    # Split and distribute the dataset. Only worker 0 loads the whole dataset.
    # Datasets of worker 0 are evenly split and distributed to all workers.
    if comm.rank == 0:
        if args.dataset == '':
            # Load the CIFAR10 dataset if args.dataset is not specified
            train, _ = chainer.datasets.get_cifar10(withlabel=False,
                                                    scale=255.)
        else:
            all_files = os.listdir(args.dataset)
            image_files = [f for f in all_files if ('png' in f or 'jpg' in f)]
            print('{} contains {} image files'
                  .format(args.dataset, len(image_files)))
            train = chainer.datasets\
                .ImageDataset(paths=image_files, root=args.dataset)
    else:
        train = None

    train = chainermn.scatter_dataset(train, comm)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

    # Set up a trainer
    updater = DCGANUpdater(
        models=(gen, dis),
        iterator=train_iter,
        optimizer={
            'gen': opt_gen, 'dis': opt_dis},
        device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        snapshot_interval = (args.snapshot_interval, 'iteration')
        display_interval = (args.display_interval, 'iteration')
        # Save only model parameters.
        # `snapshot` extension will save all the trainer module's attribute,
        # including `train_iter`.
        # However, `train_iter` depends on scattered dataset, which means that
        # `train_iter` may be different in each process.
        # Here, instead of saving whole trainer module, only the network models
        # are saved.
        trainer.extend(extensions.snapshot_object(
            gen, 'gen_iter_{.updater.iteration}.npz'),
            trigger=snapshot_interval)
        trainer.extend(extensions.snapshot_object(
            dis, 'dis_iter_{.updater.iteration}.npz'),
            trigger=snapshot_interval)
        trainer.extend(extensions.LogReport(trigger=display_interval))
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'gen/loss', 'dis/loss', 'elapsed_time',
        ]), trigger=display_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))
        trainer.extend(
            out_generated_image(
                gen, dis,
                10, 10, args.seed, args.out),
            trigger=snapshot_interval)

    # Start the training using pre-trained model, saved by snapshot_object
    if args.gen_model:
        chainer.serializers.load_npz(args.gen_model, gen)
    if args.dis_model:
        chainer.serializers.load_npz(args.dis_model, dis)

    # Run the training
    trainer.run()
def main():
    """Chainer K-FAC MNIST example: single-process or distributed.

    With --distributed, a communicator is created (chainerkfac's own for
    the 'kfac' optimizer, chainermn's otherwise) and the dataset loaded on
    rank 0 is scattered to all workers.
    """
    parser = argparse.ArgumentParser(description='Chainer K-FAC example: MNIST')  # NOQA
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--num_epochs', type=int, default=20)
    parser.add_argument('--snapshot_interval', type=int, default=-1)
    parser.add_argument('--no_cuda', action='store_true')
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume', default='')
    parser.add_argument('--optimizer', default='kfac')
    parser.add_argument('--arch', choices=['mlp', 'cnn'], default='mlp')
    parser.add_argument('--plot', action='store_true')
    parser.add_argument('--distributed', action='store_true')
    args = parser.parse_args()

    # Prepare communicator
    if not args.distributed:
        # Single process execution
        comm = None
        rank = 0
        device = -1 if args.no_cuda else 0
    else:
        # Multiple processes execution, constructs a communicator.
        # chainerkfac uses different method to create a communicator from
        # chainermn.
        if args.optimizer == 'kfac':
            comm = chainerkfac.create_communicator('pure_nccl')
        else:
            comm = chainermn.create_communicator('pure_nccl')
        rank = comm.rank
        device = comm.intra_rank
        if rank == 0:
            print('======== DISTRIBUTED TRAINING ========')

    # Set up a neural network to train
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.arch == 'mlp':
        model = L.Classifier(MLP())
        in_ndim = 1  # input dimentions
    else:
        model = L.Classifier(CNN())
        in_ndim = 3  # input dimentions

    if device >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(device).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    if args.optimizer == 'kfac':
        if comm is None:
            optimizer = chainerkfac.optimizers.KFAC()
        else:
            optimizer = chainerkfac.optimizers.DistributedKFAC(comm)
    else:
        optimizer = chainer.optimizers.Adam()
        if comm is not None:
            optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(model)

    # Load the MNIST dataset (rank 0 only; scattered below when distributed)
    if rank == 0:
        train, test = chainer.datasets.get_mnist(ndim=in_ndim)
    else:
        train, test = None, None
    if comm is not None:
        train = chainermn.scatter_dataset(train, comm, shuffle=True)
        test = chainermn.scatter_dataset(test, comm, shuffle=True)

    train_iter = chainer.iterators.SerialIterator(train, args.batch_size)
    test_iter = chainer.iterators.SerialIterator(test, args.batch_size,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.num_epochs, 'epoch'),
                               out=args.out)

    # Evaluate the model with the test dataset for each epoch
    evaluator = extensions.Evaluator(test_iter, model, device=device)
    if comm is not None:
        evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if rank == 0:
        # Take a snapshot for each specified epoch
        snapshot_interval = args.num_epochs \
            if args.snapshot_interval == -1 else max(1, args.snapshot_interval)
        trainer.extend(extensions.snapshot(),
                       trigger=(snapshot_interval, 'epoch'))

        # Write a log of evaluation statistics for each epoch
        trainer.extend(extensions.LogReport())

        # Save two plot images to the result dir
        if args.plot and extensions.PlotReport.available():
            trainer.extend(
                extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                      'epoch', file_name='loss.png'))
            trainer.extend(
                extensions.PlotReport(
                    ['main/accuracy', 'validation/main/accuracy'],
                    'epoch', file_name='accuracy.png'))

        # Print selected entries of the log to stdout
        # Here "main" refers to the target link of the "main" optimizer again,
        # and "validation" refers to the default name of the Evaluator
        # extension. Entries other than 'epoch' are reported by the Classifier
        # link, called by either the updater or the evaluator.
        trainer.extend(extensions.PrintReport(
            ['epoch', 'main/loss', 'validation/main/loss',
             'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

        # Print a progress bar to stdout
        trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def check_mnist(gpu, display_log=True):
    """Smoke-test distributed MNIST training.

    Trains a small MLP for a few epochs across a 'naive' ChainerMN
    communicator, then asserts that validation accuracy exceeds 0.95 and
    that the multi-node checkpointer cleaned up its snapshot directory.
    """
    n_epoch = 5
    batch_size = 100
    hidden_units = 100

    comm = chainermn.create_communicator('naive')
    if gpu:
        device = comm.intra_rank
        chainer.cuda.get_device_from_id(device).use()
    else:
        device = -1

    classifier = L.Classifier(MLP(hidden_units, 10))
    if gpu:
        classifier.to_gpu()

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(classifier)

    # Only rank 0 loads MNIST; scatter_dataset splits it to every worker.
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)

    train_iter = chainer.iterators.SerialIterator(train, batch_size)
    test_iter = chainer.iterators.SerialIterator(test, batch_size,
                                                 repeat=False, shuffle=False)

    updater = training.StandardUpdater(
        train_iter,
        optimizer,
        device=device
    )
    trainer = training.Trainer(updater, (n_epoch, 'epoch'))

    # Wrap standard Chainer evaluators by MultiNodeEvaluator.
    evaluator = chainermn.create_multi_node_evaluator(
        extensions.Evaluator(test_iter, classifier, device=device), comm)
    trainer.extend(evaluator)

    # Add checkpointer. This is just to check checkpointing runs
    # without errors.
    path = tempfile.mkdtemp(dir='/tmp', prefix=__name__ + "-tmp-")
    checkpointer = create_multi_node_checkpointer(name=__name__, comm=comm,
                                                  path=path)
    trainer.extend(checkpointer, trigger=(1, 'epoch'))

    # Reporting extensions would produce duplicated output on every worker,
    # so only rank 0 registers them (and only when requested).
    if comm.rank == 0 and display_log:
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch')),
                       trigger=(1, 'epoch'))
        trainer.extend(extensions.PrintReport(['epoch',
                                               'main/loss',
                                               'validation/main/loss',
                                               'main/accuracy',
                                               'validation/main/accuracy',
                                               'elapsed_time'],
                                              out=sys.stderr),
                       trigger=(1, 'epoch'))

    trainer.run()

    # Re-run evaluation and check final validation accuracy.
    accuracy = evaluator()['validation/main/accuracy']
    assert accuracy > 0.95

    # Check checkpointer successfully finalized snapshot directory.
    assert [] == os.listdir(path)
    os.removedirs(path)