def test_deprecation_single():
    """Creating a 'single_node' communicator must raise DeprecationWarning."""
    ranks = _communication_utility.init_ranks(mpi_comm)
    if ranks[4] > 1:  # inter-node size
        pytest.skip('This test is for single node only')
    with chainer.testing.assert_warns(DeprecationWarning):
        chainermn.create_communicator('single_node')
def setup(self, gpu):
    """Create a communicator and select a device; requires exactly 2 procs."""
    flavor = 'hierarchical' if gpu else 'naive'
    self.communicator = chainermn.create_communicator(flavor)
    if gpu:
        self.device = self.communicator.rank
        chainer.cuda.get_device_from_id(self.device).use()
    else:
        self.device = -1
    if self.communicator.size != 2:
        pytest.skip('This test is for two processes')
def setup(self, gpu):
    """Seed the RNG and create a communicator; set ``self.device``.

    ``self.device`` is the intra-node GPU id on GPU runs and -1 on CPU runs.
    """
    numpy.random.seed(42)
    if gpu:
        self.communicator = chainermn.create_communicator('flat')
        self.device = self.communicator.intra_rank
        chainer.cuda.get_device_from_id(self.device).use()
    else:
        self.communicator = chainermn.create_communicator('naive')
        # Fix: sibling setup fixtures define self.device on the CPU path
        # too; leaving it unset breaks any test that reads self.device.
        self.device = -1
    if self.communicator.size < 2:
        pytest.skip('This test is for multinode')
def create_communicator(gpu, param):
    """Build the test communicator ('flat' on GPU, 'naive' on CPU).

    ``param`` is accepted but unused here (kept for the caller's signature).
    """
    flavor = 'flat' if gpu else 'naive'
    communicator = chainermn.create_communicator(flavor)
    if gpu:
        chainer.cuda.get_device_from_id(communicator.intra_rank).use()
    if communicator.size < 2:
        pytest.skip('This test is for multinode')
    return communicator
def create_communicator(gpu):
    """Return ``(communicator, next_rank, prev_rank)`` on a ring topology."""
    communicator = chainermn.create_communicator('flat' if gpu else 'naive')
    if gpu:
        chainer.cuda.get_device_from_id(communicator.intra_rank).use()
    size = communicator.size
    if size < 2:
        pytest.skip('This test is for multinode only')
    me = communicator.rank
    return communicator, (me + 1) % size, (me - 1) % size
def setup(self, gpu):
    """Prepare a 2-process communicator and the dtype matrix to test."""
    flavor = 'hierarchical' if gpu else 'naive'
    self.communicator = chainermn.create_communicator(flavor)
    if gpu:
        self.device = self.communicator.rank
        chainer.cuda.get_device_from_id(self.device).use()
    else:
        self.device = -1
    if self.communicator.size != 2:
        pytest.skip('This test is for two processes')
    # dtypes to be tested.
    # DO NOT USE chainer.testing.parameterize
    # (because running order of generated test cases is not unique)
    self.dtypes = [np.int32, np.int64, np.float32, np.float64]
def setUp(self):
    """Build a naive communicator and a 100-element float32 dataset."""
    self.communicator = chainermn.create_communicator('naive')
    if self.communicator.size < 2:
        pytest.skip("This test is for multinode only")
    n_samples = 100
    self.dataset = np.arange(n_samples).astype(np.float32)
def test_allreduce_persistent_gpu(self):
    """Allreduce of persistent values on GPU with a hierarchical communicator."""
    comm = chainermn.create_communicator('hierarchical')
    chainer.cuda.get_device_from_id(comm.intra_rank).use()
    model = ExampleModel()
    model.to_gpu()
    self._test(comm, model)
def setUp(self):
    """Set up ring neighbours for point-to-point communication tests."""
    self.communicator = chainermn.create_communicator('naive')
    size = self.communicator.size
    if size < 2:
        pytest.skip("This test is for multinode")
    rank = self.communicator.rank
    self.rank_send = (rank + 1) % size
    self.rank_recv = (rank - 1) % size
def setup(self, gpu):
    """Create a communicator and linear-chain neighbours (None at the ends)."""
    flavor = 'hierarchical' if gpu else 'naive'
    self.communicator = chainermn.create_communicator(flavor)
    if gpu:
        chainer.cuda.get_device_from_id(self.communicator.intra_rank).use()
    size = self.communicator.size
    if size < 2:
        pytest.skip('This test is for multinode only')
    rank = self.communicator.rank
    # First/last rank has no previous/next neighbour on a chain.
    self.rank_next = rank + 1 if rank + 1 < size else None
    self.rank_prev = rank - 1 if rank - 1 >= 0 else None
def setup_cpu(self):
    """CPU fixture: deterministic weights/grads derived from the rank."""
    self.comm = chainermn.create_communicator('naive')
    self.target = DynamicExampleModel()
    rank = self.comm.rank
    self.target.a.W.data[:] = rank
    self.target.b.W.data[:] = rank + 1
    self.target.a.W.grad[:] = 0
    self.target.b.W.grad[:] = 0
    self.actual_optimizer = chainer.GradientMethod()
    self.actual_optimizer.create_update_rule = mock.MagicMock
def setup_gpu(self, device=None):
    """GPU fixture mirroring ``setup_cpu``.

    NOTE(review): the ``device`` argument is ignored — the intra-rank
    device is always used. Kept for signature compatibility.
    """
    self.comm = chainermn.create_communicator('hierarchical')
    device = self.comm.intra_rank
    chainer.cuda.get_device_from_id(device).use()
    self.target = DynamicExampleModel()
    self.target.to_gpu()
    rank = self.comm.rank
    self.target.a.W.data[:] = rank
    self.target.b.W.data[:] = rank + 1
    self.target.a.W.grad[:] = 0
    self.target.b.W.grad[:] = 0
    self.actual_optimizer = chainer.GradientMethod()
    self.actual_optimizer.create_update_rule = mock.MagicMock
def setUp(self):
    """Skip unsupported configurations, then build a tiny shared dataset."""
    needs_py3 = self.iterator_class == chainer.iterators.MultiprocessIterator
    if needs_py3 and int(platform.python_version_tuple()[0]) < 3:
        pytest.skip('This test requires Python version >= 3')
    self.communicator = chainermn.create_communicator('naive')
    if self.communicator.size < 2:
        pytest.skip('This test is for multinode only')
    self.N = 6
    self.dataset = numpy.arange(self.N).astype(numpy.float32)
    self.bs = 2
def setup(self, gpu):
    """Build communicator/model fixtures for point-to-point model tests."""
    self.gpu = gpu
    if gpu:
        self.communicator = chainermn.create_communicator('hierarchical')
        device = self.communicator.intra_rank
        chainer.cuda.get_device_from_id(device).use()
    else:
        self.communicator = chainermn.create_communicator('naive')
        device = -1
    size = self.communicator.size
    if size < 2:
        pytest.skip("This test is for multinode")
    rank = self.communicator.rank
    self.rank_send = (rank + 1) % size
    self.rank_recv = (rank - 1) % size
    # Activation function.
    self.f = chainer.functions.sigmoid
    # Evaluation function.
    self.evaluation = chainer.functions.mean_squared_error
    # Input data: a 1x10 row of [0.0, 0.1, ..., 0.9].
    self.x = chainer.Variable(
        numpy.arange(10).reshape(1, 10).astype(numpy.float32) / 10)
    # This rank's own model, plus a replica per rank for reference.
    self.model = chainer.links.Linear(
        10, 10, initialW=self._init_w(rank))
    self.entire_model = [
        chainer.links.Linear(10, 10, initialW=self._init_w(i))
        for i in range(size)]
    self.device = device
    if device >= 0:
        self.x.to_gpu()
        self.model.to_gpu()
        for replica in self.entire_model:
            replica.to_gpu()
def setup_gpu(self, device=None):
    """GPU fixture using the pure_nccl communicator (needs NCCL >= 2.0).

    NOTE(review): the ``device`` argument is ignored — the intra-rank
    device is always used. Kept for signature compatibility.
    """
    if nccl.get_build_version() < 2000:
        pytest.skip('This test requires NCCL version >= 2.0')
    self.comm = chainermn.create_communicator('pure_nccl')
    device = self.comm.intra_rank
    chainer.cuda.get_device_from_id(device).use()
    self.target = DynamicExampleModel()
    self.target.to_gpu()
    rank = self.comm.rank
    self.target.a.W.data[:] = rank
    self.target.b.W.data[:] = rank + 1
    self.target.a.W.grad[:] = 0
    self.target.b.W.grad[:] = 0
    self.actual_optimizer = chainer.GradientMethod()
    self.actual_optimizer.create_update_rule = mock.MagicMock
def setUp(self):
    """Conv2DBNActiv fixture: identity 3x3 convolution + multi-node BN."""
    in_shape = (5, self.in_channels, 5, 5)
    out_shape = (5, self.out_channels, 5, 5)
    self.x = np.random.uniform(-1, 1, in_shape).astype(np.float32)
    self.gy = np.random.uniform(-1, 1, out_shape).astype(np.float32)
    # Convolution is the identity function.
    initialW = np.array(
        [[0, 0, 0], [0, 1, 0], [0, 0, 0]],
        dtype=np.float32).reshape((1, 1, 3, 3))
    bn_kwargs = {'decay': 0.8, 'comm': create_communicator('naive')}
    initial_bias = 0
    activ = relu
    self.l = Conv2DBNActiv(
        self.in_channels, self.out_channels, self.ksize, self.stride,
        self.pad, self.dilate, initialW=initialW,
        initial_bias=initial_bias, activ=activ, bn_kwargs=bn_kwargs)
def setUp(self):
    """Create the dataset (optionally as (x, x) pairs) for scatter tests."""
    needs_py3 = self.iterator_class == chainer.iterators.MultiprocessIterator
    if needs_py3 and int(platform.python_version_tuple()[0]) < 3:
        pytest.skip('This test requires Python version >= 3')
    self.communicator = chainermn.create_communicator('naive')
    if self.communicator.size < 2:
        pytest.skip("This test is for multinode only")
    self.N = 100
    if self.paired_dataset:
        self.dataset = list(zip(
            np.arange(self.N).astype(np.float32),
            np.arange(self.N).astype(np.float32)))
    else:
        self.dataset = np.arange(self.N).astype(np.float32)
def test_non_variable_send(param):
    """Checks if backward will be called even if inputs are not Variable.

    This test confirms whether deadlock occurs when numpy/cupy array is
    given as an input of send. In this case, the input will be converted
    to chainer Variable without ``requires_grad``, thus ``backward`` will
    not be called without any modification.
    """
    communicator = chainermn.create_communicator('naive')
    size = communicator.size
    if size < 2:
        pytest.skip('This test is for multinode')
    rank = communicator.rank
    rank_send = (rank + 1) % size
    rank_recv = (rank - 1) % size
    if rank == 0:
        # Head of the chain: send a raw ndarray, keep the graph connected
        # through pseudo_connect so backward reaches the send.
        x = numpy.ones((1, 10)).astype(param.dtype)
        phi = chainermn.functions.send(x, communicator, rank=rank_send)
        x, = chainermn.functions.pseudo_connect(phi, x)
        y = chainer.functions.sum(x)
        t = numpy.array(0).astype(param.dtype)
        z = chainer.functions.mean_squared_error(y, t)
        z.backward()
    elif rank == size - 1:
        # Tail: receive and close the loss.
        x = chainermn.functions.recv(communicator, rank=rank_recv)
        y = chainer.functions.sum(x)
        t = numpy.array(0).astype(param.dtype)
        z = chainer.functions.mean_squared_error(y, t)
        z.backward()
    else:
        # Middle ranks: relay forward, propagate gradients backward.
        x = chainermn.functions.recv(communicator, rank=rank_recv)
        phi = chainermn.functions.send(x, communicator, rank=rank_send)
        phi.backward()
def setUp(self):
    # A CPU/MPI 'naive' communicator is sufficient for this test case.
    self.communicator = chainermn.create_communicator('naive')
evaluator = chainer.training.extensions.Evaluator(valid_iter, model) evaluator = chainermn.create_multi_node_evaluator(evaluator, comm) report = evaluator() return report["main/accuracy"] if __name__ == "__main__": # Please make sure common study and storage are shared among nodes. study_name = sys.argv[1] storage_url = sys.argv[2] study = optuna.load_study(study_name, storage_url, pruner=optuna.pruners.MedianPruner()) comm = chainermn.create_communicator("naive") if comm.rank == 0: print("Study name:", study_name) print("Storage URL:", storage_url) print("Number of nodes:", comm.size) # Run optimization! chainermn_study = optuna.integration.ChainerMNStudy(study, comm) chainermn_study.optimize(objective, n_trials=25) if comm.rank == 0: pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED]) complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE]) print("Study statistics: ")
def main():
    """Train panoptic Mask R-CNN on the occlusion dataset (optionally multi-node)."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('--model', '-m',
                        choices=['resnet50', 'resnet101'],
                        default='resnet50', help='Base model of Mask R-CNN.')
    parser.add_argument('--pooling-func', '-p',
                        choices=['pooling', 'align', 'resize'],
                        default='align', help='Pooling function.')
    parser.add_argument('--gpu', '-g', type=int, help='GPU id.')
    parser.add_argument('--multi-node', action='store_true',
                        help='use multi node')
    default_max_epoch = 120
    parser.add_argument('--max-epoch', type=float,
                        default=default_max_epoch, help='epoch')
    parser.add_argument('--pretrained-model', help='pretrained model')
    parser.add_argument(
        '--notrain', choices=['pix', 'ins'],
        help='not training pixel or instance segmentation',
    )
    parser.add_argument(
        '--lr-base', default=0.00125, type=float,
        help='learning rate per batch size 1',
    )
    parser.add_argument(
        '--noaugmentation', action='store_true',
        help='not apply data augmentation',
    )
    parser.add_argument(
        '--pix-loss-scale', default=1., type=float,
        help='scale of pixel loss',
    )
    parser.add_argument(
        '--dataset', default='occlusion',
        choices=['occlusion', 'occlusion+synthetic'],
        help='dataset',
    )
    args = parser.parse_args()

    if args.multi_node:
        import chainermn
        comm = chainermn.create_communicator('pure_nccl')
        device = comm.intra_rank
        args.n_node = comm.inter_size
        args.n_gpu = comm.size
        chainer.cuda.get_device_from_id(device).use()
    else:
        if args.gpu is None:
            print(
                '--gpu option is required if --multi-node is not specified.',
                file=sys.stderr,
            )
            sys.exit(1)
        args.n_node = 1
        args.n_gpu = 1
        chainer.cuda.get_device_from_id(args.gpu).use()
        device = args.gpu

    args.seed = 0
    now = datetime.datetime.now()
    args.timestamp = now.isoformat()

    # Rank 0 decides the log directory; it is broadcast to the other ranks.
    if not args.multi_node or comm.rank == 0:
        out = osp.join(here, 'logs', now.strftime('%Y%m%d_%H%M%S.%f'))
    else:
        out = None
    if args.multi_node:
        args.out = comm.bcast_obj(out)
    else:
        args.out = out
    del out

    # 0.00125 * 8 = 0.01 in original
    args.batch_size = 1 * args.n_gpu
    args.lr = args.lr_base * args.batch_size
    args.weight_decay = 0.0001

    # lr / 10 at 120k iteration with
    # 160k iteration * 16 batchsize in original
    args.step_size = [(120e3 / 180e3) * args.max_epoch,
                      (160e3 / 180e3) * args.max_epoch]

    random.seed(args.seed)
    np.random.seed(args.seed)

    # Default Config
    # args.min_size = 800
    # args.max_size = 1333
    # args.anchor_scales = (2, 4, 8, 16, 32)
    args.min_size = 600
    args.max_size = 1000
    args.anchor_scales = (4, 8, 16, 32)
    args.rpn_dim = 512

    # -------------------------------------------------------------------------
    # Dataset

    train_data = \
        instance_occlsegm.datasets.PanopticOcclusionSegmentationDataset(
            'train', augmentation=not args.noaugmentation
        )
    if args.dataset == 'occlusion+synthetic':
        synthetic_data = \
            instance_occlsegm.datasets.\
            PanopticOcclusionSegmentationSyntheticDataset(
                do_aug=not args.noaugmentation, size=len(train_data),
            )
        train_data = chainer.datasets.ConcatenatedDataset(
            train_data, synthetic_data)
    test_data = \
        instance_occlsegm.datasets.PanopticOcclusionSegmentationDataset(
            'test'
        )
    fg_class_names = test_data.class_names
    args.class_names = fg_class_names.tolist()
    test_data_list = test_data.get_video_datasets()
    del test_data

    # -------------------------------------------------------------------------
    # Model + Optimizer.

    if args.pooling_func == 'align':
        pooling_func = cmr.functions.roi_align_2d
    elif args.pooling_func == 'pooling':
        pooling_func = chainer.functions.roi_pooling_2d
    elif args.pooling_func == 'resize':
        pooling_func = cmr.functions.crop_and_resize
    else:
        raise ValueError

    args.mask_loss = 'softmax'
    assert args.model in ['resnet50', 'resnet101']
    n_layers = int(args.model.lstrip('resnet'))
    mask_rcnn = instance_occlsegm.models.MaskRCNNPanopticResNet(
        n_layers=n_layers,
        n_fg_class=len(fg_class_names),
        pretrained_model=args.pretrained_model,
        pooling_func=pooling_func,
        anchor_scales=args.anchor_scales,
        min_size=args.min_size,
        max_size=args.max_size,
        rpn_dim=args.rpn_dim,
    )
    mask_rcnn.nms_thresh = 0.3
    mask_rcnn.score_thresh = 0.05

    model = instance_occlsegm.models.MaskRCNNPanopticTrainChain(
        mask_rcnn,
        notrain=args.notrain,
        pix_loss_scale=args.pix_loss_scale,
    )
    if args.multi_node or args.gpu >= 0:
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    if args.multi_node:
        optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.weight_decay))

    # Freeze the early ResNet stages and all affine-channel layers.
    mask_rcnn.extractor.conv1.disable_update()
    mask_rcnn.extractor.bn1.disable_update()
    mask_rcnn.extractor.res2.disable_update()
    for link in mask_rcnn.links():
        if isinstance(link, cmr.links.AffineChannel2D):
            link.disable_update()

    # -------------------------------------------------------------------------
    # Iterator.

    train_data = chainer.datasets.TransformDataset(
        train_data,
        instance_occlsegm.datasets.MaskRCNNPanopticTransform(mask_rcnn),
    )
    test_data_list = [
        chainer.datasets.TransformDataset(
            td, instance_occlsegm.datasets.MaskRCNNPanopticTransform(
                mask_rcnn, train=False,
            )) for td in test_data_list
    ]
    test_concat_data = chainer.datasets.ConcatenatedDataset(*test_data_list)
    if args.multi_node:
        if comm.rank != 0:
            train_data = None
        train_data = chainermn.scatter_dataset(train_data, comm, shuffle=True)

    # for training
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, batch_size=1, n_processes=14, shared_mem=10 ** 9,
    )
    # for evaluation
    test_iters = {
        i: chainer.iterators.SerialIterator(
            td, batch_size=1, repeat=False, shuffle=False)
        for i, td in enumerate(test_data_list)
    }
    # for visualization
    test_concat_iter = chainer.iterators.SerialIterator(
        test_concat_data, batch_size=1, repeat=False, shuffle=False)

    # -------------------------------------------------------------------------

    converter = functools.partial(
        cmr.datasets.concat_examples,
        padding=0,
        # img, bboxes, labels, masks, scales, lbls_vis, lbls_occ
        indices_concat=[0, 2, 3, 4, 5, 6],
        indices_to_device=[0, 1, 5, 6],
    )
    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=device, converter=converter)
    trainer = training.Trainer(
        updater, (args.max_epoch, 'epoch'), out=args.out)

    trainer.extend(extensions.FailOnNonNumber())
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1),
        trigger=training.triggers.ManualScheduleTrigger(
            args.step_size, 'epoch'))

    eval_interval = 1, 'epoch'
    log_interval = 10, 'iteration'
    plot_interval = 0.1, 'epoch'
    print_interval = log_interval

    # Reporting, evaluation and snapshots run on rank 0 only.
    if not args.multi_node or comm.rank == 0:
        evaluator = \
            instance_occlsegm.extensions.PanopticSegmentationVOCEvaluator(
                test_iters, model.mask_rcnn, device=device,
                use_07_metric=False, label_names=fg_class_names,
            )
        trainer.extend(evaluator, trigger=eval_interval)
        trainer.extend(
            extensions.snapshot_object(model.mask_rcnn, 'snapshot_model.npz'),
            trigger=training.triggers.MaxValueTrigger(
                'validation/main/mpq', eval_interval))
        args.git_hash = cmr.utils.git_hash()
        args.hostname = socket.gethostname()
        trainer.extend(fcn.extensions.ParamsReport(args.__dict__))
        trainer.extend(
            instance_occlsegm.extensions.PanopticSegmentationVisReport(
                test_concat_iter, model.mask_rcnn,
                label_names=fg_class_names),
            trigger=eval_interval,
        )
        trainer.extend(
            chainer.training.extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(
            extensions.PrintReport([
                'iteration', 'epoch', 'elapsed_time', 'lr',
                'main/loss',
                'main/roi_loc_loss', 'main/roi_cls_loss',
                'main/roi_mask_loss',
                'main/rpn_loc_loss', 'main/rpn_cls_loss',
                'main/pix_vis_loss', 'main/pix_occ_loss',
                'validation/main/miou', 'validation/main/mpq',
            ], ), trigger=print_interval,
        )
        trainer.extend(extensions.ProgressBar(update_interval=10))

        # plot
        assert extensions.PlotReport.available()
        trainer.extend(
            extensions.PlotReport(
                [
                    'main/roi_loc_loss', 'main/roi_cls_loss',
                    'main/roi_mask_loss',
                    'main/rpn_loc_loss', 'main/rpn_cls_loss',
                    'main/ins_loss',
                    'main/pix_vis_loss',
                    # BUGFIX: the original list was missing commas between
                    # the next three names, which implicitly concatenated
                    # them into the single bogus report key
                    # 'main/pix_occ_lossmain/pix_lossmain/loss'.
                    'main/pix_occ_loss',
                    'main/pix_loss',
                    'main/loss',
                ],
                file_name='loss.png', trigger=plot_interval,
            ),
            trigger=plot_interval,
        )
        trainer.extend(
            extensions.PlotReport([
                'validation/main/miou/vis',
                'validation/main/miou/occ',
                'validation/main/miou',
                'validation/main/map',
                'validation/main/msq',
                'validation/main/mdq',
                'validation/main/mpq',
            ], file_name='accuracy.png', trigger=plot_interval),
            trigger=eval_interval,
        )
        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
type=str, default=env.channel_input_dirs['train']) parser.add_argument('--test', type=str, default=env.channel_input_dirs['test']) args = parser.parse_args() train_file = np.load(os.path.join(args.train, 'train.npz')) test_file = np.load(os.path.join(args.test, 'test.npz')) logger.info('Current host: {}'.format(args.host)) communicator = 'naive' if args.num_gpus == 0 else args.communicator comm = chainermn.create_communicator(communicator) device = comm.intra_rank if args.num_gpus > 0 else -1 print('==========================================') print('Using {} communicator'.format(comm)) print('Num unit: {}'.format(args.units)) print('Num Minibatch-size: {}'.format(args.batch_size)) print('Num epoch: {}'.format(args.epochs)) print('==========================================') model = L.Classifier(MLP(args.units, 10)) if device >= 0: chainer.cuda.get_device(device).use() # Create a multi node optimizer from a standard Chainer optimizer. optimizer = chainermn.create_multi_node_optimizer(
def main():
    """Train SSD (300/512) on EPIC-Kitchens bounding boxes with ChainerMN."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    # One training process per GPU; default communicator type.
    comm = chainermn.create_communicator()
    device = comm.intra_rank

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(epic_kitchens_bbox_category_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(epic_kitchens_bbox_category_names),
                       pretrained_model='imagenet')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    train = EpicKitchensBboxDataset(year='2018', split='train')
    # Rank 0 owns the full index list; scatter_dataset shards it to workers.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    train = TransformDataset(
        train, ('img', 'mb_loc', 'mb_label'),
        Transform(model.coder, model.insize, model.mean))
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # http://chainermn.readthedocs.io/en/latest/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=2)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    # SSD convention: doubled gradient (no decay) for biases, weight decay
    # for all other parameters.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (18, 'epoch'), args.out)
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=args.lr),
        trigger=triggers.ManualScheduleTrigger([12, 15], 'epoch'))

    # Logging, reporting and snapshots run on rank 0 only.
    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'lr',
            'main/loss', 'main/loss/loc', 'main/loss/conf'
        ]), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=1))
        trainer.extend(extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}.npz'),
            trigger=(1, 'epoch'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def main(argv):
    """Train an attention NStepLSTM seq2seq model (en -> ja) with ChainerMN."""
    if len(argv) < 5:
        print("python " + argv[0] + " data_path out_path layer epoch")
        sys.exit(0)
    data_path = argv[1]
    out_path = argv[2]
    layer = int(argv[3])
    epoch = int(argv[4])
    x_file = os.path.join(data_path, "en.txt")
    y_file = os.path.join(data_path, "ja.txt")
    vocab_path = os.path.join(data_path, "vocab.dump")
    # Dictionary mapping words to ids.
    with open(vocab_path, "rb") as f:
        vocab = pickle.load(f)
    train_data1 = load_data(x_file, vocab)
    train_data2 = load_data(y_file, vocab)
    eos_id = vocab["<eos>"]
    batch_size = 256
    demb = 256
    drop_out = 0.5
    model = gAtt(layer, len(vocab) + 1, demb, drop_out)
    comm = chainermn.create_communicator("single_node")
    device = comm.intra_rank
    chainer.cuda.get_device(device).use()
    model.to_gpu(device)
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)
    if comm.rank == 0:
        # Create the output file/directory.
        date = datetime.datetime.today()
        folder_name = "_".join(
            [str(date.year), str(date.month), str(date.day)])
        out_path = (os.path.join(out_path, folder_name, "".join(
            ["layer", str(layer)])) + os.sep)
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        loss_out_path = os.path.join(
            out_path, "".join(["loss_", str(epoch), "_", str(layer), ".csv"]))
        loss_out = open(loss_out_path, "w")
        print(
            "epoch:", epoch, " batch:", batch_size, " drop:", drop_out,
            " demb:", demb, " layer:", layer, end="\n", file=loss_out,
        )
    xs = []
    ys = []
    s = []
    # Build the x data: split the flat id stream into sentences at <eos>.
    # Only rank 0 builds the lists; scatter_dataset distributes them below.
    if comm.rank == 0:
        for pos in range(len(train_data1)):
            id = train_data1[pos]
            if id != eos_id:
                s += [id]
            else:
                xs += [xp.asarray(s, dtype=xp.int32)]
                s = []
        # Build the y data the same way.
        for pos in range(len(train_data2)):
            id = train_data2[pos]
            if id != eos_id:
                s += [id]
            else:
                ys += [xp.asarray(s, dtype=xp.int32)]
                s = []
    # Distribute the data across workers.
    xs = chainermn.scatter_dataset(xs, comm)
    ys = chainermn.scatter_dataset(ys, comm)
    loss = None
    for cnt in range(epoch):
        index = np.random.permutation(len(xs))
        for pos in range(0, len(xs), batch_size):
            # Assemble a minibatch from the shuffled indices.
            batch_xs = []
            batch_ys = []
            for idx in index[pos:pos + (batch_size)]:
                batch_xs.append(xs[idx])
                batch_ys.append(ys[idx])
            model.cleargrads()
            # Create the initial hidden/cell states (zeros).
            hx = chainer.Variable(
                xp.zeros((2 * layer, len(batch_xs), demb), dtype=xp.float32))
            cx = chainer.Variable(
                xp.zeros((2 * layer, len(batch_xs), demb), dtype=xp.float32))
            # Train on the minibatch.
            loss = model(hx, cx, batch_xs, batch_ys, len(batch_xs), vocab)
            loss.backward()
            optimizer.update()
            print(cnt + 1, " : ", pos + len(batch_xs), "/", len(xs),
                  " finished")
        if comm.rank == 0:
            # Log the last loss of the epoch and checkpoint the model.
            print(loss.array, end="\n", file=loss_out)
            out_file = out_path + "nsteplstm-" + str(layer) + "-" + str(
                cnt) + ".model"
            model.to_cpu()
            serializers.save_npz(out_file, model)
            # NOTE(review): restores to GPU 0, not ``device`` — confirm this
            # is intended on multi-GPU nodes.
            model.to_gpu(0)
# SageMaker-style smoke script: each MPI process reports itself; one
# designated process sleeps, then drops a completion marker file.
parser = argparse.ArgumentParser()
parser.add_argument('--num-gpus', type=int, default=env.num_gpus)
parser.add_argument('--communicator', type=str,
                    default='naive' if env.num_gpus == 0 else 'pure_nccl')
parser.add_argument('--current_host', type=str, default=env.current_host)
parser.add_argument('--hosts', type=str, default=env.hosts)
parser.add_argument('--output-data-dir', type=str,
                    default=env.output_data_dir)
args = parser.parse_args()

comm = chainermn.create_communicator(args.communicator)

# NOTE(review): args.hosts is declared type=str; if env.hosts is a list this
# len() counts hosts, otherwise it counts characters — confirm upstream.
num_hosts = len(args.hosts)
print('process %s on host %s of %s starting' %
      (comm.intra_rank, args.current_host, num_hosts))
if comm.intra_rank == 1 and args.current_host != 'algo-1':
    os.makedirs(args.output_data_dir)
    # this sleep time must be longer than the polling interval to check if mpi is finished.
    print('process %s on host %s of %s sleeping' %
          (comm.intra_rank, args.current_host, num_hosts))
    time.sleep(20)
    open(os.path.join(args.output_data_dir, 'process_could_complete'),
         'a').close()
def main():
    """Train the 3D-CNN MQAP model, optionally multi-GPU via ChainerMN."""
    import chainermn
    chainer.global_config.autotune = True
    parser = argparse.ArgumentParser(
        description='ChainerMN example: Train MQAP using 3DCNN')
    parser.add_argument('--communicator', type=str, default='hierarchical',
                        help='Type of communicator')
    parser.add_argument('--gpu', '-g', action='store_true', help='Use GPU')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', action='store_true',
                        help='Resume the training from snapshot')
    parser.add_argument('--weight', '-w', action='store_true',
                        help='Resume only weight')
    parser.add_argument('--config', '-c', type=int, default=0,
                        help='Number of config')
    parser.add_argument('--config_file', type=str,
                        default='./data/config.json', help='Config file path')
    args = parser.parse_args()

    if args.gpu:
        if args.communicator == 'naive':
            print("Error: 'naive' communicator does not support GPU.\n")
            exit(-1)
        comm = chainermn.create_communicator(
            args.communicator, allreduce_grad_dtype='float16')
        device = comm.intra_rank
    else:
        if args.communicator != 'naive':
            print('Warning: using naive communicator '
                  'because only naive supports CPU-only execution')
        comm = chainermn.create_communicator('naive')
        device = -1

    # BUGFIX: the config file handle was never closed; use a context manager.
    with open(args.config_file, 'r') as f:
        config = json.load(f)['Config'][args.config]
    args.out = os.path.join(args.out, str(args.config))

    if comm.rank == 0:
        print('==========================================')
        chainer.print_runtime_info()
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num epoch: {}'.format(config['epoch']))
        print('Batch size: {}'.format(config['batch_size'] * comm.size))
        print('Optimizer: {}'.format(config['optimizer']))
        print('Learning Rate: {}'.format(config['learning_rate']))
        print('Out Directory: {}'.format(args.out))
        print('Vertex feature: {}'.format(config['vertex_feature']))
        if config['global_mode']:
            print('Using Global loss')
        if config['local_mode']:
            print('Using local loss')
            print('Local type : {}'.format(config['local_type']))
            print('Local label : {}'.format(config['local_label']))
        print('==========================================')

    d = Dataproc(size=comm.size, rank=comm.rank, config=config)
    if device >= 0:
        chainer.cuda.get_device(device).use()
    # sub_comm = comm.split(comm.rank // comm.intra_size, comm.rank)
    if config['local_type'] == 'Regression':
        local_loss_func = F.mean_squared_error
    else:
        local_loss_func = F.sigmoid_cross_entropy
    global_loss_func = F.mean_squared_error
    model = build_model(config=config, comm=comm)
    model = Classifier(predictor=model, local_loss_func=local_loss_func,
                       global_loss_func=global_loss_func, config=config)
    if device >= 0:
        model.to_gpu()

    train, test = d.get_dataset(key='train'), d.get_dataset(key='test')
    train_iter = I.SerialIterator(dataset=train,
                                  batch_size=config['batch_size'],
                                  repeat=True, shuffle=True)
    test_iter = I.SerialIterator(dataset=test,
                                 batch_size=config['batch_size'],
                                 repeat=False, shuffle=False)
    # train_iter = I.MultiprocessIterator(dataset=train, batch_size=args.batch, repeat=True, shuffle=True, n_processes=10)
    # test_iter = I.MultiprocessIterator(dataset=test, batch_size=args.batch, repeat=False, shuffle=True, n_processes=10)

    if config['optimizer'] == 'Adam':
        optimizer = chainer.optimizers.Adam(
            alpha=config['learning_rate'],
            weight_decay_rate=config['weight_decay_rate'], amsgrad=True)
        optimizer = chainermn.create_multi_node_optimizer(
            optimizer, comm, double_buffering=False)
    elif config['optimizer'] == 'MomentumSGD':
        optimizer = chainer.optimizers.MomentumSGD(lr=config['learning_rate'])
        optimizer = chainermn.create_multi_node_optimizer(
            optimizer, comm, double_buffering=False)
    elif config['optimizer'] == 'SMORMS3':
        optimizer = chainer.optimizers.SMORMS3(lr=config['learning_rate'])
        optimizer = chainermn.create_multi_node_optimizer(
            optimizer, comm, double_buffering=False)
    elif config['optimizer'] == 'Eve':
        from my_optimizer.eve import Eve, create_multi_node_optimizer
        optimizer = Eve(alpha=config['learning_rate'])
        optimizer = create_multi_node_optimizer(
            optimizer, comm, double_buffering=False)
    elif config['optimizer'] == 'Adabound':
        from my_optimizer.adabound import Adam as Adabound
        optimizer = Adabound(alpha=config['learning_rate'], adabound=True,
                             amsgrad=True,
                             weight_decay_rate=config['weight_decay_rate'])
        optimizer = chainermn.create_multi_node_optimizer(
            optimizer, comm, double_buffering=False)
    optimizer.setup(model)

    val_interval = 1, 'epoch'
    log_interval = 1, 'epoch'
    updater = training.StandardUpdater(train_iter, optimizer, device=device,
                                       converter=d.get_converter())
    trainer = training.Trainer(updater, (config['epoch'], 'epoch'),
                               out=args.out)
    evaluator = GraphEvaluator(iterator=test_iter, target=model.predictor,
                               device=device, converter=d.get_converter(),
                               comm=comm, local_loss_func=local_loss_func,
                               global_loss_func=global_loss_func, name='val',
                               config=config)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=val_interval)

    # Reporting and snapshots run on rank 0 only.
    if comm.rank == 0:
        trainer.extend(extensions.dump_graph('main/loss'))
        trainer.extend(extensions.snapshot(), trigger=val_interval)
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.PlotReport(['main/loss', 'val/main/loss'],
                                             'epoch', file_name='loss.png'),
                       trigger=val_interval)
        report_list = ['epoch', 'main/loss', 'val/main/loss']
        if config['global_mode']:
            report_list.extend(['main/global_loss', 'val/main/global_loss',
                                'val/main/global_pearson'])
            trainer.extend(extensions.PlotReport(
                ['main/global_loss', 'val/main/global_loss'], 'epoch',
                file_name='global_loss.png'), trigger=val_interval)
        if config['local_mode']:
            report_list.extend(['main/local_loss', 'val/main/local_loss',
                                'val/main/local_mean_pearson'])
            if config['local_type'] == 'Classification':
                report_list.append('val/main/local_auc')
                trainer.extend(extensions.PlotReport(
                    ['val/main/local_auc'], 'epoch',
                    file_name='local_auc.png'), trigger=val_interval)
            else:
                report_list.append('val/main/local_pearson')
        report_list.append('elapsed_time')
        trainer.extend(extensions.PrintReport(report_list),
                       trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        snap_list = [p for p in os.listdir(args.out) if 'snapshot' in p]
        # Pick the snapshot with the largest number embedded in its name.
        # BUGFIX: made the pattern a raw string — "[\.]" in a normal string
        # is an invalid escape sequence (DeprecationWarning on Python 3.6+).
        snap_num = np.array(
            [int(re.findall(r"[+-]?[0-9]+[\.]?[0-9]*[eE]?[+-]?[0-9]*", p)[0])
             for p in snap_list])
        path = snap_list[np.argmax(snap_num)]
        path = os.path.join(args.out, path)
        if args.weight:
            obj_path = 'updater/model:main/predictor/'
            chainer.serializers.load_npz(path, model.predictor, obj_path)
        else:
            chainer.serializers.load_npz(path, trainer)

    if comm.rank == 0:
        protein_name_dict = d.get_protein_name_dict()
        out_path = Path(args.out)
        if not out_path.exists():
            out_path.mkdir(parents=True, exist_ok=True)
        np.savez(os.path.join(args.out, 'protein_name'), **protein_name_dict)
        # BUGFIX: these two output files were opened without ever being
        # closed on error paths; context managers guarantee closing.
        with open(os.path.join(args.out, 'config.json'), 'w') as f:
            json.dump(config, f, ensure_ascii=False, indent=4, sort_keys=True,
                      separators=(',', ': '))
        with open(os.path.join(args.out, 'args.json'), 'w') as f:
            json.dump(vars(args), f)
    if comm.rank == 0:
        print('train start!!!')
    trainer.run()
def test_allreduce_persistent_gpu(self):
    """Exercise allreduce of persistent values on GPU.

    Runs the shared ``_test`` helper twice on the same model: once with
    CuPy arrays and once with ChainerX arrays.
    """
    communicator = chainermn.create_communicator('flat')
    model = ExampleModel()
    # Second flag selects the array backend: False -> CuPy, True -> ChainerX.
    for use_chainerx in (False, True):
        self._test(communicator, model, True, use_chainerx)
def main():
    """Train Mask R-CNN on the ARC2017 instance-segmentation datasets.

    Supports single-GPU execution (``--gpu``) and multi-node data-parallel
    training via ChainerMN (``--multi-node``).
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('dataset', choices=['real', 'synthetic'],
                        help='The dataset.')
    parser.add_argument('--model', '-m',
                        choices=['vgg16', 'resnet50', 'resnet101'],
                        default='resnet50', help='Base model of Mask R-CNN.')
    parser.add_argument('--pooling-func', '-pf',
                        choices=['pooling', 'align', 'resize'],
                        default='align', help='Pooling function.')
    parser.add_argument('--gpu', '-g', type=int, help='GPU id.')
    parser.add_argument('--multi-node', '-mn', action='store_true',
                        help='use multi node')
    parser.add_argument('--max-epoch', type=float,
                        help='Epoch (default: 12.17)')
    args = parser.parse_args()

    if args.multi_node:
        import chainermn
        comm = chainermn.create_communicator('hierarchical')
        device = comm.intra_rank
        args.n_node = comm.inter_size
        args.n_gpu = comm.size
        chainer.cuda.get_device_from_id(device).use()
    else:
        # FIX: without this guard, omitting --gpu crashed later with an
        # unhelpful TypeError (args.gpu defaults to None).
        if args.gpu is None:
            parser.error('--gpu is required when --multi-node is not set')
        args.n_node = 1
        args.n_gpu = 1
        chainer.cuda.get_device_from_id(args.gpu).use()
        device = args.gpu

    args.seed = 0
    now = datetime.datetime.now()
    args.timestamp = now.isoformat()
    args.out = osp.join(here, 'logs/train_mrcnn',
                        now.strftime('%Y%m%d_%H%M%S'))

    # 0.00125 * 8 = 0.01 in original
    args.batch_size = 1 * args.n_gpu
    args.lr = 0.00125 * args.batch_size
    args.weight_decay = 0.0001

    if args.max_epoch is None:
        # (180e3 * 8) / len(coco_trainval)
        args.max_epoch = (180e3 * 8) / 118287
    # lr / 10 at 120k iteration with
    # 160k iteration * 16 batchsize in original
    args.step_size = [(120e3 / 180e3) * args.max_epoch,
                      (160e3 / 180e3) * args.max_epoch]

    random.seed(args.seed)
    np.random.seed(args.seed)

    # Default Config
    min_size = 600
    max_size = 1000
    anchor_scales = [4, 8, 16, 32]
    proposal_creator_params = dict(
        n_train_pre_nms=12000,
        n_train_post_nms=2000,
        n_test_pre_nms=6000,
        n_test_post_nms=1000,
        min_size=0,
    )

    if args.dataset == 'real':
        train_data = contrib.datasets.ARC2017RealInstancesDataset(
            'train', aug='standard')
    elif args.dataset == 'synthetic':
        train_data = contrib.datasets.ARC2017SyntheticInstancesDataset(
            do_aug=True, aug_level='all')
    else:
        raise ValueError
    test_data = contrib.datasets.ARC2017RealInstancesDataset('test')
    # Index 0 is the background class; foreground classes only.
    instance_class_names = train_data.class_names[1:]
    train_data = MaskRcnnDataset(train_data)
    test_data = MaskRcnnDataset(test_data)

    if args.pooling_func == 'align':
        pooling_func = mrcnn.functions.roi_align_2d
    elif args.pooling_func == 'pooling':
        pooling_func = chainer.functions.roi_pooling_2d
    elif args.pooling_func == 'resize':
        pooling_func = mrcnn.functions.crop_and_resize
    else:
        raise ValueError

    if args.model == 'vgg16':
        mask_rcnn = mrcnn.models.MaskRCNNVGG16(
            n_fg_class=len(instance_class_names),
            pretrained_model='imagenet',
            pooling_func=pooling_func,
            anchor_scales=anchor_scales,
            proposal_creator_params=proposal_creator_params,
            min_size=min_size,
            max_size=max_size)
    elif args.model in ['resnet50', 'resnet101']:
        n_layers = int(args.model.lstrip('resnet'))
        mask_rcnn = mrcnn.models.MaskRCNNResNet(
            n_layers=n_layers,
            n_fg_class=len(instance_class_names),
            pretrained_model='imagenet',
            pooling_func=pooling_func,
            anchor_scales=anchor_scales,
            proposal_creator_params=proposal_creator_params,
            min_size=min_size,
            max_size=max_size)
    else:
        raise ValueError
    mask_rcnn.use_preset('evaluate')
    model = mrcnn.models.MaskRCNNTrainChain(
        mask_rcnn,
        proposal_target_creator=mrcnn.utils.ProposalTargetCreator(
            n_sample=512),
    )
    if args.multi_node or args.gpu >= 0:
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    if args.multi_node:
        optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.weight_decay))

    if args.model in ['resnet50', 'resnet101']:
        # Freeze the early ResNet stages; only res3 and later are trained.
        model.mask_rcnn.extractor.mode = 'res3+'
        mask_rcnn.extractor.conv1.disable_update()
        mask_rcnn.extractor.bn1.disable_update()
        mask_rcnn.extractor.res2.disable_update()

    train_data = chainer.datasets.TransformDataset(
        train_data, mrcnn.datasets.MaskRCNNTransform(mask_rcnn))
    test_data = chainer.datasets.TransformDataset(
        test_data, mrcnn.datasets.MaskRCNNTransform(mask_rcnn, train=False))
    if args.multi_node:
        # Only rank 0 keeps the datasets; they are scattered to all workers.
        if comm.rank != 0:
            train_data = None
            test_data = None
        train_data = chainermn.scatter_dataset(train_data, comm, shuffle=True)
        test_data = chainermn.scatter_dataset(test_data, comm)

    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, batch_size=1, n_prefetch=4, shared_mem=10 ** 8)
    test_iter = chainer.iterators.MultiprocessIterator(
        test_data, batch_size=1, n_prefetch=4, shared_mem=10 ** 8,
        repeat=False, shuffle=False)

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=device,
        converter=mrcnn.datasets.concat_examples)

    trainer = training.Trainer(
        updater, (args.max_epoch, 'epoch'), out=args.out)

    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=training.triggers.ManualScheduleTrigger(
                       args.step_size, 'epoch'))

    eval_interval = 1, 'epoch'
    log_interval = 20, 'iteration'
    plot_interval = 0.1, 'epoch'
    print_interval = 20, 'iteration'

    evaluator = mrcnn.extensions.InstanceSegmentationVOCEvaluator(
        test_iter, model.mask_rcnn, device=device, use_07_metric=True,
        label_names=instance_class_names)
    if args.multi_node:
        evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=eval_interval)

    # Reporting/snapshot extensions run on a single process only.
    if not args.multi_node or comm.rank == 0:
        trainer.extend(
            extensions.snapshot_object(model.mask_rcnn, 'snapshot_model.npz'),
            trigger=training.triggers.MaxValueTrigger(
                'validation/main/map', eval_interval))
        args.git_hash = mrcnn.utils.git_hash()
        args.hostname = socket.gethostname()
        trainer.extend(fcn.extensions.ParamsReport(args.__dict__))
        trainer.extend(mrcnn.extensions.InstanceSegmentationVisReport(
            test_iter, model.mask_rcnn,
            label_names=instance_class_names),
            trigger=eval_interval)
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss', 'main/roi_loc_loss', 'main/roi_cls_loss',
            'main/roi_mask_loss', 'main/rpn_loc_loss', 'main/rpn_cls_loss',
            'validation/main/map',
        ]), trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        # plot
        assert extensions.PlotReport.available()
        trainer.extend(
            extensions.PlotReport([
                'main/loss', 'main/roi_loc_loss', 'main/roi_cls_loss',
                'main/roi_mask_loss', 'main/rpn_loc_loss',
                'main/rpn_cls_loss',
            ], file_name='loss.png', trigger=plot_interval),
            trigger=plot_interval,
        )
        trainer.extend(
            extensions.PlotReport(['validation/main/map'],
                                  file_name='accuracy.png',
                                  trigger=plot_interval),
            trigger=eval_interval,
        )
        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Distributed ImageNet training entry point (ChainerMN, GPU)."""
    info = collections.OrderedDict()

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train',
                        help='Path to training image-label list file')
    parser.add_argument('val',
                        help='Path to validation image-label list file')
    parser.add_argument('--root_train', default='.',
                        help='Root directory path of training image files')
    parser.add_argument('--root_val', default='.',
                        help='Root directory path of validation image files')
    parser.add_argument('--arch', '-a', choices=archs.keys(),
                        default='resnet50_akiba',
                        help='Convnet architecture')
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--communicator', default='hierarchical')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # --- ChainerMN initialization ---
    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank
    chainer.cuda.get_device(device).use()
    chainer.cuda.set_max_workspace_size(1 * 1024 * 1024 * 1024)

    # --- Logging ---
    # Non-root ranks write their outputs into a throwaway directory.
    if comm.rank == 0:
        result_directory = args.out
    else:
        import tempfile
        result_directory = tempfile.mkdtemp(dir='/tmp/')

    # --- Model ---
    model = archs[args.arch]()
    model.to_gpu()

    # --- Dataset ---
    # Rank 0 loads the full dataset; scatter_dataset splits it across ranks.
    train = None
    if comm.rank == 0:
        train = dataset.PreprocessedDataset(
            args.train, args.root_train, model.insize)
    train = chainermn.scatter_dataset(train, comm)
    # NOTE(review): set_start_method is called after the communicator is
    # created here; the ChainerMN docs recommend doing it before creating
    # the communicator — confirm this works in the target MPI environment.
    multiprocessing.set_start_method('forkserver')
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=args.loaderjob)

    # --- Optimizer ---
    global_batchsize = comm.size * args.batchsize
    # Linear scaling rule: lr grows with the global batch size.
    lr = 0.1 * global_batchsize / 256
    if comm.rank == 0:
        print('global_batchsize:', global_batchsize)
        print('Num of GPUs:', comm.size)
    weight_decay = 0.0001
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=lr, momentum=0.9), comm)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(weight_decay))
    info['training'] = {
        'local_batchsize': args.batchsize,
        'global_batchsize': global_batchsize,
        'lr': lr,
    }

    # --- Trainer ---
    log_interval = (10, 'iteration')
    stop_trigger = (200, 'iteration')
    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, stop_trigger, result_directory)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    if comm.rank == 0:
        trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.run()
def main():
    """ChainerMN ImageNet example: train a convnet on ILSVRC2012."""
    # The ImageNet example does not support CPU execution.
    if not chainer.cuda.available:
        raise RuntimeError('ImageNet requires GPU support.')

    archs = {
        'alex': alex.Alex,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50,
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch', '-a', choices=archs.keys(), default='nin',
                        help='Convnet architecture')
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch', '-E', type=int, default=10,
                        help='Number of epochs to train')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean', '-m', default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--root', '-R', default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize', '-b', type=int, default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.add_argument('--communicator', default='hierarchical')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # When using InfiniBand together with MultiprocessIterator, forked
    # worker processes often crash, so switch to the forkserver start
    # method and launch a dummy process so the forkserver actually starts.
    # This must happen *before* ``chainermn.create_communicator``.
    # (c.f. https://www.open-mpi.org/faq/?category=tuning#fork-warning and
    # https://chainermn.readthedocs.io/en/stable/tutorial/tips_faqs.html#using-multiprocessiterator)
    multiprocessing.set_start_method('forkserver')
    dummy = multiprocessing.Process(target=lambda *x: x, args=())
    dummy.start()
    dummy.join()

    # Prepare ChainerMN communicator.
    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank

    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        print('Using {} communicator'.format(args.communicator))
        print('Using {} arch'.format(args.arch))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    chainer.cuda.get_device_from_id(device).use()  # Make the GPU current
    model.to_gpu()

    # Only worker 0 loads the whole dataset; scatter_dataset then splits
    # it evenly and distributes the shards to all workers.
    mean = np.load(args.mean)
    train = val = None
    if comm.rank == 0:
        train = PreprocessedDataset(args.train, args.root, mean, model.insize)
        val = PreprocessedDataset(
            args.val, args.root, mean, model.insize, False)
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    val = chainermn.scatter_dataset(val, comm)

    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=args.loaderjob)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)

    # Wrap a standard Chainer optimizer into a multi-node one.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9), comm)
    optimizer.setup(model)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # --test shortens every interval for quick smoke runs.
    quick = args.test
    checkpoint_interval = (10, 'iteration') if quick else (1, 'epoch')
    val_interval = (10, 'iteration') if quick else (1, 'epoch')
    log_interval = (10, 'iteration') if quick else (1, 'epoch')

    checkpointer = chainermn.create_multi_node_checkpointer(
        name='imagenet-example', comm=comm)
    checkpointer.maybe_load(trainer, optimizer)
    trainer.extend(checkpointer, trigger=checkpoint_interval)

    # Evaluation runs across all nodes via the multi-node evaluator.
    evaluator = chainermn.create_multi_node_evaluator(
        TestModeEvaluator(val_iter, model, device=device), comm)
    trainer.extend(evaluator, trigger=val_interval)

    # Display/output extensions would produce duplicated output on every
    # worker, so attach them on rank 0 only.
    if comm.rank == 0:
        trainer.extend(extensions.DumpGraph('main/loss'))
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy', 'lr',
        ]), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Multi-node training of Faster R-CNN FPN on COCO 2017."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model',
        choices=('faster_rcnn_fpn_resnet50', 'faster_rcnn_fpn_resnet101'),
        default='faster_rcnn_fpn_resnet50')
    parser.add_argument('--batchsize', type=int, default=16)
    parser.add_argument('--iteration', type=int, default=90000)
    parser.add_argument('--step', type=int, nargs='*', default=[60000, 80000])
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    # Forkserver workaround for MultiprocessIterator + MPI:
    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        proc = multiprocessing.Process()
        proc.start()
        proc.join()

    comm = chainermn.create_communicator()
    device = comm.intra_rank

    if args.model == 'faster_rcnn_fpn_resnet50':
        model = FasterRCNNFPNResNet50(
            n_fg_class=len(coco_bbox_label_names),
            pretrained_model='imagenet')
    elif args.model == 'faster_rcnn_fpn_resnet101':
        model = FasterRCNNFPNResNet101(
            n_fg_class=len(coco_bbox_label_names),
            pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = TrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    train_chain.to_gpu()

    train = TransformDataset(
        COCOBboxDataset(year='2017', split='train'),
        ('img', 'bbox', 'label'), transform)

    # Scatter index arrays instead of the dataset itself; every worker
    # then slices its own shard out of the shared dataset object.
    indices = np.arange(len(train)) if comm.rank == 0 else None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]
    train_iter = chainer.iterators.MultithreadIterator(
        train, args.batchsize // comm.size)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    optimizer.add_hook(WeightDecay(0.0001))

    # Freeze the backbone stem and every BatchNormalization layer.
    model.extractor.base.conv1.disable_update()
    model.extractor.base.res2.disable_update()
    for link in model.links():
        if isinstance(link, L.BatchNormalization):
            link.disable_update()

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)
    # Iteration counts are defined for batch size 16 and rescaled here.
    trainer = training.Trainer(
        updater, (args.iteration * 16 / args.batchsize, 'iteration'),
        args.out)

    @make_shift('lr')
    def lr_schedule(trainer):
        # Linear warm-up for the first 500 iterations, then /10 steps.
        base_lr = 0.02 * args.batchsize / 16
        warm_up_duration = 500
        warm_up_rate = 1 / 3

        iteration = trainer.updater.iteration
        if iteration < warm_up_duration:
            rate = warm_up_rate \
                + (1 - warm_up_rate) * iteration / warm_up_duration
        else:
            rate = 1
            for step in args.step:
                if iteration >= step * 16 / args.batchsize:
                    rate *= 0.1

        return base_lr * rate

    trainer.extend(lr_schedule)

    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport(
            ['epoch', 'iteration', 'lr', 'main/loss',
             'main/loss/rpn/loc', 'main/loss/rpn/conf',
             'main/loss/head/loc', 'main/loss/head/conf']),
            trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))
        trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
        trainer.extend(
            extensions.snapshot_object(
                model, 'model_iter_{.updater.iteration}'),
            trigger=(90000 * 16 / args.batchsize, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer, strict=False)

    trainer.run()
def main():
    """Train a classifier as described by a YAML config file (ChainerMN)."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--config_path', type=str, default='configs/base.yml',
                        help='path to config file')
    parser.add_argument('--results_dir', type=str, default='./result/',
                        help='directory to save the results to')
    parser.add_argument('--resume', type=str, default='',
                        help='path to the snapshot')
    parser.add_argument('--process_num', type=int, default=0)
    parser.add_argument('--seed', type=int, default=42)
    args = parser.parse_args()

    # FIX: close the config file deterministically — the previous
    # ``yaml.load(open(...))`` leaked the file handle.
    with open(args.config_path) as f:
        config = yaml_utils.Config(yaml.load(f, Loader=yaml.SafeLoader))
    pattern = "-".join([
        config.pattern, config.models['classifier']['name'],
        config.dataset['dataset_name']
    ])

    comm = chainermn.create_communicator()
    device = comm.intra_rank
    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(MPI.COMM_WORLD.Get_size()))
        print('Num Minibatch-size: {}'.format(config.batchsize))
        print('Num Epoch: {}'.format(config.epoch))
        print('==========================================')

    # Model
    classifier = load_models(config.models['classifier'])
    if args.resume:
        print("Resume training with snapshot:{}".format(args.resume))
        chainer.serializers.load_npz(args.resume, classifier)
    chainer.cuda.get_device_from_id(device).use()
    classifier.to_gpu()

    # Optimizer
    opt = make_optimizer(classifier, comm, config)
    opt.add_hook(chainer.optimizer.WeightDecay(5e-4))

    # Dataset: rank 0 loads and splits the data; the other ranks only
    # import the dataset module so scatter_dataset can reconstruct it.
    if comm.rank == 0:
        dataset = yaml_utils.load_dataset(config)
        first_size = int(len(dataset) * config.train_val_split_ratio)
        train, val = chainer.datasets.split_dataset_random(
            dataset, first_size, seed=args.seed)
    else:
        yaml_utils.load_module(config.dataset['dataset_func'],
                               config.dataset['dataset_name'])
        train, val = None, None
    train = chainermn.scatter_dataset(train, comm)
    val = chainermn.scatter_dataset(val, comm)

    # Iterator
    train_iterator = chainer.iterators.SerialIterator(train, config.batchsize)
    val_iterator = chainer.iterators.SerialIterator(
        val, config.batchsize, repeat=False, shuffle=False)

    kwargs = config.updater['args'] if 'args' in config.updater else {}
    kwargs.update({
        'classifier': classifier,
        'iterator': train_iterator,
        'optimizer': opt,
        'device': device,
    })
    # Updater
    updater = yaml_utils.load_updater_class(config)
    updater = updater(**kwargs)
    out = args.results_dir + '/' + pattern
    if comm.rank == 0:
        create_result_dir(out, args.config_path, config)

    # Trainer
    trainer = training.Trainer(updater, (config.epoch, 'epoch'), out=out)

    # Evaluator
    evaluator = ClassifierEvaluator(val_iterator, classifier, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator)

    # Learning Rate Schedule (fixed)
    schedule = [config.epoch * 0.3, config.epoch * 0.6, config.epoch * 0.8]
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=ManualScheduleTrigger(schedule, 'epoch'))

    report_keys = [
        'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy', 'elapsed_time'
    ]
    if comm.rank == 0:
        # Set up logging on the root process only.
        trainer.extend(extensions.snapshot_object(
            classifier, 'classifier{}.npz'.format(args.process_num)),
            trigger=MaxValueTrigger('validation/main/accuracy'))
        trainer.extend(
            extensions.LogReport(keys=report_keys,
                                 trigger=(config.display_interval, 'epoch')))
        trainer.extend(extensions.PrintReport(report_keys),
                       trigger=(config.display_interval, 'epoch'))
        trainer.extend(
            extensions.ProgressBar(
                update_interval=config.progressbar_interval))

    # Run the training
    trainer.run()
def main():
    """Multi-node ImageNet training for ChainerCV ResNet models."""
    model_cfgs = {
        'resnet50': {'class': ResNet50, 'score_layer_name': 'fc6',
                     'kwargs': {'arch': 'fb'}},
        'resnet101': {'class': ResNet101, 'score_layer_name': 'fc6',
                      'kwargs': {'arch': 'fb'}},
        'resnet152': {'class': ResNet152, 'score_layer_name': 'fc6',
                      'kwargs': {'arch': 'fb'}},
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to root of the train dataset')
    parser.add_argument('val', help='Path to root of the validation dataset')
    parser.add_argument('--model', '-m', choices=model_cfgs.keys(),
                        default='resnet50', help='Convnet models')
    parser.add_argument('--communicator', type=str,
                        default='pure_nccl', help='Type of communicator')
    parser.add_argument('--loaderjob', type=int, default=4)
    parser.add_argument('--batchsize', type=int, default=32,
                        help='Batch size for each worker')
    parser.add_argument('--lr', type=float)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--weight_decay', type=float, default=0.0001)
    parser.add_argument('--out', type=str, default='result')
    parser.add_argument('--epoch', type=int, default=90)
    args = parser.parse_args()

    # Forkserver workaround for MultiprocessIterator + MPI:
    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        proc = multiprocessing.Process()
        proc.start()
        proc.join()

    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank

    if args.lr is not None:
        lr = args.lr
    else:
        # Linear scaling rule based on the global batch size.
        lr = 0.1 * (args.batchsize * comm.size) / 256
        if comm.rank == 0:
            print('lr={}: lr is selected based on the linear '
                  'scaling rule'.format(lr))

    label_names = directory_parsing_label_names(args.train)

    model_cfg = model_cfgs[args.model]
    extractor = model_cfg['class'](
        n_class=len(label_names), **model_cfg['kwargs'])
    extractor.pick = model_cfg['score_layer_name']
    model = Classifier(extractor)
    # Following https://arxiv.org/pdf/1706.02677.pdf,
    # the gamma of the last BN of each resblock is initialized by zeros.
    for link in model.links():
        if isinstance(link, Bottleneck):
            link.conv3.bn.gamma.data[:] = 0

    train_data = DirectoryParsingLabelDataset(args.train)
    val_data = DirectoryParsingLabelDataset(args.val)
    train_data = TransformDataset(
        train_data, ('img', 'label'), TrainTransform(extractor.mean))
    val_data = TransformDataset(
        val_data, ('img', 'label'), ValTransform(extractor.mean))
    print('finished loading dataset')

    # Rank 0 builds index arrays; each worker slices out its own shard.
    if comm.rank == 0:
        train_indices = np.arange(len(train_data))
        val_indices = np.arange(len(val_data))
    else:
        train_indices = None
        val_indices = None
    train_indices = chainermn.scatter_dataset(
        train_indices, comm, shuffle=True)
    val_indices = chainermn.scatter_dataset(val_indices, comm, shuffle=True)
    train_data = train_data.slice[train_indices]
    val_data = val_data.slice[val_indices]

    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, args.batchsize, n_processes=args.loaderjob)
    val_iter = iterators.MultiprocessIterator(
        val_data, args.batchsize,
        repeat=False, shuffle=False, n_processes=args.loaderjob)

    optimizer = chainermn.create_multi_node_optimizer(
        CorrectedMomentumSGD(lr=lr, momentum=args.momentum), comm)
    optimizer.setup(model)
    # Weight decay is not applied to BN parameters (beta/gamma).
    for param in model.params():
        if param.name not in ('beta', 'gamma'):
            param.update_rule.add_hook(WeightDecay(args.weight_decay))

    if device >= 0:
        chainer.cuda.get_device(device).use()
        model.to_gpu()

    updater = chainer.training.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    @make_shift('lr')
    def warmup_and_exponential_shift(trainer):
        # Warm up over the first 5 epochs when lr is large, then step
        # the rate down at epochs 30, 60 and 80.
        epoch = trainer.updater.epoch_detail
        warmup_epoch = 5
        if epoch < warmup_epoch:
            if lr > 0.1:
                warmup_rate = 0.1 / lr
                rate = warmup_rate \
                    + (1 - warmup_rate) * epoch / warmup_epoch
            else:
                rate = 1
        elif epoch < 30:
            rate = 1
        elif epoch < 60:
            rate = 0.1
        elif epoch < 80:
            rate = 0.01
        else:
            rate = 0.001
        return rate * lr

    trainer.extend(warmup_and_exponential_shift)

    evaluator = chainermn.create_multi_node_evaluator(
        extensions.Evaluator(val_iter, model, device=device), comm)
    trainer.extend(evaluator, trigger=(1, 'epoch'))

    log_interval = 0.1, 'epoch'
    print_interval = 0.1, 'epoch'

    # Reporting/snapshot extensions only on the root process.
    if comm.rank == 0:
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
        trainer.extend(extensions.snapshot_object(
            extractor, 'snapshot_model_{.updater.epoch}.npz'),
            trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.PrintReport(
            ['iteration', 'epoch', 'elapsed_time', 'lr',
             'main/loss', 'validation/main/loss',
             'main/accuracy', 'validation/main/accuracy']),
            trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
def main():
    """Train the KISS text localizer + recognizer pair with ChainerMN.

    One MPI process is expected per GPU; each process' intra-node rank
    selects its device.  All ranks share the data-parallel optimizers,
    while rank 0 owns dataset construction, logging, snapshots and
    visualization extensions.  Relies on project helpers (parse_config,
    scatter_dataset, Logger, ...) imported at module level.
    """
    parser = argparse.ArgumentParser(
        description="Train a KISS model",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("log_name", help="name of log")
    parser.add_argument("-c", "--config", default="config.cfg",
                        help="path to config file to use")
    parser.add_argument("-g", "--gpu", nargs='+', default=["-1"],
                        help="gpu if to use (-1 means cpu)")
    parser.add_argument("-l", "--log-dir", default='tests',
                        help="path to log dir")
    parser.add_argument(
        "--snapshot-interval", type=int, default=10000,
        help="number of iterations after which a snapshot will be taken")
    parser.add_argument("--log-interval", type=int, default=100,
                        help="log interval")
    parser.add_argument(
        "--port", type=int, default=1337,
        help="port that is used by bbox plotter to send predictions on test image")
    parser.add_argument(
        "--rl", dest="resume_localizer",
        help="path to snapshot that is to be used to resume training of localizer")
    parser.add_argument(
        "--rr", dest="resume_recognizer",
        help="path to snapshot that us to be used to pre-initialize recognizer")
    parser.add_argument("--num-layers", type=int, default=18,
                        help="Resnet Variant to use")
    parser.add_argument(
        "--no-imgaug", action='store_false', dest='use_imgaug', default=True,
        help="disable image augmentation with `imgaug`, but use naive image augmentation instead")
    parser.add_argument(
        "--rdr", "--rotation-dropout-ratio", dest="rotation_dropout_ratio",
        type=float, default=0,
        help="ratio for dropping rotation params in text localization network")
    parser.add_argument("--save-gradient-information", action='store_true',
                        default=False,
                        help="enable tensorboard gradient plotter")
    parser.add_argument("--dump-graph", action='store_true', default=False,
                        help="dump computational graph to file")
    parser.add_argument("--image-mode", default="RGB", choices=["RGB", "L"],
                        help="mode in which images are to be loaded")
    parser.add_argument("--resume",
                        help="path to logdir from which training shall resume")

    args = parser.parse_args()
    # Merge config-file values into the parsed CLI namespace.
    args = parse_config(args.config, args)

    # comm = chainermn.create_communicator(communicator_name='flat')
    comm = chainermn.create_communicator()
    # The CLI --gpu value is overridden: each process uses the device that
    # matches its intra-node MPI rank.
    args.gpu = comm.intra_rank
    print(args.gpu)

    # Resuming reuses the old log dir; otherwise a fresh timestamped one.
    if args.resume is not None:
        log_dir = os.path.relpath(args.resume)
    else:
        log_dir = os.path.join(
            "logs", args.log_dir,
            "{}_{}".format(datetime.datetime.now().isoformat(),
                           args.log_name))
    args.log_dir = log_dir

    # set dtype
    chainer.global_config.dtype = 'float32'

    if comm.rank == 0:
        # create log dir (rank 0 only, so workers do not race on mkdir)
        if not os.path.exists(log_dir):
            os.makedirs(log_dir, exist_ok=True)

    report_keys = ["epoch", "iteration", "loss/localizer/loss"]

    # Dataset sources: either a shared-memory dataset server or plain npz
    # files, selected by config (args.use_memory_manager comes from config).
    if args.use_memory_manager:
        memory_manager = DatasetClient()
        memory_manager.connect()
        train_kwargs = {
            "memory_manager": memory_manager,
            "base_name": "train_file"
        }
        # recognition_kwargs = {"memory_manager": memory_manager, "base_name": "text_recognition_file"}
        validation_kwargs = {
            "memory_manager": memory_manager,
            "base_name": "val_file"
        }
    else:
        train_kwargs = {"npz_file": args.train_file}
        # recognition_kwargs = {"npz_file": args.text_recognition_file}
        validation_kwargs = {"npz_file": args.val_file}

    # Only rank 0 builds the datasets; scatter_dataset distributes shards.
    if comm.rank == 0:
        train_dataset = TextRecognitionImageDataset(
            char_map=args.char_map,
            image_size=args.image_size,
            root=os.path.dirname(args.train_file),
            dtype=chainer.get_dtype(),
            use_imgaug=args.use_imgaug,
            transform_probability=0.4,
            keep_aspect_ratio=True,
            image_mode=args.image_mode,
            **train_kwargs,
        )
        validation_dataset = TextRecognitionImageDataset(
            char_map=args.char_map,
            image_size=args.image_size,
            root=os.path.dirname(args.val_file),
            dtype=chainer.get_dtype(),
            transform_probability=0,
            keep_aspect_ratio=True,
            image_mode=args.image_mode,
            **validation_kwargs,
        )
    else:
        train_dataset, validation_dataset = None, None

    train_dataset = scatter_dataset(train_dataset, comm)
    validation_dataset = scatter_dataset(validation_dataset, comm)

    # uncomment all commented parts of the code to train the model with extra recognizer training
    # text_recognition_dataset = TextRecognitionImageCharCropDataset(
    #     char_map=args.char_map,
    #     image_size=args.target_size,
    #     root=os.path.dirname(args.text_recognition_file),
    #     dtype=chainer.get_dtype(),
    #     transform_probability=0,
    #     image_mode=args.image_mode,
    #     gpu_id=args.gpu,
    #     reverse=False,
    #     resize_after_load=False,
    #     **recognition_kwargs,
    # )

    data_iter = chainer.iterators.MultithreadIterator(train_dataset,
                                                      args.batch_size)
    validation_iter = chainer.iterators.MultithreadIterator(
        validation_dataset, args.batch_size, repeat=False)
    # text_recognition_iter = chainer.iterators.MultithreadIterator(text_recognition_dataset, max(args.batch_size, 32))

    localizer = LSTMTextLocalizer(
        Size(*args.target_size),
        num_bboxes_to_localize=train_dataset.num_chars_per_word,
        num_layers=args.num_layers,
        dropout_ratio=args.rotation_dropout_ratio,
    )
    if args.resume_localizer is not None:
        load_pretrained_model(args.resume_localizer, localizer)

    recognizer = TransformerTextRecognizer(
        train_dataset.num_chars_per_word,
        train_dataset.num_words_per_image,
        train_dataset.num_classes,
        train_dataset.bos_token,
        num_layers=args.num_layers,
    )
    if args.resume_recognizer is not None:
        load_pretrained_model(args.resume_recognizer, recognizer)

    models = [localizer, recognizer]

    # Only rank 0 writes tensorboard logs; other ranks carry None.
    if comm.rank == 0:
        tensorboard_handle = SummaryWriter(log_dir=args.log_dir, graph=None)
    else:
        tensorboard_handle = None

    localizer_optimizer = RAdam(alpha=args.learning_rate, beta1=0.9,
                                beta2=0.98, eps=1e-9)
    localizer_optimizer = chainermn.create_multi_node_optimizer(
        localizer_optimizer, comm)
    localizer_optimizer.setup(localizer)
    localizer_optimizer.add_hook(chainer.optimizer_hooks.GradientClipping(2))
    if args.save_gradient_information:
        # NOTE(review): on ranks != 0 tensorboard_handle is None here —
        # presumably --save-gradient-information is only meaningful on
        # rank 0; confirm TensorboardGradientPlotter tolerates None.
        localizer_optimizer.add_hook(
            TensorboardGradientPlotter(tensorboard_handle,
                                       args.log_interval),
        )

    recognizer_optimizer = RAdam(alpha=args.learning_rate)
    recognizer_optimizer = chainermn.create_multi_node_optimizer(
        recognizer_optimizer, comm)
    recognizer_optimizer.setup(recognizer)

    optimizers = [localizer_optimizer, recognizer_optimizer]

    # log train information every time we encounter a new epoch or
    # args.log_interval iterations have been done
    log_interval_trigger = (
        lambda trainer:
        (trainer.updater.is_new_epoch or
         trainer.updater.iteration % args.log_interval == 0) and
        trainer.updater.iteration > 0)

    updater_args = {
        "iterator": {
            'main': data_iter,
            # 'rec': text_recognition_iter,
        },
        "optimizer": {
            "opt_gen": localizer_optimizer,
            "opt_rec": recognizer_optimizer,
        },
        "tensorboard_handle": tensorboard_handle,
        "tensorboard_log_interval": log_interval_trigger,
        "recognizer_update_interval": 1,
        "device": args.gpu,
    }

    updater = TransformerTextRecognitionUpdater(
        models=[localizer, recognizer], **updater_args)

    trainer = chainer.training.Trainer(updater, (args.num_epoch, 'epoch'),
                                       out=args.log_dir)

    # Static run metadata that is merged into the log on the first report.
    data_to_log = {
        'log_dir': args.log_dir,
        'image_size': args.image_size,
        'num_layers': args.num_layers,
        'num_chars': train_dataset.num_chars_per_word,
        'num_words': train_dataset.num_words_per_image,
        'num_classes': train_dataset.num_classes,
        'keep_aspect_ratio': train_dataset.keep_aspect_ratio,
        'localizer': get_import_info(localizer),
        'recognizer': get_import_info(recognizer),
        'bos_token': train_dataset.bos_token,
    }
    # Also record every public CLI/config option.
    for argument in filter(lambda x: not x.startswith('_'), dir(args)):
        data_to_log[argument] = getattr(args, argument)

    def backup_train_config(stats_cpu):
        # Inject the metadata exactly once, on the first logged iteration.
        if stats_cpu['iteration'] == args.log_interval:
            stats_cpu.update(data_to_log)

    # Everything below (snapshots, evaluation, plotting, console output)
    # runs on rank 0 only to avoid duplicated work/output.
    if comm.rank == 0:
        for model in models:
            trainer.extend(
                extensions.snapshot_object(
                    model,
                    model.__class__.__name__ + '_{.updater.iteration}.npz'),
                trigger=lambda trainer: trainer.updater.is_new_epoch or
                trainer.updater.iteration % args.snapshot_interval == 0,
            )
        trainer.extend(
            extensions.snapshot(filename='trainer_snapshot',
                                autoload=args.resume is not None),
            trigger=(args.snapshot_interval, 'iteration'))

        evaluation_function = TextRecognitionEvaluatorFunction(
            localizer, recognizer, args.gpu, train_dataset.blank_label,
            train_dataset.char_map)
        trainer.extend(
            TextRecognitionTensorboardEvaluator(
                validation_iter,
                localizer,
                device=args.gpu,
                eval_func=evaluation_function,
                tensorboard_handle=tensorboard_handle,
                num_iterations=200,
            ),
            trigger=(args.test_interval, 'iteration'),
        )

        # every epoch run the model on test datasets; any config option
        # named "test_dataset_<name>" registers one extra evaluator
        test_dataset_prefix = "test_dataset_"
        test_datasets = [
            arg for arg in dir(args) if arg.startswith(test_dataset_prefix)
        ]
        for test_dataset_name in test_datasets:
            print(
                f"setting up testing for {test_dataset_name[len(test_dataset_prefix):]} dataset"
            )
            dataset_path = getattr(args, test_dataset_name)
            if args.use_memory_manager:
                test_kwargs = {
                    "memory_manager": memory_manager,
                    "base_name": test_dataset_name
                }
            else:
                test_kwargs = {"npz_file": dataset_path}
            test_dataset = TextRecognitionImageDataset(
                char_map=args.char_map,
                image_size=args.image_size,
                root=os.path.dirname(dataset_path),
                dtype=chainer.get_dtype(),
                transform_probability=0,
                keep_aspect_ratio=True,
                image_mode=args.image_mode,
                **test_kwargs,
            )
            test_iter = chainer.iterators.MultithreadIterator(
                test_dataset, args.batch_size, repeat=False)
            trainer.extend(
                TextRecognitionTensorboardEvaluator(
                    test_iter,
                    localizer,
                    device=args.gpu,
                    eval_func=evaluation_function,
                    tensorboard_handle=tensorboard_handle,
                    base_key=test_dataset_name[len(test_dataset_prefix):]),
                trigger=(args.snapshot_interval, 'iteration'))

        # NOTE(review): the updater is appended to `models` here —
        # presumably so downstream tooling treats it like the models;
        # confirm this is intentional (it happens after the snapshot loop).
        models.append(updater)
        logger = Logger(
            os.path.dirname(os.path.realpath(__file__)),
            args.log_dir,
            postprocess=backup_train_config,
            trigger=log_interval_trigger,
            exclusion_filters=['*logs*', '*.pyc', '__pycache__', '.git*'],
            resume=args.resume is not None,
        )

        # Choose the image used by the bbox plotter for visualization.
        if args.test_image is not None:
            plot_image = train_dataset.load_image(args.test_image)
            gt_bbox = None
        else:
            plot_image = validation_dataset.get_example(0)['image']
            gt_bbox = None

        bbox_plotter = TextRecognitionBBoxPlotter(
            plot_image,
            os.path.join(args.log_dir, 'bboxes'),
            args.target_size,
            send_bboxes=True,
            upstream_port=args.port,
            visualization_anchors=[
                ["visual_backprop_anchors"],
            ],
            device=args.gpu,
            render_extracted_rois=True,
            num_rois_to_render=4,
            sort_rois=False,
            show_visual_backprop_overlay=True,
            visual_backprop_index=0,
            show_backprop_and_feature_vis=True,
            gt_bbox=gt_bbox,
            render_pca=False,
            log_name=args.log_name,
            char_map=train_dataset.char_map,
            blank_label=train_dataset.blank_label,
            predictors={
                "localizer": localizer,
                "recognizer": recognizer,
            },
        )
        trainer.extend(bbox_plotter, trigger=(10, 'iteration'))

        trainer.extend(logger, trigger=log_interval_trigger)
        trainer.extend(
            extensions.PrintReport(report_keys, log_report='Logger'),
            trigger=log_interval_trigger)

        # learning rate shift after each epoch (localizer only)
        trainer.extend(
            extensions.ExponentialShift("alpha", 0.1,
                                        optimizer=localizer_optimizer),
            trigger=(1, 'epoch'))
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if args.dump_graph:
            trainer.extend(
                extensions.dump_graph('loss/localizer/loss',
                                      out_name='model.dot'))

        open_interactive_prompt(
            bbox_plotter=bbox_plotter,
            optimizer=optimizers,
        )

    trainer.run()
def test_deprecation():
    """Deprecated communicator names must raise a DeprecationWarning."""
    for legacy_name in ('hierarchical', 'two_dimensional'):
        with chainer.testing.assert_warns(DeprecationWarning):
            chainermn.create_communicator(legacy_name)
def main():
    """Multi-GPU (ChainerMN) training of FPN Faster R-CNN on COCO.

    One MPI process per GPU; the standard 1x COCO schedule (90k iterations
    at a global batch size of 16) is rescaled by the actual batch size.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', choices=('resnet50', 'resnet101'))
    parser.add_argument('--batchsize', type=int, default=16)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    # One process per GPU; the intra-node rank selects the device.
    comm = chainermn.create_communicator()
    device = comm.intra_rank

    # Build the detector and copy ImageNet-pretrained weights into the
    # backbone.
    if args.model == 'resnet50':
        model = FasterRCNNFPNResNet50(
            n_fg_class=len(coco_bbox_label_names),
            mean='chainercv')
        copyparams(model.extractor.base,
                   ResNet50(pretrained_model='imagenet', arch='he'))
    elif args.model == 'resnet101':
        model = FasterRCNNFPNResNet101(
            n_fg_class=len(coco_bbox_label_names),
            mean='chainercv')
        copyparams(model.extractor.base,
                   ResNet101(pretrained_model='imagenet', arch='he'))

    model.use_preset('evaluate')
    train_chain = TrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    train_chain.to_gpu()

    train = TransformDataset(
        ConcatenatedDataset(
            COCOBboxDataset(split='train'),
            COCOBboxDataset(split='valminusminival'),
        ), ('img', 'bbox', 'label'), transform)

    # Rank 0 creates the index array; scatter_dataset shuffles and splits
    # it evenly across workers.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # Per-worker batch size is the global batch size divided by world size.
    train_iter = chainer.iterators.MultithreadIterator(
        train, args.batchsize // comm.size)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    optimizer.add_hook(WeightDecay(0.0001))

    # Freeze the stem, the first residual stage and every BatchNorm layer
    # of the pretrained backbone.
    model.extractor.base.conv1.disable_update()
    model.extractor.base.res2.disable_update()
    for link in model.links():
        if isinstance(link, L.BatchNormalization):
            link.disable_update()

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)
    trainer = training.Trainer(
        updater, (90000 * 16 / args.batchsize, 'iteration'), args.out)

    def lr_schedule(updater):
        # Linear warm-up over 500 iterations, then step decay at the
        # 60k/80k marks of the 16-image-batch schedule.
        base_lr = 0.02 * args.batchsize / 16
        warm_up_duration = 500
        warm_up_rate = 1 / 3

        iteration = updater.iteration
        if iteration < warm_up_duration:
            rate = warm_up_rate \
                + (1 - warm_up_rate) * iteration / warm_up_duration
        elif iteration < 60000 * 16 / args.batchsize:
            rate = 1
        elif iteration < 80000 * 16 / args.batchsize:
            rate = 0.1
        else:
            rate = 0.01

        return base_lr * rate

    trainer.extend(ManualScheduler('lr', lr_schedule))

    # Reporting and snapshots only on rank 0.
    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport(
            ['epoch', 'iteration', 'lr', 'main/loss',
             'main/loss/rpn/loc', 'main/loss/rpn/conf',
             'main/loss/head/loc', 'main/loss/head/conf']),
            trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
        trainer.extend(
            extensions.snapshot_object(
                model, 'model_iter_{.updater.iteration}'),
            trigger=(90000 * 16 / args.batchsize, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer, strict=False)

    trainer.run()
def main():
    """Distributed (ChainerMN) training loop for a GQN model.

    Each MPI process drives one GPU, selected by its intra-node rank.
    Every outer iteration shuffles the dataset subsets (identically on all
    ranks, so each rank picks a distinct subset via its rank index), trains
    on batches from that subset, and anneals the pixel-observation variance
    from ``pixel_sigma_i`` to ``pixel_sigma_f`` over ``pixel_n`` steps.
    Rank 0 prints progress and serializes snapshots.

    Uses the module-level ``args`` namespace and the imported gqn / chainer
    / cupy helpers.
    """
    # Create the snapshot directory up front.  An existing directory is
    # fine; unlike the previous bare ``except: pass`` this no longer hides
    # real errors such as permission failures.
    os.makedirs(args.snapshot_path, exist_ok=True)

    comm = chainermn.create_communicator()
    device = comm.intra_rank
    print("device", device, "/", comm.size)
    cuda.get_device(device).use()
    xp = cupy

    dataset = gqn.data.Dataset(args.dataset_path)

    # Collect the CLI-tunable hyperparameters; rank 0 persists them next
    # to the snapshots.
    hyperparams = HyperParameters()
    hyperparams.generator_share_core = args.generator_share_core
    hyperparams.generator_share_prior = args.generator_share_prior
    hyperparams.generator_generation_steps = args.generation_steps
    hyperparams.inference_share_core = args.inference_share_core
    hyperparams.inference_share_posterior = args.inference_share_posterior
    hyperparams.channels_chz = args.channels_chz
    hyperparams.generator_channels_u = args.channels_u
    hyperparams.inference_channels_map_x = args.channels_map_x
    hyperparams.pixel_n = args.pixel_n
    hyperparams.pixel_sigma_i = args.initial_pixel_sigma
    hyperparams.pixel_sigma_f = args.final_pixel_sigma
    if comm.rank == 0:
        hyperparams.save(args.snapshot_path)
        hyperparams.print()

    model = Model(hyperparams, snapshot_directory=args.snapshot_path)
    model.to_gpu()

    optimizer = Optimizer(
        model.parameters,
        communicator=comm,
        mu_i=args.initial_lr,
        mu_f=args.final_lr)
    if comm.rank == 0:
        optimizer.print()

    dataset_mean, dataset_std = dataset.load_mean_and_std()

    if comm.rank == 0:
        np.save(os.path.join(args.snapshot_path, "mean.npy"), dataset_mean)
        np.save(os.path.join(args.snapshot_path, "std.npy"), dataset_std)

    # avoid division by zero
    dataset_std += 1e-12

    # Pixel-observation variance buffers; refilled in place as sigma_t is
    # annealed below.
    sigma_t = hyperparams.pixel_sigma_i
    pixel_var = xp.full(
        (args.batch_size, 3) + hyperparams.image_size,
        sigma_t**2,
        dtype="float32")
    pixel_ln_var = xp.full(
        (args.batch_size, 3) + hyperparams.image_size,
        math.log(sigma_t**2),
        dtype="float32")

    # Identical seed on every process so all ranks shuffle the subset list
    # the same way and index it disjointly by rank.
    random.seed(0)
    subset_indices = list(range(len(dataset.subset_filenames)))

    current_training_step = 0
    for iteration in range(args.training_iterations):
        mean_kld = 0
        mean_nll = 0
        total_batch = 0
        subset_size_per_gpu = len(subset_indices) // comm.size
        start_time = time.time()

        for subset_loop in range(subset_size_per_gpu):
            random.shuffle(subset_indices)
            subset_index = subset_indices[comm.rank]
            subset = dataset.read(subset_index)
            iterator = gqn.data.Iterator(subset, batch_size=args.batch_size)

            for batch_index, data_indices in enumerate(iterator):
                # shape: (batch, views, height, width, channels)
                # range: [-1, 1]
                images, viewpoints = subset[data_indices]

                # preprocessing: normalize by the dataset statistics
                images = (images - dataset_mean) / dataset_std

                # (batch, views, height, width, channels) -> (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3))

                total_views = images.shape[1]

                # sample the number of context views and the query view
                num_views = random.choice(range(total_views))
                query_index = random.choice(range(total_views))

                if current_training_step == 0 and num_views == 0:
                    num_views = 1  # avoid OpenMPI error

                if num_views > 0:
                    r = model.compute_observation_representation(
                        images[:, :num_views], viewpoints[:, :num_views])
                else:
                    # no context views: use an all-zero representation
                    r = xp.zeros(
                        (args.batch_size, hyperparams.channels_r) +
                        hyperparams.chrz_size,
                        dtype="float32")
                    r = chainer.Variable(r)

                query_images = images[:, query_index]
                query_viewpoints = viewpoints[:, query_index]

                # transfer to gpu
                query_images = to_gpu(query_images)
                query_viewpoints = to_gpu(query_viewpoints)

                h0_gen, c0_gen, u_0, h0_enc, c0_enc = model.generate_initial_state(
                    args.batch_size, xp)

                loss_kld = 0

                hl_enc = h0_enc
                cl_enc = c0_enc
                hl_gen = h0_gen
                cl_gen = c0_gen
                ul_enc = u_0

                xq = model.inference_downsampler.downsample(query_images)

                # Unrolled generation/inference steps; the KL term of every
                # step is accumulated into loss_kld.
                for l in range(model.generation_steps):
                    inference_core = model.get_inference_core(l)
                    inference_posterior = model.get_inference_posterior(l)
                    generation_core = model.get_generation_core(l)
                    generation_prior = model.get_generation_prior(l)

                    h_next_enc, c_next_enc = inference_core.forward_onestep(
                        hl_gen, hl_enc, cl_enc, xq, query_viewpoints, r)

                    mean_z_q = inference_posterior.compute_mean_z(hl_enc)
                    ln_var_z_q = inference_posterior.compute_ln_var_z(hl_enc)
                    ze_l = cf.gaussian(mean_z_q, ln_var_z_q)

                    mean_z_p = generation_prior.compute_mean_z(hl_gen)
                    ln_var_z_p = generation_prior.compute_ln_var_z(hl_gen)

                    h_next_gen, c_next_gen, u_next_enc = generation_core.forward_onestep(
                        hl_gen, cl_gen, ul_enc, ze_l, query_viewpoints, r)

                    kld = gqn.nn.chainer.functions.gaussian_kl_divergence(
                        mean_z_q, ln_var_z_q, mean_z_p, ln_var_z_p)

                    loss_kld += cf.sum(kld)

                    hl_gen = h_next_gen
                    cl_gen = c_next_gen
                    ul_enc = u_next_enc
                    hl_enc = h_next_enc
                    cl_enc = c_next_enc

                mean_x = model.generation_observation.compute_mean_x(ul_enc)
                negative_log_likelihood = gqn.nn.chainer.functions.gaussian_negative_log_likelihood(
                    query_images, mean_x, pixel_var, pixel_ln_var)
                loss_nll = cf.sum(negative_log_likelihood)

                loss_nll /= args.batch_size
                loss_kld /= args.batch_size
                loss = loss_nll + loss_kld

                model.cleargrads()
                loss.backward()
                optimizer.update(current_training_step)

                if comm.rank == 0:
                    printr(
                        "Iteration {}: Subset {} / {}: Batch {} / {} - loss: nll: {:.3f} kld: {:.3f} - lr: {:.4e} - sigma_t: {:.6f}".
                        format(iteration + 1, subset_loop * comm.size + 1,
                               len(dataset), batch_index + 1,
                               len(subset) // args.batch_size,
                               float(loss_nll.data), float(loss_kld.data),
                               optimizer.learning_rate, sigma_t))

                # Linearly anneal the pixel sigma towards sigma_f, clamped
                # at sigma_f once pixel_n steps have passed.
                sf = hyperparams.pixel_sigma_f
                si = hyperparams.pixel_sigma_i
                sigma_t = max(
                    sf + (si - sf) *
                    (1.0 - current_training_step / hyperparams.pixel_n), sf)

                pixel_var[...] = sigma_t**2
                pixel_ln_var[...] = math.log(sigma_t**2)

                total_batch += 1
                # every rank processed one batch during this step
                current_training_step += comm.size
                # current_training_step += 1
                mean_kld += float(loss_kld.data)
                mean_nll += float(loss_nll.data)

            if comm.rank == 0:
                model.serialize(args.snapshot_path)

        if comm.rank == 0:
            elapsed_time = time.time() - start_time
            print(
                "\033[2KIteration {} - loss: nll: {:.3f} kld: {:.3f} - lr: {:.4e} - sigma_t: {:.6f} - step: {} - elapsed_time: {:.3f} min".
                format(iteration + 1, mean_nll / total_batch,
                       mean_kld / total_batch, optimizer.learning_rate,
                       sigma_t, current_training_step, elapsed_time / 60))
            model.serialize(args.snapshot_path)
# --- Dataset loading and communicator setup (script-level fragment) ---
# NOTE(review): `args`, `net` and `num_gpus` are defined earlier in this
# script, outside this excerpt — confirm before reusing in isolation.

# Load the pre-packaged train/test splits.  Each .npz file is opened twice
# here (once per key).
train_data = np.load(os.path.join(args.train, 'train.npz'))['data']
train_labels = np.load(os.path.join(args.train, 'train.npz'))['labels']

test_data = np.load(os.path.join(args.test, 'test.npz'))['data']
test_labels = np.load(os.path.join(args.test, 'test.npz'))['labels']

train = chainer.datasets.TupleDataset(train_data, train_labels)
test = chainer.datasets.TupleDataset(test_data, test_labels)

# Set up a neural network to train.
# Classifier reports softmax cross entropy loss and accuracy at every
# iteration, which will be used by the PrintReport extension below.
model = L.Classifier(net.VGG(10))

comm = chainermn.create_communicator(args.communicator)

# comm.inter_rank gives the rank of the node. This should only print on
# one node.
if comm.inter_rank == 0:
    print('# Minibatch-size: {}'.format(args.batch_size))
    print('# epoch: {}'.format(args.epochs))
    print('# communicator: {}'.format(args.communicator))

# comm.intra_rank gives the rank of the process on a given node; it is
# used as the GPU id, with -1 meaning CPU.
device = comm.intra_rank if num_gpus > 0 else -1
if device >= 0:
    chainer.cuda.get_device_from_id(device).use()
def check_mnist(gpu, display_log=True):
    """Smoke-train an MLP on MNIST across all MPI workers.

    Verifies that validation accuracy exceeds 0.95 and that the
    checkpointer finalized (emptied) its snapshot directory.
    """
    n_epochs = 5
    batch_size = 100
    hidden_units = 100

    communicator = chainermn.create_communicator('naive')
    if gpu:
        device_id = communicator.intra_rank
        chainer.cuda.get_device(device_id).use()
    else:
        device_id = -1

    classifier = L.Classifier(MLP(hidden_units, 10))
    if gpu:
        classifier.to_gpu()

    opt = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), communicator)
    opt.setup(classifier)

    # Worker 0 loads MNIST; scatter_dataset hands every worker a shard.
    if communicator.rank == 0:
        train, test = chainer.datasets.get_mnist()
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, communicator, shuffle=True)
    test = chainermn.scatter_dataset(test, communicator, shuffle=True)

    train_iter = chainer.iterators.SerialIterator(train, batch_size)
    test_iter = chainer.iterators.SerialIterator(
        test, batch_size, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, opt, device=device_id)
    trainer = training.Trainer(updater, (n_epochs, 'epoch'))

    # A standard Chainer evaluator becomes multi-node aware by wrapping it.
    evaluator = chainermn.create_multi_node_evaluator(
        extensions.Evaluator(test_iter, classifier, device=device_id),
        communicator)
    trainer.extend(evaluator)

    # Register a checkpointer purely to check that checkpointing runs
    # without errors.
    path = tempfile.mkdtemp(dir='/tmp', prefix=__name__ + "-tmp-")
    checkpointer = create_multi_node_checkpointer(
        name=__name__, comm=communicator, path=path)
    trainer.extend(checkpointer, trigger=(1, 'epoch'))

    # Console/log output on a single worker only, to avoid duplicates.
    if communicator.rank == 0 and display_log:
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch')),
                       trigger=(1, 'epoch'))
        trainer.extend(extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ], out=sys.stderr), trigger=(1, 'epoch'))

    trainer.run()

    accuracy = evaluator()['validation/main/accuracy']
    assert accuracy > 0.95

    # The checkpointer must have emptied its snapshot directory on success.
    assert [] == os.listdir(path)
    os.removedirs(path)
# --- SageMaker-style argument parsing and communicator setup (fragment) ---
# NOTE(review): `parser`, `env`, `logger`, `np`, `MLP` are defined earlier
# in this script, outside this excerpt, and the final statement continues
# beyond it.
parser.add_argument('--host', type=str, default=env.current_host)
parser.add_argument('--num-gpus', type=int, default=env.num_gpus)
parser.add_argument('--train', type=str,
                    default=env.channel_input_dirs['train'])
parser.add_argument('--test', type=str,
                    default=env.channel_input_dirs['test'])

args = parser.parse_args()

train_file = np.load(os.path.join(args.train, 'train.npz'))
test_file = np.load(os.path.join(args.test, 'test.npz'))

logger.info('Current host: {}'.format(args.host))

# Without GPUs fall back to the CPU-only 'naive' communicator.
communicator = 'naive' if args.num_gpus == 0 else args.communicator

comm = chainermn.create_communicator(communicator)
# Intra-node rank doubles as the GPU id; -1 means CPU.
device = comm.intra_rank if args.num_gpus > 0 else -1

print('==========================================')
print('Using {} communicator'.format(comm))
print('Num unit: {}'.format(args.units))
print('Num Minibatch-size: {}'.format(args.batch_size))
print('Num epoch: {}'.format(args.epochs))
print('==========================================')

model = L.Classifier(MLP(args.units, 10))
if device >= 0:
    chainer.cuda.get_device(device).use()

# Create a multi node optimizer from a standard Chainer optimizer.
optimizer = chainermn.create_multi_node_optimizer(
def main():
    """ChainerMN example: a model-parallel (pipelined) MLP on MNIST.

    The network is split across exactly two MPI processes: rank 0 runs
    MLP0 wrapped in a Classifier, rank 1 runs MLP1; rank 1 trains on an
    empty dataset placeholder since the real data flows through rank 0.
    """
    parser = argparse.ArgumentParser(
        description='ChainerMN example: pipelined neural network')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    # Prepare ChainerMN communicator.
    if args.gpu:
        comm = chainermn.create_communicator('hierarchical')
        device = comm.intra_rank
    else:
        comm = chainermn.create_communicator('naive')
        device = -1

    # The pipeline is hard-wired for a two-stage split.
    if comm.size != 2:
        raise ValueError(
            'This example can only be executed on exactly 2 processes.')

    if comm.rank == 0:
        print('==========================================')
        if args.gpu:
            print('Using GPUs')
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    # Rank 0 holds the input half of the network (with the loss head via
    # Classifier); rank 1 holds the output half.
    if comm.rank == 0:
        model = L.Classifier(MLP0(comm, args.unit))
    elif comm.rank == 1:
        model = MLP1(comm, args.unit, 10)

    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Iterate dataset only on worker 0; worker 1 gets an empty placeholder
    # of matching length.
    train, test = chainer.datasets.get_mnist()
    if comm.rank == 1:
        train = chainermn.datasets.create_empty_dataset(train)
        test = chainermn.datasets.create_empty_dataset(test)

    train_iter = chainer.iterators.SerialIterator(
        train, args.batchsize, shuffle=False)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.Evaluator(test_iter, model, device=device))

    # Some display and output extensions are necessary only for worker 0.
    if comm.rank == 0:
        trainer.extend(extensions.dump_graph('main/loss'))
        trainer.extend(extensions.LogReport())
        trainer.extend(
            extensions.PrintReport([
                'epoch', 'main/loss', 'validation/main/loss',
                'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
            ]))
        trainer.extend(extensions.ProgressBar())

    trainer.run()
def main():
    """ChainerMN example: data-parallel MNIST training with automatic
    checkpointing.

    Uses a multi-node checkpointer keyed by --run-id so an interrupted run
    restarted with the same id resumes from the last checkpoint.
    """
    parser = argparse.ArgumentParser(description='''\
ChainerMN example: MNIST with automatic checkpoints enabled''')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--communicator', type=str,
                        default='hierarchical', help='Type of communicator')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    parser.add_argument('--run-id', type=str, default='train-mnist-example',
                        help='ID of the task name')
    args = parser.parse_args()

    # Prepare ChainerMN communicator: GPU runs use the requested
    # communicator (never 'naive'); CPU runs always fall back to 'naive'.
    if args.gpu:
        if args.communicator == 'naive':
            print("Error: 'naive' communicator does not support GPU.\n")
            exit(-1)
        comm = chainermn.create_communicator(args.communicator)
        device = comm.intra_rank
    else:
        if args.communicator != 'naive':
            print('Warning: using naive communicator '
                  'because only naive supports CPU-only execution')
        comm = chainermn.create_communicator('naive')
        device = -1

    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    model = L.Classifier(MLP(args.unit, 10))
    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()

    # Create a multi node optimizer from a standard Chainer optimizer.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    # Split and distribute the dataset. Only worker 0 loads the whole
    # dataset.  Datasets of worker 0 are evenly split and distributed to
    # all workers.
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Enable checkpointer and recover from checkpoint if any checkpoint
    # exists for this run id.
    checkpointer = create_multi_node_checkpointer(name=args.run_id, comm=comm)
    checkpointer.maybe_load(trainer, optimizer)
    print("Rank", comm.rank, ": (Re)Starting from (epoch, iter) =",
          (trainer.updater.epoch, trainer.updater.iteration))
    trainer.extend(checkpointer, trigger=(1000, 'iteration'))

    # Create a multi node evaluator from a standard Chainer evaluator.
    evaluator = extensions.Evaluator(test_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        trainer.extend(extensions.DumpGraph('main/loss'))
        trainer.extend(extensions.LogReport())
        trainer.extend(
            extensions.PrintReport([
                'epoch', 'main/loss', 'validation/main/loss',
                'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
            ]))
        trainer.extend(extensions.ProgressBar())

    trainer.run()
def main():
    """Entry point: distributed ImageNet training with ChainerMN.

    Requires CUDA; every MPI process drives one GPU selected by its
    intra-node rank. Rank 0 loads the image-label list files, whose
    contents are then scattered evenly to all workers.
    """
    # Check if GPU is available
    # (ImageNet example does not support CPU execution)
    if not chainer.cuda.available:
        raise RuntimeError("ImageNet requires GPU support.")

    # Architecture name -> model class, selectable via --arch.
    archs = {
        'alex': alex.Alex,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50,
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch', '-a', choices=archs.keys(), default='nin',
                        help='Convnet architecture')
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch', '-E', type=int, default=10,
                        help='Number of epochs to train')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean', '-m', default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--root', '-R', default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize', '-b', type=int, default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.add_argument('--communicator', default='hierarchical')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Prepare ChainerMN communicator.
    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank  # one GPU per process, indexed within the node

    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        print('Using {} communicator'.format(args.communicator))
        print('Using {} arch'.format(args.arch))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    chainer.cuda.get_device_from_id(device).use()  # Make the GPU current
    model.to_gpu()

    # Split and distribute the dataset. Only worker 0 loads the whole dataset.
    # Datasets of worker 0 are evenly split and distributed to all workers.
    mean = np.load(args.mean)
    if comm.rank == 0:
        train = PreprocessedDataset(args.train, args.root, mean, model.insize)
        val = PreprocessedDataset(
            args.val, args.root, mean, model.insize, False)
    else:
        train = None
        val = None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    val = chainermn.scatter_dataset(val, comm)

    # We need to change the start method of multiprocessing module if we are
    # using InfiniBand and MultiprocessIterator. This is because processes
    # often crash when calling fork if they are using Infiniband.
    # (c.f., https://www.open-mpi.org/faq/?category=tuning#fork-warning )
    multiprocessing.set_start_method('forkserver')
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=args.loaderjob)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)

    # Create a multi node optimizer from a standard Chainer optimizer.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9), comm)
    optimizer.setup(model)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # Much shorter intervals in --test mode so a smoke run finishes quickly.
    checkpoint_interval = (10, 'iteration') if args.test else (1, 'epoch')
    val_interval = (10, 'iteration') if args.test else (1, 'epoch')
    log_interval = (10, 'iteration') if args.test else (1, 'epoch')

    checkpointer = chainermn.create_multi_node_checkpointer(
        name='imagenet-example', comm=comm)
    checkpointer.maybe_load(trainer, optimizer)
    trainer.extend(checkpointer, trigger=checkpoint_interval)

    # Create a multi node evaluator from an evaluator.
    evaluator = TestModeEvaluator(val_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=val_interval)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        trainer.extend(extensions.dump_graph('main/loss'))
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy', 'lr'
        ]), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def train(args, train_data, test_data, evaluator_type):
    """Train a Mask R-CNN model, optionally across nodes with ChainerMN.

    Args:
        args: Namespace that must carry the keys in ``required_args`` plus
            the runtime options read throughout this function (multi_node,
            gpu, batch_size_per_gpu, max_epoch, pooling_func, initializer,
            model, roi_size, ...). Several derived values (n_gpu, lr, out,
            seed, step_size, git_hash, ...) are written back onto ``args``
            so they end up in the dumped params.
        train_data: Training dataset; scattered from rank 0 when
            ``args.multi_node`` is set.
        test_data: Evaluation dataset.
        evaluator_type: Either 'voc' or 'coco'; selects the evaluator.

    Raises:
        ValueError: If a required args key is missing or an option value
            is unsupported.
    """
    required_args = [
        'dataset',
        'class_names',
        'logs_dir',
        'min_size',
        'max_size',
        'anchor_scales',
    ]
    for arg_key in required_args:
        if not hasattr(args, arg_key):
            raise ValueError(
                'args must contain required key: {}'.format(arg_key)
            )

    assert evaluator_type in ['voc', 'coco'], \
        'Unsupported evaluator_type: {}'.format(evaluator_type)

    if args.multi_node:
        import chainermn
        comm = chainermn.create_communicator('hierarchical')
        device = comm.intra_rank  # one GPU per process within the node
        args.n_node = comm.inter_size
        args.n_gpu = comm.size
        chainer.cuda.get_device_from_id(device).use()
    else:
        if args.gpu is None:
            print(
                'Option --gpu is required without --multi-node.',
                file=sys.stderr,
            )
            sys.exit(1)
        args.n_node = 1
        args.n_gpu = 1
        chainer.cuda.get_device_from_id(args.gpu).use()
        device = args.gpu

    args.seed = 0
    now = datetime.datetime.now()
    args.timestamp = now.isoformat()
    args.out = osp.join(args.logs_dir, now.strftime('%Y%m%d_%H%M%S'))

    args.batch_size = args.batch_size_per_gpu * args.n_gpu

    # Linear LR scaling with the total batch size:
    # lr: 0.00125 * 8 = 0.01 in original
    args.lr = 0.00125 * args.batch_size
    args.weight_decay = 0.0001

    # lr / 10 at 120k iteration with
    # 160k iteration * 16 batchsize in original
    args.step_size = [
        (120e3 / 180e3) * args.max_epoch,
        (160e3 / 180e3) * args.max_epoch,
    ]

    random.seed(args.seed)
    np.random.seed(args.seed)

    if args.pooling_func == 'align':
        pooling_func = cmr.functions.roi_align_2d
    elif args.pooling_func == 'pooling':
        pooling_func = cmr.functions.roi_pooling_2d
    elif args.pooling_func == 'resize':
        pooling_func = cmr.functions.crop_and_resize
    else:
        raise ValueError(
            'Unsupported pooling_func: {}'.format(args.pooling_func)
        )

    if args.initializer == 'normal':
        mask_initialW = chainer.initializers.Normal(0.01)
    elif args.initializer == 'he_normal':
        mask_initialW = chainer.initializers.HeNormal(fan_option='fan_out')
    else:
        raise ValueError(
            'Unsupported initializer: {}'.format(args.initializer)
        )

    if args.model in ['resnet50', 'resnet101']:
        # NOTE(review): lstrip strips *characters*, not a prefix; this is
        # safe only because '50'/'101' contain none of the letters in
        # 'resnet'. Consider args.model[len('resnet'):] instead.
        n_layers = int(args.model.lstrip('resnet'))
        mask_rcnn = cmr.models.MaskRCNNResNet(
            n_layers=n_layers,
            n_fg_class=len(args.class_names),
            pooling_func=pooling_func,
            anchor_scales=args.anchor_scales,
            roi_size=args.roi_size,
            min_size=args.min_size,
            max_size=args.max_size,
            mask_initialW=mask_initialW,
        )
    else:
        raise ValueError('Unsupported model: {}'.format(args.model))
    model = cmr.models.MaskRCNNTrainChain(mask_rcnn)
    if args.multi_node or args.gpu >= 0:
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    if args.multi_node:
        optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.weight_decay))

    if args.model in ['resnet50', 'resnet101']:
        # ResNetExtractor.freeze_at is not enough to freeze params
        # since WeightDecay updates the param little by little.
        mask_rcnn.extractor.conv1.disable_update()
        mask_rcnn.extractor.bn1.disable_update()
        mask_rcnn.extractor.res2.disable_update()
        for link in mask_rcnn.links():
            if isinstance(link, cmr.links.AffineChannel2D):
                link.disable_update()

    train_data = chainer.datasets.TransformDataset(
        train_data, cmr.datasets.MaskRCNNTransform(mask_rcnn),
    )
    test_data = chainer.datasets.TransformDataset(
        test_data, cmr.datasets.MaskRCNNTransform(mask_rcnn, train=False),
    )
    if args.multi_node:
        # Only rank 0 keeps the data; scatter_dataset splits it evenly.
        if comm.rank != 0:
            train_data = None
            test_data = None
        train_data = chainermn.scatter_dataset(train_data, comm, shuffle=True)
        test_data = chainermn.scatter_dataset(test_data, comm)

    # FIXME: MultiProcessIterator sometimes hangs
    train_iter = chainer.iterators.SerialIterator(
        train_data, batch_size=args.batch_size_per_gpu,
    )
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=args.batch_size_per_gpu,
        repeat=False, shuffle=False,
    )

    converter = functools.partial(
        cmr.datasets.concat_examples,
        padding=0,
        # img, bboxes, labels, masks, scales
        indices_concat=[0, 2, 3, 4],  # img, _, labels, masks, scales
        indices_to_device=[0, 1],  # img, bbox
    )

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=device,
        converter=converter,
    )

    trainer = training.Trainer(
        updater, (args.max_epoch, 'epoch'), out=args.out,
    )

    trainer.extend(
        extensions.ExponentialShift('lr', 0.1),
        trigger=training.triggers.ManualScheduleTrigger(
            args.step_size, 'epoch',
        ),
    )

    eval_interval = 1, 'epoch'
    log_interval = 20, 'iteration'
    plot_interval = 0.1, 'epoch'
    print_interval = 20, 'iteration'

    if evaluator_type == 'voc':
        evaluator = cmr.extensions.InstanceSegmentationVOCEvaluator(
            test_iter,
            model.mask_rcnn,
            device=device,
            use_07_metric=True,
            label_names=args.class_names,
        )
    elif evaluator_type == 'coco':
        evaluator = cmr.extensions.InstanceSegmentationCOCOEvaluator(
            test_iter,
            model.mask_rcnn,
            device=device,
            label_names=args.class_names,
        )
    else:
        raise ValueError(
            'Unsupported evaluator_type: {}'.format(evaluator_type)
        )
    if args.multi_node:
        evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=eval_interval)

    # Reporting/snapshot extensions run on a single process only.
    if not args.multi_node or comm.rank == 0:
        # Save snapshot.
        trainer.extend(
            extensions.snapshot_object(model.mask_rcnn, 'snapshot_model.npz'),
            trigger=training.triggers.MaxValueTrigger(
                'validation/main/map', eval_interval,
            ),
        )

        # Dump params.yaml.
        args.git_hash = cmr.utils.git_hash()
        args.hostname = socket.gethostname()
        trainer.extend(fcn.extensions.ParamsReport(args.__dict__))

        # Visualization.
        trainer.extend(
            cmr.extensions.InstanceSegmentationVisReport(
                test_iter,
                model.mask_rcnn,
                label_names=args.class_names,
            ),
            trigger=eval_interval,
        )

        # Logging.
        trainer.extend(
            chainer.training.extensions.observe_lr(),
            trigger=log_interval,
        )
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(
            extensions.PrintReport(
                [
                    'iteration',
                    'epoch',
                    'elapsed_time',
                    'lr',
                    'main/loss',
                    'main/roi_loc_loss',
                    'main/roi_cls_loss',
                    'main/roi_mask_loss',
                    'main/rpn_loc_loss',
                    'main/rpn_cls_loss',
                    'validation/main/map',
                ],
            ),
            trigger=print_interval,
        )
        trainer.extend(extensions.ProgressBar(update_interval=10))

        # Plot.
        assert extensions.PlotReport.available()
        trainer.extend(
            extensions.PlotReport(
                [
                    'main/loss',
                    'main/roi_loc_loss',
                    'main/roi_cls_loss',
                    'main/roi_mask_loss',
                    'main/rpn_loc_loss',
                    'main/rpn_cls_loss',
                ],
                file_name='loss.png', trigger=plot_interval,
            ),
            trigger=plot_interval,
        )
        trainer.extend(
            extensions.PlotReport(
                ['validation/main/map'],
                file_name='accuracy.png', trigger=plot_interval,
            ),
            trigger=eval_interval,
        )

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def test_allreduce_persistent_cpu(self):
    # Exercise the persistent-value allreduce path on CPU using the
    # CPU-only 'naive' communicator.
    communicator = chainermn.create_communicator('naive')
    model = ExampleModel()
    self._test(communicator, model)
def check_mnist(gpu, display_log=True):
    """Run a short distributed MNIST training and assert it converges.

    Also attaches a multi-node checkpointer writing into a temporary
    directory and verifies the checkpointer finalized (emptied) that
    directory after a successful run.

    Args:
        gpu (bool): Train on this process' intra-node GPU when True,
            otherwise on CPU.
        display_log (bool): Print per-epoch reports from rank 0.
    """
    epoch = 5
    batchsize = 100
    n_units = 100

    comm = chainermn.create_communicator('naive')
    if gpu:
        device = comm.intra_rank
        chainer.cuda.get_device_from_id(device).use()
    else:
        device = -1  # CPU

    model = L.Classifier(MLP(n_units, 10))
    if gpu:
        model.to_gpu()

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    # Only rank 0 loads the dataset; it is then scattered to all workers.
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
    else:
        train, test = None, None

    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)

    train_iter = chainer.iterators.SerialIterator(train, batchsize)
    test_iter = chainer.iterators.SerialIterator(test, batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    updater = training.StandardUpdater(
        train_iter,
        optimizer,
        device=device
    )

    trainer = training.Trainer(updater, (epoch, 'epoch'))

    # Wrap standard Chainer evaluators by MultiNodeEvaluator.
    evaluator = extensions.Evaluator(test_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator)

    # Add checkpointer. This is just to check checkpointing runs
    # without errors
    path = tempfile.mkdtemp(dir='/tmp', prefix=__name__ + "-tmp-")
    checkpointer = create_multi_node_checkpointer(name=__name__, comm=comm,
                                                  path=path)
    trainer.extend(checkpointer, trigger=(1, 'epoch'))

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0 and display_log:
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch')),
                       trigger=(1, 'epoch'))
        trainer.extend(extensions.PrintReport(['epoch',
                                               'main/loss',
                                               'validation/main/loss',
                                               'main/accuracy',
                                               'validation/main/accuracy',
                                               'elapsed_time'],
                                              out=sys.stderr),
                       trigger=(1, 'epoch'))
    trainer.run()

    err = evaluator()['validation/main/accuracy']
    assert err > 0.95

    # Check checkpointer successfully finalized snapshot directory
    assert [] == os.listdir(path)
    os.removedirs(path)
def main():
    """Entry point: distributed seq2seq (EN->FR) training with ChainerMN.

    Rank 0 reads and preprocesses the WMT corpus (optionally caching the
    pickled result), then the word-id vocabularies are broadcast over MPI
    and the datasets scattered to all workers.
    """
    parser = argparse.ArgumentParser(description='Chainer example: seq2seq')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--bleu', action="store_true", default=False,
                        help='Report BLEU score')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--cache', '-c', default=None,
                        help='Directory to cache pre-processed dataset')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1024,
                        help='Number of units')
    parser.add_argument('--communicator', default='hierarchical',
                        help="Type of communicator")
    parser.add_argument('--stop', '-s', type=str, default="15e",
                        help='Stop trigger (ex. "500i", "15e")')
    parser.add_argument('--input', '-i', type=str, default='wmt',
                        help='Input directory')
    parser.add_argument('--optimizer', type=str, default="adam()",
                        help="Optimizer and its argument")
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    args = parser.parse_args()

    # Prepare ChainerMN communicator
    if args.gpu:
        comm = chainermn.create_communicator('hierarchical')
        dev = comm.intra_rank  # one GPU per process within the node
    else:
        comm = chainermn.create_communicator('naive')
        dev = -1  # CPU

    if comm.mpi_comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(MPI.COMM_WORLD.Get_size()))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('==========================================')

    # Rank 0 prepares all data
    if comm.rank == 0:
        if args.cache and not os.path.exists(args.cache):
            os.mkdir(args.cache)

        # Read source data
        bt = time.time()
        if args.cache:
            cache_file = os.path.join(args.cache, 'source.pickle')
            source_vocab, source_data = cached_call(cache_file,
                                                    read_source,
                                                    args.input, args.cache)
        else:
            source_vocab, source_data = read_source(args.input, args.cache)
        et = time.time()
        print("RD source done. {:.3f} [s]".format(et - bt))
        sys.stdout.flush()

        # Read target data
        bt = time.time()
        if args.cache:
            cache_file = os.path.join(args.cache, 'target.pickle')
            target_vocab, target_data = cached_call(cache_file,
                                                    read_target,
                                                    args.input, args.cache)
        else:
            target_vocab, target_data = read_target(args.input, args.cache)
        et = time.time()
        print("RD target done. {:.3f} [s]".format(et - bt))
        sys.stdout.flush()

        print('Original training data size: %d' % len(source_data))
        # Keep only sentence pairs where both sides have 1..49 tokens.
        train_data = [(s, t)
                      for s, t in six.moves.zip(source_data, target_data)
                      if 0 < len(s) < 50 and 0 < len(t) < 50]
        print('Filtered training data size: %d' % len(train_data))

        en_path = os.path.join(args.input, 'dev', 'newstest2013.en')
        source_data = europal.make_dataset(en_path, source_vocab)
        fr_path = os.path.join(args.input, 'dev', 'newstest2013.fr')
        target_data = europal.make_dataset(fr_path, target_vocab)
        assert(len(source_data) == len(target_data))
        test_data = [(s, t) for s, t
                     in six.moves.zip(source_data, target_data)
                     if 0 < len(s) and 0 < len(t)]

        source_ids = {word: index
                      for index, word in enumerate(source_vocab)}
        target_ids = {word: index
                      for index, word in enumerate(target_vocab)}
    else:
        # target_data, source_data = None, None
        train_data, test_data = None, None
        target_ids, source_ids = None, None

    # Print GPU id (one rank at a time, separated by barriers)
    for i in range(0, comm.size):
        if comm.rank == i:
            print("Rank {} GPU: {}".format(comm.rank, dev))
        sys.stdout.flush()
        comm.mpi_comm.Barrier()

    # broadcast id- > word dictionary
    source_ids = comm.mpi_comm.bcast(source_ids, root=0)
    target_ids = comm.mpi_comm.bcast(target_ids, root=0)

    target_words = {i: w for w, i in target_ids.items()}
    source_words = {i: w for w, i in source_ids.items()}

    if comm.rank == 0:
        print("target_words : {}".format(len(target_words)))
        print("source_words : {}".format(len(source_words)))

    model = Seq2seq(3, len(source_ids), len(target_ids), args.unit)

    if dev >= 0:
        # NOTE(review): chainer.cuda.get_device is deprecated in favor of
        # get_device_from_id -- confirm against the targeted Chainer version.
        chainer.cuda.get_device(dev).use()
        model.to_gpu(dev)

    # determine the stop trigger
    m = re.match(r'^(\d+)e$', args.stop)
    if m:
        trigger = (int(m.group(1)), 'epoch')
    else:
        m = re.match(r'^(\d+)i$', args.stop)
        if m:
            trigger = (int(m.group(1)), 'iteration')
        else:
            if comm.rank == 0:
                sys.stderr.write("Error: unknown stop trigger: {}".format(
                    args.stop))
            exit(-1)

    if comm.rank == 0:
        print("Trigger: {}".format(trigger))

    optimizer = chainermn.create_multi_node_optimizer(
        create_optimizer(args.optimizer), comm)
    optimizer.setup(model)

    # Broadcast dataset
    # Sanity check of train_data
    train_data = chainermn.scatter_dataset(train_data, comm)
    test_data = chainermn.scatter_dataset(test_data, comm)

    train_iter = chainer.iterators.SerialIterator(train_data,
                                                  args.batchsize,
                                                  shuffle=False)
    updater = training.StandardUpdater(
        train_iter, optimizer, converter=convert, device=dev)
    trainer = training.Trainer(updater, trigger, out=args.out)

    trainer.extend(chainermn.create_multi_node_evaluator(
        BleuEvaluator(model, test_data, device=dev, comm=comm),
        comm))

    def translate_one(source, target):
        # Translate a single raw sentence and print source/result/expected.
        words = europal.split_sentence(source)
        print('# source : ' + ' '.join(words))
        x = model.xp.array(
            [source_ids.get(w, 1) for w in words], 'i')
        ys = model.translate([x])[0]
        words = [target_words[y] for y in ys]
        print('# result : ' + ' '.join(words))
        print('# expect : ' + target)

    # @chainer.training.make_extension(trigger=(200, 'iteration'))
    def translate(trainer):
        translate_one(
            'Who are we ?',
            'Qui sommes-nous?')
        translate_one(
            'And it often costs over a hundred dollars ' +
            'to obtain the required identity card .',
            'Or, il en coûte souvent plus de cent dollars ' +
            'pour obtenir la carte d\'identité requise.')

        source, target = test_data[numpy.random.choice(len(test_data))]
        source = ' '.join([source_words.get(i, '') for i in source])
        target = ' '.join([target_words.get(i, '') for i in target])
        translate_one(source, target)

    if comm.rank == 0:
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch')),
                       trigger=(1, 'epoch'))
        report = extensions.PrintReport(['epoch',
                                         'iteration',
                                         'main/loss',
                                         'main/perp',
                                         'validation/main/bleu',
                                         'elapsed_time'])
        trainer.extend(report, trigger=(1, 'epoch'))

    comm.mpi_comm.Barrier()
    if comm.rank == 0:
        print('start training')
        sys.stdout.flush()

    trainer.run()
def setUp(self):
    # Build a CPU-only communicator; these tests need at least 2 processes.
    comm = chainermn.create_communicator('naive')
    self.communicator = comm
    if comm.size < 2:
        pytest.skip('This test is for multinode only')
def comm() -> CommunicatorBase:
    """Return a 'naive' communicator, skipping when ChainerMN is absent."""
    if _available:
        return chainermn.create_communicator("naive")
    pytest.skip("This test requires ChainerMN.")
def main():
    """Entry point: distributed seq2seq (EN->FR) training with ChainerMN.

    Rank 0 reads and preprocesses the WMT corpus (optionally caching the
    pickled result); vocabularies are broadcast with ``bcast_obj`` and the
    datasets scattered to all workers.
    """
    parser = argparse.ArgumentParser(description='Chainer example: seq2seq')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--bleu', action='store_true', default=False,
                        help='Report BLEU score')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--cache', '-c', default=None,
                        help='Directory to cache pre-processed dataset')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1024,
                        help='Number of units')
    parser.add_argument('--communicator', default='hierarchical',
                        help='Type of communicator')
    parser.add_argument('--stop', '-s', type=str, default='15e',
                        help='Stop trigger (ex. "500i", "15e")')
    parser.add_argument('--input', '-i', type=str, default='wmt',
                        help='Input directory')
    parser.add_argument('--optimizer', type=str, default='adam()',
                        help='Optimizer and its argument')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    args = parser.parse_args()

    # Prepare ChainerMN communicator
    if args.gpu:
        comm = chainermn.create_communicator('hierarchical')
        dev = comm.intra_rank  # one GPU per process within the node
    else:
        comm = chainermn.create_communicator('naive')
        dev = -1  # CPU

    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('==========================================')

    # Rank 0 prepares all data
    if comm.rank == 0:
        if args.cache and not os.path.exists(args.cache):
            os.mkdir(args.cache)

        # Read source data
        bt = time.time()
        if args.cache:
            cache_file = os.path.join(args.cache, 'source.pickle')
            source_vocab, source_data = cached_call(cache_file,
                                                    read_source,
                                                    args.input, args.cache)
        else:
            source_vocab, source_data = read_source(args.input, args.cache)
        et = time.time()
        print('RD source done. {:.3f} [s]'.format(et - bt))
        sys.stdout.flush()

        # Read target data
        bt = time.time()
        if args.cache:
            cache_file = os.path.join(args.cache, 'target.pickle')
            target_vocab, target_data = cached_call(cache_file,
                                                    read_target,
                                                    args.input, args.cache)
        else:
            target_vocab, target_data = read_target(args.input, args.cache)
        et = time.time()
        print('RD target done. {:.3f} [s]'.format(et - bt))
        sys.stdout.flush()

        print('Original training data size: %d' % len(source_data))
        # Keep only sentence pairs where both sides have 1..49 tokens.
        train_data = [(s, t)
                      for s, t in six.moves.zip(source_data, target_data)
                      if 0 < len(s) < 50 and 0 < len(t) < 50]
        print('Filtered training data size: %d' % len(train_data))

        en_path = os.path.join(args.input, 'dev', 'newstest2013.en')
        source_data = europal.make_dataset(en_path, source_vocab)
        fr_path = os.path.join(args.input, 'dev', 'newstest2013.fr')
        target_data = europal.make_dataset(fr_path, target_vocab)
        assert(len(source_data) == len(target_data))
        test_data = [(s, t) for s, t
                     in six.moves.zip(source_data, target_data)
                     if 0 < len(s) and 0 < len(t)]

        source_ids = {word: index
                      for index, word in enumerate(source_vocab)}
        target_ids = {word: index
                      for index, word in enumerate(target_vocab)}
    else:
        # target_data, source_data = None, None
        train_data, test_data = None, None
        target_ids, source_ids = None, None

    # Print GPU id (one rank at a time, separated by barriers)
    for i in range(0, comm.size):
        if comm.rank == i:
            print('Rank {} GPU: {}'.format(comm.rank, dev))
        sys.stdout.flush()
        comm.mpi_comm.Barrier()

    # broadcast id- > word dictionary
    source_ids = comm.bcast_obj(source_ids, root=0)
    target_ids = comm.bcast_obj(target_ids, root=0)

    target_words = {i: w for w, i in target_ids.items()}
    source_words = {i: w for w, i in source_ids.items()}

    if comm.rank == 0:
        print('target_words : {}'.format(len(target_words)))
        print('source_words : {}'.format(len(source_words)))

    model = Seq2seq(3, len(source_ids), len(target_ids), args.unit)

    if dev >= 0:
        chainer.cuda.get_device_from_id(dev).use()
        model.to_gpu(dev)

    # determine the stop trigger
    m = re.match(r'^(\d+)e$', args.stop)
    if m:
        trigger = (int(m.group(1)), 'epoch')
    else:
        m = re.match(r'^(\d+)i$', args.stop)
        if m:
            trigger = (int(m.group(1)), 'iteration')
        else:
            if comm.rank == 0:
                sys.stderr.write('Error: unknown stop trigger: {}'.format(
                    args.stop))
            exit(-1)

    if comm.rank == 0:
        print('Trigger: {}'.format(trigger))

    optimizer = chainermn.create_multi_node_optimizer(
        create_optimizer(args.optimizer), comm)
    optimizer.setup(model)

    # Broadcast dataset
    # Sanity check of train_data
    train_data = chainermn.scatter_dataset(train_data, comm)
    test_data = chainermn.scatter_dataset(test_data, comm)

    train_iter = chainer.iterators.SerialIterator(train_data,
                                                  args.batchsize,
                                                  shuffle=False)
    updater = training.StandardUpdater(
        train_iter, optimizer, converter=convert, device=dev)
    trainer = training.Trainer(updater, trigger, out=args.out)

    trainer.extend(chainermn.create_multi_node_evaluator(
        BleuEvaluator(model, test_data, device=dev, comm=comm),
        comm))

    def translate_one(source, target):
        # Translate a single raw sentence and print source/result/expected.
        words = europal.split_sentence(source)
        print('# source : ' + ' '.join(words))
        x = model.xp.array(
            [source_ids.get(w, 1) for w in words], numpy.int32)
        ys = model.translate([x])[0]
        words = [target_words[y] for y in ys]
        print('# result : ' + ' '.join(words))
        print('# expect : ' + target)

    # @chainer.training.make_extension(trigger=(200, 'iteration'))
    def translate(trainer):
        translate_one(
            'Who are we ?',
            'Qui sommes-nous?')
        translate_one(
            'And it often costs over a hundred dollars ' +
            'to obtain the required identity card .',
            'Or, il en coûte souvent plus de cent dollars ' +
            'pour obtenir la carte d\'identité requise.')

        source, target = test_data[numpy.random.choice(len(test_data))]
        source = ' '.join([source_words.get(i, '') for i in source])
        target = ' '.join([target_words.get(i, '') for i in target])
        translate_one(source, target)

    if comm.rank == 0:
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch')),
                       trigger=(1, 'epoch'))
        report = extensions.PrintReport(['epoch',
                                         'iteration',
                                         'main/loss',
                                         'main/perp',
                                         'validation/main/bleu',
                                         'elapsed_time'])
        trainer.extend(report, trigger=(1, 'epoch'))

    comm.mpi_comm.Barrier()
    if comm.rank == 0:
        print('start training')
        sys.stdout.flush()

    trainer.run()
def test_allreduce_persistent_cpu(self):
    # Run the persistent-value allreduce check on CPU, first with the
    # numpy backend, then with ChainerX.
    communicator = chainermn.create_communicator('naive')
    example = ExampleModel()
    for use_chainerx in (False, True):
        self._test(communicator, example, False, use_chainerx)
def main():
    """Entry point: distributed DCGAN training with ChainerMN.

    Rank 0 loads the image dataset (CIFAR-10 by default, or a directory of
    png/jpg files), which is then scattered evenly to all workers;
    generator and discriminator each get their own multi-node Adam
    optimizer.
    """
    parser = argparse.ArgumentParser(description='ChainerMN example: DCGAN')
    parser.add_argument('--batchsize', '-b', type=int, default=50,
                        help='Number of images in each mini-batch')
    parser.add_argument('--communicator', type=str,
                        default='hierarchical', help='Type of communicator')
    parser.add_argument('--epoch', '-e', type=int, default=1000,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--dataset', '-i', default='',
                        help='Directory of image files. Default is cifar-10.')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--gen_model', '-r', default='',
                        help='Use pre-trained generator for training')
    parser.add_argument('--dis_model', '-d', default='',
                        help='Use pre-trained discriminator for training')
    parser.add_argument('--n_hidden', '-n', type=int, default=100,
                        help='Number of hidden units (z)')
    parser.add_argument('--seed', type=int, default=0,
                        help='Random seed of z at visualization stage')
    parser.add_argument('--snapshot_interval', type=int, default=1000,
                        help='Interval of snapshot')
    parser.add_argument('--display_interval', type=int, default=100,
                        help='Interval of displaying log to console')
    args = parser.parse_args()

    # Prepare ChainerMN communicator.
    if args.gpu:
        # The 'naive' communicator is CPU-only; fail fast on GPU runs.
        if args.communicator == 'naive':
            print("Error: 'naive' communicator does not support GPU.\n")
            exit(-1)
        comm = chainermn.create_communicator(args.communicator)
        device = comm.intra_rank  # one GPU per process within the node
    else:
        if args.communicator != 'naive':
            print('Warning: using naive communicator '
                  'because only naive supports CPU-only execution')
        comm = chainermn.create_communicator('naive')
        device = -1  # CPU

    # Only rank 0 prints the run configuration to avoid duplicated output.
    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num hidden unit: {}'.format(args.n_hidden))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    # Set up a neural network to train
    gen = Generator(n_hidden=args.n_hidden)
    dis = Discriminator()

    if device >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(device).use()
        gen.to_gpu()  # Copy the model to the GPU
        dis.to_gpu()

    # Setup an optimizer
    def make_optimizer(model, comm, alpha=0.0002, beta1=0.5):
        # Create a multi node optimizer from a standard Chainer optimizer.
        optimizer = chainermn.create_multi_node_optimizer(
            chainer.optimizers.Adam(alpha=alpha, beta1=beta1), comm)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001), 'hook_dec')
        return optimizer

    opt_gen = make_optimizer(gen, comm)
    opt_dis = make_optimizer(dis, comm)

    # Split and distribute the dataset. Only worker 0 loads the whole dataset.
    # Datasets of worker 0 are evenly split and distributed to all workers.
    if comm.rank == 0:
        if args.dataset == '':
            # Load the CIFAR10 dataset if args.dataset is not specified
            train, _ = chainer.datasets.get_cifar10(withlabel=False,
                                                    scale=255.)
        else:
            all_files = os.listdir(args.dataset)
            image_files = [f for f in all_files if ('png' in f or 'jpg' in f)]
            print('{} contains {} image files'
                  .format(args.dataset, len(image_files)))
            train = chainer.datasets\
                .ImageDataset(paths=image_files, root=args.dataset)
    else:
        train = None

    train = chainermn.scatter_dataset(train, comm)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

    # Set up a trainer
    updater = DCGANUpdater(
        models=(gen, dis),
        iterator=train_iter,
        optimizer={
            'gen': opt_gen, 'dis': opt_dis},
        device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        snapshot_interval = (args.snapshot_interval, 'iteration')
        display_interval = (args.display_interval, 'iteration')
        # Save only model parameters.
        # `snapshot` extension will save all the trainer module's attribute,
        # including `train_iter`.
        # However, `train_iter` depends on scattered dataset, which means that
        # `train_iter` may be different in each process.
        # Here, instead of saving whole trainer module, only the network models
        # are saved.
        trainer.extend(extensions.snapshot_object(
            gen, 'gen_iter_{.updater.iteration}.npz'),
            trigger=snapshot_interval)
        trainer.extend(extensions.snapshot_object(
            dis, 'dis_iter_{.updater.iteration}.npz'),
            trigger=snapshot_interval)
        trainer.extend(extensions.LogReport(trigger=display_interval))
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'gen/loss', 'dis/loss', 'elapsed_time',
        ]), trigger=display_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))
        trainer.extend(
            out_generated_image(
                gen, dis,
                10, 10, args.seed, args.out),
            trigger=snapshot_interval)

    # Start the training using pre-trained model, saved by snapshot_object
    if args.gen_model:
        chainer.serializers.load_npz(args.gen_model, gen)
    if args.dis_model:
        chainer.serializers.load_npz(args.dis_model, dis)

    # Run the training
    trainer.run()
def setup(self):
    # These tests assume exactly two MPI processes; skip otherwise.
    comm = chainermn.create_communicator('naive')
    self.communicator = comm
    if comm.size != 2:
        pytest.skip('This test is for two processes')
def setUp(self):
    # Surface every DeprecationWarning so the tests can assert on them,
    # then build the CPU-only communicator under test.
    warnings.filterwarnings(action='always', category=DeprecationWarning)
    self.communicator = chainermn.create_communicator('naive')
def train(args, dataset_train, dataset_test):
    """Train CycleGAN generators/discriminators, optionally across MPI workers.

    Args:
        args: parsed CLI namespace. Reads ``multi_node``, ``gpu`` and
            ``batch_size``; writes derived hyper-parameters (``lr``,
            ``beta1``, ``beta2``, ``batch_size_total`` ...) back onto it so
            they are logged by ``ParamsReport``.
        dataset_train: training dataset (meaningful on rank 0 only when
            ``args.multi_node``; other ranks receive a shard via
            ``chainermn.scatter_dataset``).
        dataset_test: test dataset, scattered the same way.
    """
    random.seed(0)
    np.random.seed(0)

    if args.multi_node:
        import chainermn
        comm = chainermn.create_communicator('hierarchical')
        # intra_rank indexes processes within one node -> usable as GPU id.
        device = comm.intra_rank
        args.n_gpu = comm.size
        args.inter_size = comm.inter_size
        args.intra_size = comm.intra_size
        args.batch_size_total = args.batch_size * args.n_gpu
        chainer.cuda.get_device(device).use()
    else:
        args.batch_size_total = args.batch_size
        chainer.cuda.get_device_from_id(args.gpu).use()
        device = args.gpu

    # Model
    G_A = chainer_cyclegan.models.ResnetGenerator()
    G_B = chainer_cyclegan.models.ResnetGenerator()
    D_A = chainer_cyclegan.models.NLayerDiscriminator()
    D_B = chainer_cyclegan.models.NLayerDiscriminator()

    if args.multi_node or args.gpu >= 0:
        G_A.to_gpu()
        G_B.to_gpu()
        D_A.to_gpu()
        D_B.to_gpu()

    # Optimizers (Adam hyper-parameters from the CycleGAN paper).
    args.lr = 0.0002
    args.beta1 = 0.5
    args.beta2 = 0.999
    optimizer_G_A = chainer.optimizers.Adam(
        alpha=args.lr, beta1=args.beta1, beta2=args.beta2)
    optimizer_G_B = chainer.optimizers.Adam(
        alpha=args.lr, beta1=args.beta1, beta2=args.beta2)
    optimizer_D_A = chainer.optimizers.Adam(
        alpha=args.lr, beta1=args.beta1, beta2=args.beta2)
    optimizer_D_B = chainer.optimizers.Adam(
        alpha=args.lr, beta1=args.beta1, beta2=args.beta2)
    if args.multi_node:
        # Wrap each optimizer so gradients are all-reduced across workers.
        optimizer_G_A = chainermn.create_multi_node_optimizer(
            optimizer_G_A, comm)
        optimizer_G_B = chainermn.create_multi_node_optimizer(
            optimizer_G_B, comm)
        optimizer_D_A = chainermn.create_multi_node_optimizer(
            optimizer_D_A, comm)
        optimizer_D_B = chainermn.create_multi_node_optimizer(
            optimizer_D_B, comm)
    optimizer_G_A.setup(G_A)
    optimizer_G_B.setup(G_B)
    optimizer_D_A.setup(D_A)
    optimizer_D_B.setup(D_B)

    # Dataset: rank 0 holds the full data; every rank gets a shard.
    if args.multi_node:
        if comm.rank != 0:
            dataset_train = None
            dataset_test = None
        dataset_train = chainermn.scatter_dataset(dataset_train, comm,
                                                  shuffle=True)
        dataset_test = chainermn.scatter_dataset(dataset_test, comm)

    iter_train = chainer.iterators.MultiprocessIterator(
        dataset_train, batch_size=args.batch_size,
        n_processes=4, shared_mem=10 ** 7)
    iter_test = chainer.iterators.SerialIterator(
        dataset_test, batch_size=args.batch_size,
        repeat=False, shuffle=False)

    # Updater
    epoch_count = 1
    niter = 100        # epochs at the initial learning rate
    niter_decay = 100  # epochs over which the rate decays

    updater = chainer_cyclegan.updaters.CycleGANUpdater(
        iterator=iter_train,
        optimizer=dict(
            G_A=optimizer_G_A,
            G_B=optimizer_G_B,
            D_A=optimizer_D_A,
            D_B=optimizer_D_B,
        ),
        device=device,
    )

    # Trainer: output dir is timestamped so runs never overwrite each other.
    out = osp.join('logs/train_cyclegan',
                   datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))
    trainer = training.Trainer(
        updater, (niter + niter_decay, 'epoch'), out=out)

    @training.make_extension(trigger=(1, 'epoch'))
    def tune_learning_rate(trainer):
        epoch = trainer.updater.epoch
        lr_rate = 1.0 - (max(0, epoch + 1 + epoch_count - niter) /
                         float(niter_decay + 1))
        # NOTE(review): the factors compound (alpha *= rate each epoch),
        # whereas the reference CycleGAN schedule *sets*
        # lr = base_lr * rate.  Compounding decays much faster -- confirm
        # this is intentional before changing it.
        trainer.updater.get_optimizer('G_A').alpha *= lr_rate
        trainer.updater.get_optimizer('G_B').alpha *= lr_rate
        trainer.updater.get_optimizer('D_A').alpha *= lr_rate
        trainer.updater.get_optimizer('D_B').alpha *= lr_rate

    trainer.extend(tune_learning_rate)

    # Evaluation / reporting only on the root rank to avoid duplicates.
    if not args.multi_node or comm.rank == 0:
        trainer.extend(
            chainer_cyclegan.extensions.CycleGANEvaluator(iter_test,
                                                          device=device))

        trainer.extend(extensions.snapshot_object(
            target=G_A, filename='G_A_{.updater.epoch:08}.npz'),
            trigger=(1, 'epoch'))
        trainer.extend(extensions.snapshot_object(
            target=G_B, filename='G_B_{.updater.epoch:08}.npz'),
            trigger=(1, 'epoch'))
        trainer.extend(extensions.snapshot_object(
            target=D_A, filename='D_A_{.updater.epoch:08}.npz'),
            trigger=(1, 'epoch'))
        trainer.extend(extensions.snapshot_object(
            target=D_B, filename='D_B_{.updater.epoch:08}.npz'),
            trigger=(1, 'epoch'))

        trainer.extend(extensions.LogReport(trigger=(20, 'iteration')))
        trainer.extend(
            extensions.PrintReport([
                'epoch',
                'iteration',
                'elapsed_time',
                'loss_gen_A',
                'loss_gen_B',
                'loss_dis_A',
                'loss_dis_B',
                'loss_cyc_A',
                'loss_cyc_B',
                'loss_idt_A',
                'loss_idt_B',
            ]))

        trainer.extend(contrib.extensions.ParamsReport(args.__dict__))
        trainer.extend(extensions.ProgressBar(update_interval=10))

        # A bare `assert` here would be stripped under `python -O`; raise
        # explicitly so a missing matplotlib backend is always reported.
        if not extensions.PlotReport.available():
            raise RuntimeError(
                'PlotReport is not available (matplotlib is required)')
        trainer.extend(
            extensions.PlotReport(y_keys=['loss_gen_A', 'loss_gen_B'],
                                  x_key='iteration',
                                  file_name='loss_gen.png',
                                  trigger=(100, 'iteration')))
        trainer.extend(
            extensions.PlotReport(y_keys=['loss_dis_A', 'loss_dis_B'],
                                  x_key='iteration',
                                  file_name='loss_dis.png',
                                  trigger=(100, 'iteration')))
        trainer.extend(
            extensions.PlotReport(y_keys=['loss_cyc_A', 'loss_cyc_B'],
                                  x_key='iteration',
                                  file_name='loss_cyc.png',
                                  trigger=(100, 'iteration')))
        trainer.extend(
            extensions.PlotReport(y_keys=['loss_idt_A', 'loss_idt_B'],
                                  x_key='iteration',
                                  file_name='loss_idt.png',
                                  trigger=(100, 'iteration')))

    trainer.run()
def main():
    """ChainerMN example: a two-stage (pipelined) MLP split across exactly
    two MPI processes.

    Rank 0 runs the input half (``MLP0``) wrapped in a ``Classifier``;
    rank 1 runs the output half (``MLP1``).  Activations/gradients flow
    between the ranks through the communicator inside the model links.
    """
    parser = argparse.ArgumentParser(
        description='ChainerMN example: pipelined neural network')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    # Prepare ChainerMN communicator.
    if args.gpu:
        comm = chainermn.create_communicator('hierarchical')
        # One GPU per process: intra-node rank doubles as the device id.
        device = comm.intra_rank
    else:
        comm = chainermn.create_communicator('naive')
        device = -1  # CPU

    # The model is hard-wired as a two-stage pipeline; any other world
    # size cannot be mapped onto it.
    if comm.size != 2:
        raise ValueError(
            'This example can only be executed on exactly 2 processes.')

    if comm.rank == 0:
        print('==========================================')
        if args.gpu:
            print('Using GPUs')
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    # Rank 0 holds the front half (with the loss head); rank 1 the back
    # half.  The size check above guarantees one of these branches runs.
    if comm.rank == 0:
        model = L.Classifier(MLP0(comm, args.unit))
    elif comm.rank == 1:
        model = MLP1(comm, args.unit, 10)

    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Iterate dataset only on worker 0.
    # Rank 1 gets empty datasets of the same length so both iterators
    # advance in lockstep without duplicating data.
    train, test = chainer.datasets.get_mnist()
    if comm.rank == 1:
        train = chainermn.datasets.create_empty_dataset(train)
        test = chainermn.datasets.create_empty_dataset(test)

    # shuffle=False keeps batch order identical on both ranks, which the
    # pipelined model requires.
    train_iter = chainer.iterators.SerialIterator(
        train, args.batchsize, shuffle=False)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.Evaluator(test_iter, model, device=device))

    # Some display and output extensions are necessary only for worker 0.
    if comm.rank == 0:
        trainer.extend(extensions.dump_graph('main/loss'))
        trainer.extend(extensions.LogReport())
        trainer.extend(extensions.PrintReport(
            ['epoch', 'main/loss', 'validation/main/loss',
             'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
        trainer.extend(extensions.ProgressBar())

    trainer.run()
def main():
    """Multi-GPU SSD training on VOC 2007+2012 (one MPI process per GPU).

    Rank 0 owns dataset indices and evaluation; training data is sharded
    across ranks via ``scatter_dataset`` and gradients are averaged by the
    multi-node optimizer.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', choices=('ssd300', 'ssd512'), default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--test-batchsize', type=int, default=16)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument(
        '--step', type=int, nargs='*', default=[80000, 100000])
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    comm = chainermn.create_communicator()
    # intra_rank indexes processes within one node, so it serves as GPU id.
    device = comm.intra_rank

    if args.model == 'ssd300':
        model = SSD300(
            n_fg_class=len(voc_bbox_label_names),
            pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(
            n_fg_class=len(voc_bbox_label_names),
            pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # Training pipeline: VOC07 trainval + VOC12 trainval, transformed into
    # (image, multibox location, multibox label) triples.
    train = TransformDataset(
        ConcatenatedDataset(
            VOCBboxDataset(year='2007', split='trainval'),
            VOCBboxDataset(year='2012', split='trainval')
        ),
        ('img', 'mb_loc', 'mb_label'),
        Transform(model.coder, model.insize, model.mean))

    # Scatter *indices* (not images) so only lightweight data crosses MPI;
    # each rank then slices its own shard of the dataset.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # http://chainermn.readthedocs.io/en/latest/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
    # args.batchsize is the *global* batch; each rank takes an equal part.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize // comm.size, n_processes=2)

    # Evaluation happens on rank 0 only, over the full VOC07 test split.
    if comm.rank == 0:
        test = VOCBboxDataset(
            year='2007', split='test',
            use_difficult=True, return_difficult=True)
        test_iter = chainer.iterators.SerialIterator(
            test, args.test_batchsize, repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    # Per the SSD recipe: biases get doubled gradients, everything else
    # gets weight decay.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), args.out)
    # Drop lr by 10x at each milestone in args.step.
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=1e-3),
        trigger=triggers.ManualScheduleTrigger(args.step, 'iteration'))

    if comm.rank == 0:
        # Evaluate at each lr milestone and at the final iteration.
        trainer.extend(
            DetectionVOCEvaluator(
                test_iter, model, use_07_metric=True,
                label_names=voc_bbox_label_names),
            trigger=triggers.ManualScheduleTrigger(
                args.step + [args.iteration], 'iteration'))

        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport(
            ['epoch', 'iteration', 'lr',
             'main/loss', 'main/loss/loc', 'main/loss/conf',
             'validation/main/map']),
            trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(
            extensions.snapshot(),
            trigger=triggers.ManualScheduleTrigger(
                args.step + [args.iteration], 'iteration'))
        trainer.extend(
            extensions.snapshot_object(
                model, 'model_iter_{.updater.iteration}'),
            trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Entry point for multi-process GAN training driven by a YAML config.

    One MPI process per GPU; rank 0 loads the dataset and owns all
    logging / snapshot / sampling extensions, other ranks receive dataset
    shards via ``scatter_dataset``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--config_path', type=str, default='configs/base.yml',
                        help='path to config file')
    parser.add_argument('--data_dir', type=str, default='./data/imagenet')
    parser.add_argument('--results_dir', type=str, default='./results/gans',
                        help='directory to save the results to')
    parser.add_argument('--inception_model_path', type=str,
                        default='./datasets/inception_model',
                        help='path to the inception model')
    parser.add_argument('--snapshot', type=str, default='',
                        help='path to the snapshot')
    parser.add_argument('--loaderjob', type=int,
                        help='number of parallel data loading processes')
    parser.add_argument('--communicator', type=str, default='hierarchical',
                        help='Type of communicator')
    args = parser.parse_args()

    # Close the config file deterministically, and avoid the deprecated
    # no-Loader form of yaml.load (which can construct arbitrary Python
    # objects).  The config files are plain mappings, so safe_load is
    # sufficient.
    with open(args.config_path) as f:
        config = yaml_utils.Config(yaml.safe_load(f))

    comm = chainermn.create_communicator(args.communicator)
    # intra_rank indexes processes within one node -> usable as GPU id.
    device = comm.intra_rank
    chainer.cuda.get_device_from_id(device).use()
    print("init")
    if comm.rank == 0:
        print('==========================================')
        print('Using {} communicator'.format(args.communicator))
        print('==========================================')

    # Model
    gen, dis = load_models(config)
    gen.to_gpu()
    dis.to_gpu()
    models = {"gen": gen, "dis": dis}

    # Optimizers (multi-node aware; built by the project helper).
    opt_gen = make_optimizer(
        gen, comm, alpha=config.adam['alpha'],
        beta1=config.adam['beta1'], beta2=config.adam['beta2'])
    opt_dis = make_optimizer(
        dis, comm, alpha=config.adam['alpha'],
        beta1=config.adam['beta1'], beta2=config.adam['beta2'])
    opts = {"opt_gen": opt_gen, "opt_dis": opt_dis}

    # Dataset: only rank 0 actually loads it; the other ranks call the
    # loader solely for its import side effect, then receive their shard.
    config['dataset']['args']['root'] = args.data_dir
    if comm.rank == 0:
        dataset = yaml_utils.load_dataset(config)
    else:
        _ = yaml_utils.load_dataset(
            config)  # Dummy, for adding path to the dataset module
        dataset = None
    dataset = chainermn.scatter_dataset(dataset, comm)

    # Iterator: forkserver avoids forking after MPI/CUDA initialization.
    multiprocessing.set_start_method('forkserver')
    iterator = chainer.iterators.MultiprocessIterator(
        dataset, config.batchsize, n_processes=args.loaderjob)

    # Updater class and its extra kwargs both come from the config.
    kwargs = config.updater['args'] if 'args' in config.updater else {}
    kwargs.update({
        'models': models,
        'iterator': iterator,
        'optimizer': opts,
        'device': device,
    })
    updater = yaml_utils.load_updater_class(config)
    updater = updater(**kwargs)

    out = args.results_dir
    if comm.rank == 0:
        create_result_dir(out, args.config_path, config)
    trainer = training.Trainer(
        updater, (config.iteration, 'iteration'), out=out)
    report_keys = ["loss_dis", "loss_gen", "inception_mean", "inception_std"]

    if comm.rank == 0:
        # Set up logging / snapshots / sampling on the root rank only.
        trainer.extend(extensions.snapshot(),
                       trigger=(config.snapshot_interval, 'iteration'))
        for m in models.values():
            trainer.extend(extensions.snapshot_object(
                m, m.__class__.__name__ + '_{.updater.iteration}.npz'),
                trigger=(config.snapshot_interval, 'iteration'))
        trainer.extend(
            extensions.LogReport(keys=report_keys,
                                 trigger=(config.display_interval,
                                          'iteration')))
        trainer.extend(extensions.PrintReport(report_keys),
                       trigger=(config.display_interval, 'iteration'))
        # PRIORITY_WRITER makes the samplers report before the log writers.
        trainer.extend(
            sample_generate_conditional(gen, out, n_classes=gen.n_classes),
            trigger=(config.evaluation_interval, 'iteration'),
            priority=extension.PRIORITY_WRITER)
        trainer.extend(
            sample_generate_light(gen, out, rows=10, cols=10),
            trigger=(config.evaluation_interval // 10, 'iteration'),
            priority=extension.PRIORITY_WRITER)
        trainer.extend(
            calc_inception(gen, n_ims=5000, splits=1,
                           path=args.inception_model_path),
            trigger=(config.evaluation_interval, 'iteration'),
            priority=extension.PRIORITY_WRITER)
        trainer.extend(
            extensions.ProgressBar(
                update_interval=config.progressbar_interval))

    # Linear lr decay to zero runs on every rank so alphas stay in sync.
    ext_opt_gen = extensions.LinearShift(
        'alpha', (config.adam['alpha'], 0.),
        (config.iteration_decay_start, config.iteration), opt_gen)
    ext_opt_dis = extensions.LinearShift(
        'alpha', (config.adam['alpha'], 0.),
        (config.iteration_decay_start, config.iteration), opt_dis)
    trainer.extend(ext_opt_gen)
    trainer.extend(ext_opt_dis)

    if args.snapshot:
        print("Resume training with snapshot:{}".format(args.snapshot))
        chainer.serializers.load_npz(args.snapshot, trainer)

    # Run the training
    print("start training")
    trainer.run()
def test_deprecation():
    # Both legacy communicator names must raise a DeprecationWarning.
    for name in ('hierarchical', 'two_dimensional'):
        with chainer.testing.assert_warns(DeprecationWarning):
            chainermn.create_communicator(name)
def main():
    """ChainerMN example: VGG16 on CIFAR with a multi-node iterator.

    Rank 0 holds the real dataset; other ranks use empty datasets and the
    multi-node iterator broadcasts batches from rank 0 to all workers.
    """
    parser = argparse.ArgumentParser(description='ChainerMN example: VGG16')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    # NOTE(review): --frequency is parsed but not used anywhere in this
    # function; kept for CLI backward compatibility.
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu', '-g', action='store_true', default=False,
                        help='use GPU')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--noplot', dest='plot', action='store_false',
                        help='Disable PlotReport extension')
    args = parser.parse_args()

    # Create ChainerMN communicator.
    if args.gpu:
        comm = chainermn.create_communicator('hierarchical')
        # Fix: use intra_rank (the process index within this node) as the
        # GPU id.  The global rank exceeds the local GPU count as soon as
        # the job spans more than one node.
        device = comm.intra_rank
    else:
        comm = chainermn.create_communicator('naive')
        device = -1  # CPU

    if comm.rank == 0:
        print('GPU: {}'.format(args.gpu))
        print('# Minibatch-size: {}'.format(args.batchsize))
        print('# epoch: {}'.format(args.epoch))
        print('')

    # Load the CIFAR10 dataset
    if args.dataset == 'cifar10':
        class_labels = 10
        train, test = chainer.datasets.get_cifar10()
    elif args.dataset == 'cifar100':
        class_labels = 100
        train, test = chainer.datasets.get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    model = L.Classifier(VGG.VGG(comm, class_labels))
    if args.gpu:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    # Only rank 0 keeps the real data; the multi-node iterators below
    # broadcast its batches to the empty datasets on the other ranks.
    if comm.rank != 0:
        train = chainermn.datasets.create_empty_dataset(train)
        test = chainermn.datasets.create_empty_dataset(test)

    train_iter = chainermn.iterators.create_multi_node_iterator(
        chainer.iterators.SerialIterator(train, args.batchsize), comm)
    test_iter = chainermn.iterators.create_multi_node_iterator(
        chainer.iterators.SerialIterator(test, args.batchsize,
                                         repeat=False, shuffle=False),
        comm)

    # Set up a trainer
    updater = training.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=device))

    # Display/output extensions only on rank 0 to avoid duplicated output.
    if comm.rank == 0:
        # Dump a computational graph from 'loss' variable
        # The "main" refers to the target link of the "main" optimizer.
        trainer.extend(extensions.DumpGraph('main/loss'))

        # Write a log of evaluation statistics for each epoch
        trainer.extend(extensions.LogReport())

        # Save two plot images to the result dir
        if args.plot and extensions.PlotReport.available():
            trainer.extend(
                extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                      'epoch', file_name='loss.png'))
            trainer.extend(
                extensions.PlotReport(
                    ['main/accuracy', 'validation/main/accuracy'],
                    'epoch', file_name='accuracy.png'))

        trainer.extend(extensions.PrintReport(
            ['epoch', 'main/loss', 'validation/main/loss',
             'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
        trainer.extend(extensions.ProgressBar())

    # Run the training
    trainer.run()