if len(args.gpus) < 2:
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
else:
    train_iters = [
        MultithreadIterator(dataset, int(args.batchsize / len(devices)),
                            n_threads=4)
        for dataset in split_dataset_n_random(train, len(devices))]
test_iter = MultithreadIterator(test, args.batchsize, repeat=False,
                                shuffle=False, n_threads=4)

# Set up a trainer
if len(args.gpus) < 2:
    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpus[0])
else:
    updater = MultiprocessParallelUpdater(train_iters, optimizer,
                                          devices=devices)
trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=output_dir)

if args.cosine:
    trainer.extend(
        CosineAnnealing('lr', int(args.epoch), len(train) / args.batchsize,
                        eta_min=args.eta_min, init=args.lr))
else:
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=args.lr),
        trigger=triggers.ManualScheduleTrigger(
            [int(args.epoch * 0.50), int(args.epoch * 0.75)], 'epoch'))
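# The CosineAnnealing extension used above is not part of
# chainer.training.extensions; below is a minimal sketch of such an
# extension, with the constructor signature (attribute name, number of
# epochs, iterations per epoch, eta_min, init) inferred from the call
# site above. The actual implementation may differ.
import math

from chainer.training import extension


class CosineAnnealing(extension.Extension):

    def __init__(self, attr, n_epochs, iters_per_epoch, eta_min=0.0,
                 init=0.1):
        self.attr = attr                         # hyperparameter to anneal, e.g. 'lr'
        self.t_max = n_epochs * iters_per_epoch  # total number of iterations
        self.eta_min = eta_min                   # lower bound of the schedule
        self.init = init                         # initial value (eta_max)

    def __call__(self, trainer):
        # standard cosine schedule:
        # eta_t = eta_min + (eta_max - eta_min) * (1 + cos(pi * t / T)) / 2
        t = trainer.updater.iteration
        value = self.eta_min + (self.init - self.eta_min) * (
            1 + math.cos(math.pi * t / self.t_max)) / 2
        setattr(trainer.updater.get_optimizer('main'), self.attr, value)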
localization_net.disable_update()

# if we are using more than one GPU, we need to evenly split the datasets
if len(args.gpus) > 1:
    gpu_datasets = split_dataset_n_random(train_dataset, len(args.gpus))
    if len(gpu_datasets[0]) != len(gpu_datasets[-1]):
        adapted_second_split = split_dataset(gpu_datasets[-1],
                                             len(gpu_datasets[0]))[0]
        gpu_datasets[-1] = adapted_second_split
else:
    gpu_datasets = [train_dataset]

train_iterators = [chainer.iterators.MultiprocessIterator(dataset, args.batch_size)
                   for dataset in gpu_datasets]
validation_iterator = chainer.iterators.MultiprocessIterator(
    validation_dataset, args.batch_size, repeat=False)

# use the MultiprocessParallelUpdater in order to harness the full power of
# data parallel computation
updater = MultiprocessParallelUpdater(train_iterators, optimizer,
                                      devices=args.gpus)

log_dir = os.path.join(args.log_dir, "{}_{}".format(
    datetime.datetime.now().isoformat(), args.log_name))
args.log_dir = log_dir

# back up the current file
if not os.path.exists(log_dir):
    os.makedirs(log_dir, exist_ok=True)
shutil.copy(__file__, log_dir)

# back up all necessary configuration params
report = {
    'log_dir': log_dir,
    'image_size': image_size,
    'target_size': target_shape,
    'localization_net': [localization_net.__class__.__name__,
                         get_definition_filename(localization_net)],
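# get_definition_filename is a helper from this repository that is not
# shown in the excerpt; a plausible sketch is given below, assuming it
# resolves the source file in which an object's class is defined, so the
# report records where each network implementation lives.
import inspect


def get_definition_filename(obj):
    # inspect.getfile accepts classes but not arbitrary instances, so
    # fall back to the instance's class when needed
    return inspect.getfile(obj if inspect.isclass(obj) else obj.__class__)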
train_iterators = [
    MultiprocessIterator(dataset, args.batch_size,
                         n_processes=args.num_processes)
    for dataset in gpu_datasets
]
validation_iterator = MultiprocessIterator(
    validation_dataset, args.batch_size,
    n_processes=args.num_processes, repeat=False)

updater = MultiprocessParallelUpdater(
    train_iterators, optimizer, devices=args.gpus,
    converter=get_concat_and_pad_examples(args.blank_label))
updater.setup_workers()

log_dir = os.path.join(
    args.log_dir,
    "{}_{}".format(datetime.datetime.now().isoformat(), args.log_name))
args.log_dir = log_dir

# back up the current file
if not os.path.exists(log_dir):
    os.makedirs(log_dir, exist_ok=True)
shutil.copy(__file__, log_dir)

# log all necessary configuration params
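# get_concat_and_pad_examples is a helper from this repository that is not
# shown in the excerpt; below is a minimal sketch of what such a converter
# factory could look like, assuming batches of (image, label_sequence)
# pairs whose label sequences must be padded to equal length with the
# blank label before chainer.dataset.concat_examples can stack them.
# (concat_examples also takes a `padding` argument, but padding only the
# labels keeps the images untouched.)
import numpy as np

from chainer.dataset import concat_examples


def get_concat_and_pad_examples(blank_label):
    def concat_and_pad_examples(batch, device=None):
        images, labels = zip(*batch)
        # pad every label sequence to the longest one in the batch
        max_len = max(len(label) for label in labels)
        padded = [
            np.pad(label, (0, max_len - len(label)), mode='constant',
                   constant_values=blank_label)
            for label in labels
        ]
        return concat_examples(list(zip(images, padded)), device=device)
    return concat_and_pad_examples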