def _create_dataset(self):
    """Build the (train, test) dataset pair for the configured domain.

    The domain is selected by ``self.name``; ``self.seed`` is forwarded to
    the subsampled MNIST/USPS loaders. Exits the process for unknown names.
    """
    name = self.name
    seed = self.seed

    # Domains backed by a plain image directory: train and test expose the
    # same data, with test kept as an independent deep copy.
    directory_roots = {
        'amazon': './data/Office/amazon/images',
        'dslr': './data/Office/dslr/images',
        'webcam': './data/Office/webcam/images',
        'visda_train': './data/VisDA/train',
        'visda_validation': './data/VisDA/validation',
    }

    if name == 'mnist':
        train, test = chainer.datasets.get_mnist(ndim=3, rgb_format=True)
    elif name == 'usps1800':
        train, test = get_usps1800(seed=seed)
    elif name == 'mnist2000':
        train, test = get_mnist2000(seed=seed)
    elif name == 'svhn':
        train, test = chainer.datasets.get_svhn()
    elif name == 'mnistm':
        train, test = get_mnistm()
    elif name in directory_roots:
        train = DirectoryParsingLabelDataset(directory_roots[name])
        test = copy.deepcopy(train)
    else:
        sys.exit("The domain name {} is wrong.".format(self.name))
    return train, test
def test_directory_parsing_label_dataset(self):
    """Dataset length, labels and path ordering match the directory tree."""
    dataset = DirectoryParsingLabelDataset(self.tmp_dir, color=self.color)

    # The expected example count depends on how deeply class dirs nest.
    if self.depth == 1:
        expected_length = self.n_class * self.n_img_per_class
    elif self.depth == 2:
        expected_length = (
            self.n_class * self.n_sub_directory * self.n_img_per_class)
    self.assertEqual(len(dataset), expected_length)
    assert_is_label_dataset(dataset, self.n_class, color=self.color)

    label_names = directory_parsing_label_names(self.tmp_dir)
    self.assertEqual(
        label_names,
        ['class_{}'.format(i) for i in range(self.n_class)])

    # Paths must be enumerated class-by-class in lexicographic order.
    if self.depth == 1:
        self.assertEqual(
            dataset.img_paths,
            ['{}/class_{}/img{}.{}'.format(self.tmp_dir, i, j, self.suffix)
             for i in range(self.n_class)
             for j in range(self.n_img_per_class)])
    elif self.depth == 2:
        self.assertEqual(
            dataset.img_paths,
            ['{}/class_{}/nested_{}/img{}.{}'.format(
                self.tmp_dir, i, j, k, self.suffix)
             for i in range(self.n_class)
             for j in range(self.n_sub_directory)
             for k in range(self.n_img_per_class)])
def __init__(self,
             root=os.path.join("~", ".chainer", "datasets", "imagenet"),
             mode="train",
             transform=None):
    """ImageNet-style dataset rooted at ``root``/{train,val}.

    Args:
        root: Directory containing ``train`` and ``val`` sub-directories.
            A leading ``~`` is expanded to the user's home directory.
        mode: ``"train"`` selects the train split; any other value selects
            the validation split.
        transform: Optional callable applied to examples by the caller.
    """
    split = "train" if mode == "train" else "val"
    # Bug fix: expand "~" so the default root resolves under the user's
    # home directory instead of a literal relative "~" path component.
    root = os.path.join(os.path.expanduser(root), split)
    self.transform = transform
    self.base = DirectoryParsingLabelDataset(root)
def test_numerical_sort(self):
    """Label names come back in numeric order when ``numerical_sort=True``."""
    dataset = DirectoryParsingLabelDataset(self.tmp_dir, numerical_sort=True)
    assert_is_label_dataset(dataset, self.n_class)

    names = directory_parsing_label_names(self.tmp_dir, numerical_sort=True)
    self.assertEqual(names, [str(i) for i in range(self.n_class)])
def tuple2array(url):
    """Collect every image array from a directory-parsed dataset.

    Args:
        url: Path to the dataset root parsed by
            ``DirectoryParsingLabelDataset``.

    Returns:
        list: The image (first element) of each ``(img, label)`` example,
        in dataset order.
    """
    dataset = DirectoryParsingLabelDataset(url)
    # The original pre-filled the list with dummy float32 arrays and then
    # overwrote every slot; build the result directly instead.
    return [dataset[i][0] for i in range(len(dataset))]
def __init__(self, root, scale_size=256, crop_size=224,
             mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Wrap a directory-parsed dataset with resize/crop/normalize settings."""
    self.base = DirectoryParsingLabelDataset(root)
    self.scale_size = scale_size
    # Normalize crop_size to an (H, W) pair.
    if isinstance(crop_size, int):
        self.crop_size = (crop_size, crop_size)
    else:
        self.crop_size = crop_size
    # Per-channel statistics reshaped to (C, 1, 1) so they broadcast over
    # CHW images during normalization.
    self.mean = np.array(mean, np.float32).reshape(-1, 1, 1)
    self.std = np.array(std, np.float32).reshape(-1, 1, 1)
def main():
    """Evaluate a pretrained classification model on an ImageNet val set.

    Reads the validation root from the command line, builds the requested
    network, runs prediction over the whole set and prints the top-1 error.
    """
    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('val', help='Path to root of the validation dataset')
    parser.add_argument(
        '--model', choices=('vgg16', 'resnet50', 'resnet101', 'resnet152'))
    parser.add_argument('--pretrained_model', default='imagenet')
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--crop', choices=('center', '10'), default='center')
    parser.add_argument('--resnet_mode', default='he')
    args = parser.parse_args()

    # The class count is inferred from the directory names under the root.
    dataset = DirectoryParsingLabelDataset(args.val)
    label_names = directory_parsing_label_names(args.val)
    n_class = len(label_names)

    # Ordered, single-pass iterator over the validation set.
    iterator = iterators.MultiprocessIterator(
        dataset, args.batchsize, repeat=False, shuffle=False,
        n_processes=6, shared_mem=300000000)

    if args.model == 'vgg16':
        extractor = VGG16(n_class, args.pretrained_model)
    elif args.model == 'resnet50':
        extractor = ResNet50(
            n_class, args.pretrained_model, mode=args.resnet_mode)
    elif args.model == 'resnet101':
        extractor = ResNet101(
            n_class, args.pretrained_model, mode=args.resnet_mode)
    elif args.model == 'resnet152':
        extractor = ResNet152(
            n_class, args.pretrained_model, mode=args.resnet_mode)
    model = FeaturePredictor(
        extractor, crop_size=224, scale_size=256, crop=args.crop)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    print('Model has been prepared. Evaluation starts.')
    in_values, out_values, rest_values = apply_to_iterator(
        model.predict, iterator, hook=ProgressHook(len(dataset)))
    # Input images are not needed for scoring; drop them early.
    del in_values

    pred_probs, = out_values
    gt_labels, = rest_values

    # Top-1 accuracy over the entire validation set.
    accuracy = F.accuracy(
        np.array(list(pred_probs)), np.array(list(gt_labels))).data

    print()
    print('Top 1 Error {}'.format(1. - accuracy))
def get_val_data_iterator(data_dir, batch_size, num_workers, num_classes):
    """Create an ordered, non-repeating iterator over the validation split.

    Returns:
        tuple: ``(iterator, dataset_length)``.
    """
    val_root = os.path.join(data_dir, 'val')
    val_dataset = DirectoryParsingLabelDataset(val_root)
    num_samples = len(val_dataset)

    # Sanity check: the directory layout must match the expected class count.
    assert (len(directory_parsing_label_names(val_root)) == num_classes)

    val_iterator = iterators.MultiprocessIterator(
        dataset=val_dataset,
        batch_size=batch_size,
        repeat=False,
        shuffle=False,
        n_processes=num_workers,
        shared_mem=300000000)
    return val_iterator, num_samples
def __init__(self,
             root=os.path.join("~", ".chainer", "datasets", "imagenet"),
             mode="train", scale_size=256, crop_size=224,
             mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """ImageNet-style dataset with resize/crop/normalize settings.

    Args:
        root: Directory containing ``train`` and ``val`` sub-directories.
            A leading ``~`` is expanded to the user's home directory.
        mode: ``"train"`` selects the train split; anything else selects val.
        scale_size: Shorter-side size images are scaled to before cropping.
        crop_size: Crop size; an int is normalized to an (H, W) pair.
        mean: Per-channel normalization mean.
        std: Per-channel normalization std.
    """
    split = "train" if mode == "train" else "val"
    # Bug fix: expand "~" so the default root resolves under the user's
    # home directory instead of a literal relative "~" path component.
    root = os.path.join(os.path.expanduser(root), split)
    self.base = DirectoryParsingLabelDataset(root)
    self.scale_size = scale_size
    if isinstance(crop_size, int):
        crop_size = (crop_size, crop_size)
    self.crop_size = crop_size
    # (C, 1, 1) statistics broadcast over CHW images during normalization.
    self.mean = np.array(mean, np.float32)[:, np.newaxis, np.newaxis]
    self.std = np.array(std, np.float32)[:, np.newaxis, np.newaxis]
def main():
    """Evaluate a saved classifier and write its confusion matrix.

    Loads (or computes and caches) the dataset mean, builds the test
    pipeline, restores the trained weights and delegates plotting to
    ``confusion_matrix_cocoa``.
    """
    args = parser()
    save_dir = Path(args.save_dir)
    save_dir.mkdir(exist_ok=True, parents=True)

    root = args.dataset
    dataset = DirectoryParsingLabelDataset(root)

    # Load the cached channel mean if present; otherwise compute and cache.
    mean_path = root + '/mean.npy'
    if os.path.exists(mean_path):
        mean = np.load(mean_path)
    else:
        # Bug fix: the original passed the undefined name ``datasets`` here,
        # raising NameError whenever mean.npy was missing.
        mean = compute_mean(dataset, root)
        np.save(mean_path, mean)

    use_mean = args.use_mean
    print('use mean flag is ', use_mean)
    if not use_mean:
        print('not using mean')

    # Pair every image path with its label for the evaluation pipeline.
    X = np.array([image_paths for image_paths in dataset.img_paths])
    y = np.array([label for label in dataset.labels])
    test_data = LabeledImageDataset([(x, y) for x, y in zip(X, y)])
    test = chainer.datasets.TransformDataset(
        test_data,
        partial(_transform2, mean=mean, train=False,
                mean_flag=args.use_mean))

    class_num = len(set(dataset.labels))
    model = L.Classifier(archs[args.arch](output=class_num)).to_gpu()
    serializers.load_npz(args.load_npz, model)

    # Label names are the class sub-directory names (minus the mean cache).
    dnames = glob.glob('{}/*'.format(root))
    labels_list = []
    for d in dnames:
        p_dir = Path(d)
        labels_list.append(p_dir.name)
    if 'mean.npy' in labels_list:
        labels_list.remove('mean.npy')

    confusion_matrix_cocoa(
        test, args.gpu, class_num, model, save_dir, 1, labels_list)
def setup(dataset, model, pretrained_model, batchsize, val, crop, resnet_arch):
    """Prepare (dataset, eval function, model, batchsize) for evaluation.

    ``dataset`` arrives as a dataset *name* and is rebound to the dataset
    object; ``model`` arrives as a model name and is rebound to the
    instantiated ``FeaturePredictor``.
    """
    dataset_name = dataset
    if dataset_name == 'imagenet':
        dataset = DirectoryParsingLabelDataset(val)
        label_names = directory_parsing_label_names(val)

        def eval_(out_values, rest_values):
            # Report top-1 error over the full validation set.
            pred_probs, = out_values
            gt_labels, = rest_values

            accuracy = F.accuracy(
                np.array(list(pred_probs)), np.array(list(gt_labels))).data
            print()
            print('Top 1 Error {}'.format(1. - accuracy))

    # NOTE(review): for any dataset name other than 'imagenet',
    # ``dataset``/``label_names``/``eval_`` are never assigned and the
    # ``len(label_names)`` below raises NameError — confirm callers only
    # ever pass 'imagenet'.
    cls, pretrained_models, default_batchsize = models[model][:3]
    if pretrained_model is None:
        pretrained_model = pretrained_models.get(dataset_name, dataset_name)
    if crop is None:
        crop = models[model][3]

    kwargs = {
        'n_class': len(label_names),
        'pretrained_model': pretrained_model,
    }
    if model in ['resnet50', 'resnet101', 'resnet152']:
        if resnet_arch is None:
            resnet_arch = models[model][4]
        kwargs.update({'arch': resnet_arch})
    extractor = cls(**kwargs)
    model = FeaturePredictor(
        extractor, crop_size=224, scale_size=256, crop=crop)

    if batchsize is None:
        batchsize = default_batchsize

    return dataset, eval_, model, batchsize
def main():
    """Distributed (ChainerMN) ImageNet training for FB-style ResNets.

    Each worker trains on its scattered shard with momentum SGD, linear
    warmup plus step decay, and rank 0 handles logging and snapshots.
    """
    # Supported backbones and the layer whose output is used for scoring.
    model_cfgs = {
        'resnet50': {
            'class': ResNet50,
            'score_layer_name': 'fc6',
            'kwargs': {
                'arch': 'fb'
            }
        },
        'resnet101': {
            'class': ResNet101,
            'score_layer_name': 'fc6',
            'kwargs': {
                'arch': 'fb'
            }
        },
        'resnet152': {
            'class': ResNet152,
            'score_layer_name': 'fc6',
            'kwargs': {
                'arch': 'fb'
            }
        }
    }
    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to root of the train dataset')
    parser.add_argument('val', help='Path to root of the validation dataset')
    parser.add_argument('--model', '-m', choices=model_cfgs.keys(),
                        default='resnet50', help='Convnet models')
    parser.add_argument('--communicator', type=str,
                        default='pure_nccl', help='Type of communicator')
    parser.add_argument('--loaderjob', type=int, default=4)
    parser.add_argument('--batchsize', type=int, default=32,
                        help='Batch size for each worker')
    parser.add_argument('--lr', type=float)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--weight_decay', type=float, default=0.0001)
    parser.add_argument('--out', type=str, default='result')
    parser.add_argument('--epoch', type=int, default=90)
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    # Switch to 'forkserver' and spawn a dummy process before CUDA/NCCL
    # initialization so MultiprocessIterator workers start cleanly.
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank

    if args.lr is not None:
        lr = args.lr
    else:
        # Linear scaling rule: lr grows with the effective global batch.
        lr = 0.1 * (args.batchsize * comm.size) / 256
        if comm.rank == 0:
            print('lr={}: lr is selected based on the linear '
                  'scaling rule'.format(lr))

    label_names = directory_parsing_label_names(args.train)

    model_cfg = model_cfgs[args.model]
    extractor = model_cfg['class'](
        n_class=len(label_names), **model_cfg['kwargs'])
    extractor.pick = model_cfg['score_layer_name']
    model = Classifier(extractor)
    # Following https://arxiv.org/pdf/1706.02677.pdf,
    # the gamma of the last BN of each resblock is initialized by zeros.
    for l in model.links():
        if isinstance(l, Bottleneck):
            l.conv3.bn.gamma.data[:] = 0

    train_data = DirectoryParsingLabelDataset(args.train)
    val_data = DirectoryParsingLabelDataset(args.val)
    train_data = TransformDataset(
        train_data, ('img', 'label'), TrainTransform(extractor.mean))
    val_data = TransformDataset(
        val_data, ('img', 'label'), ValTransform(extractor.mean))
    print('finished loading dataset')

    # Rank 0 builds the full index arrays; scatter_dataset shards them
    # across all workers (shuffled for training).
    if comm.rank == 0:
        train_indices = np.arange(len(train_data))
        val_indices = np.arange(len(val_data))
    else:
        train_indices = None
        val_indices = None

    train_indices = chainermn.scatter_dataset(
        train_indices, comm, shuffle=True)
    val_indices = chainermn.scatter_dataset(val_indices, comm, shuffle=True)
    train_data = train_data.slice[train_indices]
    val_data = val_data.slice[val_indices]

    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, args.batchsize, n_processes=args.loaderjob)
    val_iter = iterators.MultiprocessIterator(
        val_data, args.batchsize,
        repeat=False, shuffle=False, n_processes=args.loaderjob)

    optimizer = chainermn.create_multi_node_optimizer(
        CorrectedMomentumSGD(lr=lr, momentum=args.momentum), comm)
    optimizer.setup(model)
    # Do not apply weight decay to BN scale/shift parameters.
    for param in model.params():
        if param.name not in ('beta', 'gamma'):
            param.update_rule.add_hook(WeightDecay(args.weight_decay))

    if device >= 0:
        chainer.cuda.get_device(device).use()
        model.to_gpu()

    updater = chainer.training.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    @make_shift('lr')
    def warmup_and_exponential_shift(trainer):
        # Linear warmup over 5 epochs, then step decay at epochs 30/60/80.
        epoch = trainer.updater.epoch_detail
        warmup_epoch = 5
        if epoch < warmup_epoch:
            if lr > 0.1:
                warmup_rate = 0.1 / lr
                rate = warmup_rate \
                    + (1 - warmup_rate) * epoch / warmup_epoch
            else:
                rate = 1
        elif epoch < 30:
            rate = 1
        elif epoch < 60:
            rate = 0.1
        elif epoch < 80:
            rate = 0.01
        else:
            rate = 0.001
        return rate * lr

    trainer.extend(warmup_and_exponential_shift)

    evaluator = chainermn.create_multi_node_evaluator(
        extensions.Evaluator(val_iter, model, device=device), comm)
    trainer.extend(evaluator, trigger=(1, 'epoch'))

    log_interval = 0.1, 'epoch'
    print_interval = 0.1, 'epoch'

    # Only the root rank writes logs, snapshots and the progress bar.
    if comm.rank == 0:
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
        trainer.extend(extensions.snapshot_object(
            extractor, 'snapshot_model_{.updater.epoch}.npz'),
            trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy'
        ]), trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
def main():
    """Single-GPU ImageNet training for the DetNAS-small backbone.

    Uses momentum SGD with a linear learning-rate decay to zero over the
    planned number of iterations.
    """
    model_cfgs = {
        'detnas_small_coco': {
            'class': DetNASSmallCOCO,
            'score_layer_name': 'fc',
            'kwargs': {
                #'n_class': 1000
            }
        },
    }
    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to root of the train dataset')
    parser.add_argument('val', help='Path to root of the validation dataset')
    parser.add_argument('--trial', action='store_true')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument(
        '--model', '-m', choices=model_cfgs.keys(),
        default='detnas_small_coco', help='Convnet models')
    parser.add_argument('--loaderjob', type=int, default=4)
    parser.add_argument(
        '--batchsize', type=int, help='Batch size for each worker')
    parser.add_argument('--lr', type=float)
    parser.add_argument('--momentum', type=float)
    parser.add_argument('--weight_decay', type=float)
    parser.add_argument('--out', type=str, default='result')
    parser.add_argument('--epoch', type=int)
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    # Start-method dance so MultiprocessIterator workers fork cleanly.
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    label_names = directory_parsing_label_names(args.train)

    model_cfg = model_cfgs[args.model]
    extractor = model_cfg['class'](
        n_class=len(label_names), **model_cfg['kwargs'])
    extractor.pick = model_cfg['score_layer_name']
    model = Classifier(extractor)

    train_data = DirectoryParsingLabelDataset(args.train)
    val_data = DirectoryParsingLabelDataset(args.val)
    train_data = TransformDataset(train_data, TrainTransform(extractor.mean))
    val_data = TransformDataset(val_data, ValTransform(extractor.mean))
    print('finished loading dataset')

    # --trial shrinks the schedule to 1% of the training data.
    # NOTE(review): the datasets themselves are NOT sliced (the slicing code
    # below is disabled); ``train_indices`` only shortens the LinearShift
    # schedule — confirm this is intentional.
    train_indices = np.arange(len(train_data)//(100 if args.trial else 1))
    val_indices = np.arange(len(val_data))
    """
    train_data = train_data.slice[train_indices]
    val_data = val_data.slice[val_indices]
    """

    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, args.batchsize, n_processes=args.loaderjob)
    val_iter = iterators.MultiprocessIterator(
        val_data, args.batchsize,
        repeat=False, shuffle=False, n_processes=args.loaderjob)

    optimizer = CorrectedMomentumSGD(lr=args.lr, momentum=args.momentum)
    optimizer.setup(model)
    # Do not apply weight decay to BN scale/shift parameters.
    for param in model.params():
        if param.name not in ('beta', 'gamma'):
            param.update_rule.add_hook(WeightDecay(args.weight_decay))

    if args.gpu != -1:
        model.to_gpu(args.gpu)

    updater = chainer.training.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Anneal the learning rate linearly to zero over the planned iterations.
    trainer.extend(LinearShift('lr', (args.lr, 0.0),
                               (0, len(train_indices) / args.batchsize)))

    evaluator = extensions.Evaluator(val_iter, model)
    trainer.extend(evaluator, trigger=(1, 'epoch'))

    log_interval = 0.1, 'epoch'
    print_interval = 0.1, 'epoch'

    trainer.extend(
        chainer.training.extensions.observe_lr(), trigger=log_interval)
    trainer.extend(
        extensions.snapshot_object(
            extractor, 'snapshot_model_{.updater.epoch}.npz'),
        trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(
        extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy'
        ]), trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
def main():
    """ChainerMN ImageNet training with optional chainer-compiler paths.

    Besides vanilla training, the script can export the extractor to ONNX
    (``--export``) and exit, or train a previously compiled ONNX model
    (``--compile``).
    """
    model_cfgs = {
        'resnet50': {'class': ResNet50, 'score_layer_name': 'fc6',
                     'kwargs': {'arch': 'fb'}},
        'resnet101': {'class': ResNet101, 'score_layer_name': 'fc6',
                      'kwargs': {'arch': 'fb'}},
        'resnet152': {'class': ResNet152, 'score_layer_name': 'fc6',
                      'kwargs': {'arch': 'fb'}}
    }
    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to root of the train dataset')
    parser.add_argument('val', help='Path to root of the validation dataset')
    parser.add_argument('--export', type=str, default=None,
                        help='Export the model to ONNX')
    parser.add_argument('--compile', type=str, default=None,
                        help='Compile the model')
    parser.add_argument('--computation_order', type=str, default=None,
                        help='Computation order in backpropagation')
    parser.add_argument('--model', '-m', choices=model_cfgs.keys(),
                        default='resnet50', help='Convnet models')
    parser.add_argument('--communicator', type=str,
                        default='pure_nccl', help='Type of communicator')
    parser.add_argument('--loaderjob', type=int, default=4)
    parser.add_argument('--batchsize', type=int, default=32,
                        help='Batch size for each worker')
    parser.add_argument('--lr', type=float)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--weight-decay', type=float, default=0.0001)
    parser.add_argument('--out', type=str, default='result')
    parser.add_argument('--epoch', type=int, default=90)
    parser.add_argument('--iterations', '-I', type=int, default=None,
                        help='Number of iterations to train')
    parser.add_argument('--no_use_fixed_batch_dataset',
                        dest='use_fixed_batch_dataset',
                        action='store_false',
                        help='Disable the use of FixedBatchDataset')
    parser.add_argument('--compiler-log', action='store_true',
                        help='Enables compile-time logging')
    parser.add_argument('--trace', action='store_true',
                        help='Enables runtime tracing')
    parser.add_argument('--verbose', action='store_true',
                        help='Enables runtime verbose log')
    parser.add_argument('--skip_runtime_type_check', action='store_true',
                        help='Skip runtime type check')
    parser.add_argument('--dump_memory_usage', type=int, default=0,
                        help='Dump memory usage (0-2)')
    parser.add_argument('--quiet_period', type=int, default=0,
                        help='Quiet period after runtime report')
    parser.add_argument('--overwrite_batchsize', action='store_true',
                        help='Overwrite batch size')
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    # Start-method dance so MultiprocessIterator workers fork cleanly.
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank

    if args.lr is not None:
        lr = args.lr
    else:
        # Linear scaling rule: lr grows with the effective global batch.
        lr = 0.1 * (args.batchsize * comm.size) / 256
        if comm.rank == 0:
            print('lr={}: lr is selected based on the linear '
                  'scaling rule'.format(lr))

    label_names = directory_parsing_label_names(args.train)

    model_cfg = model_cfgs[args.model]
    extractor = model_cfg['class'](
        n_class=len(label_names), **model_cfg['kwargs'])
    extractor.pick = model_cfg['score_layer_name']

    # Following https://arxiv.org/pdf/1706.02677.pdf,
    # the gamma of the last BN of each resblock is initialized by zeros.
    for l in extractor.links():
        if isinstance(l, Bottleneck):
            l.conv3.bn.gamma.data[:] = 0

    if args.export is not None:
        # Export-only mode: trace the extractor with a dummy batch and exit.
        chainer_compiler.use_unified_memory_allocator()
        extractor.to_device(device)
        x = extractor.xp.zeros((args.batchsize, 3, 224, 224)).astype('f')
        chainer_compiler.export(extractor, [x], args.export)
        return

    if args.compile is not None:
        print('run compiled model')
        chainer_compiler.use_chainerx_shared_allocator()
        extractor.to_device(device)
        # init params
        with chainer.using_config('enable_backprop', False),\
                chainer.using_config('train', False):
            x = extractor.xp.zeros((1, 3, 224, 224)).astype('f')
            extractor(x)

        compiler_kwargs = {}
        if args.compiler_log:
            compiler_kwargs['compiler_log'] = True
        runtime_kwargs = {}
        if args.trace:
            runtime_kwargs['trace'] = True
        if args.verbose:
            runtime_kwargs['verbose'] = True
        if args.skip_runtime_type_check:
            runtime_kwargs['check_types'] = False
        if args.dump_memory_usage >= 1:
            runtime_kwargs['dump_memory_usage'] = args.dump_memory_usage
            # Record current GPU memory usage as the baseline for reports.
            free, total = cupy.cuda.runtime.memGetInfo()
            used = total - free
            runtime_kwargs['base_memory_usage'] = used

        onnx_filename = args.compile
        if args.overwrite_batchsize:
            # Rewrite the ONNX graph's input shape to match --batchsize.
            new_onnx_filename = ('/tmp/overwrite_batchsize_' +
                                 os.path.basename(onnx_filename))
            new_input_types = [
                input_rewriter.Type(shape=(args.batchsize, 3, 224, 224))
            ]
            input_rewriter.rewrite_onnx_file(onnx_filename,
                                             new_onnx_filename,
                                             new_input_types)
            onnx_filename = new_onnx_filename

        extractor_cc = chainer_compiler.compile_onnx(
            extractor,
            onnx_filename,
            'onnx_chainer',
            computation_order=args.computation_order,
            compiler_kwargs=compiler_kwargs,
            runtime_kwargs=runtime_kwargs,
            quiet_period=args.quiet_period)
        model = Classifier(extractor_cc)
    else:
        print('run vanilla chainer model')
        model = Classifier(extractor)

    train_data = DirectoryParsingLabelDataset(args.train)
    val_data = DirectoryParsingLabelDataset(args.val)
    train_data = TransformDataset(
        train_data, ('img', 'label'), TrainTransform(extractor.mean))
    val_data = TransformDataset(
        val_data, ('img', 'label'), ValTransform(extractor.mean))
    print('finished loading dataset')

    # Rank 0 builds the full index arrays; scatter_dataset shards them.
    if comm.rank == 0:
        train_indices = np.arange(len(train_data))
        val_indices = np.arange(len(val_data))
    else:
        train_indices = None
        val_indices = None

    train_indices = chainermn.scatter_dataset(
        train_indices, comm, shuffle=True)
    val_indices = chainermn.scatter_dataset(val_indices, comm, shuffle=True)
    train_data = train_data.slice[train_indices]
    val_data = val_data.slice[val_indices]

    if args.use_fixed_batch_dataset:
        # Pad datasets so every batch has exactly args.batchsize examples
        # (required by the compiled model's fixed input shape).
        train_data = FixedBatchDataset(train_data, args.batchsize)
        val_data = FixedBatchDataset(val_data, args.batchsize)

    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, args.batchsize, n_processes=args.loaderjob)
    val_iter = iterators.MultiprocessIterator(
        val_data, args.batchsize,
        repeat=False, shuffle=False, n_processes=args.loaderjob)

    optimizer = chainermn.create_multi_node_optimizer(
        CorrectedMomentumSGD(lr=lr, momentum=args.momentum), comm)
    optimizer.setup(model)
    # Do not apply weight decay to BN scale/shift parameters.
    for param in model.params():
        if param.name not in ('beta', 'gamma'):
            param.update_rule.add_hook(WeightDecay(args.weight_decay))

    if device >= 0:
        chainer.cuda.get_device(device).use()
        model.to_gpu()

    updater = chainer.training.StandardUpdater(
        train_iter, optimizer, device=device)

    if args.iterations:
        stop_trigger = (args.iterations, 'iteration')
    else:
        stop_trigger = (args.epoch, 'epoch')
    trainer = training.Trainer(
        updater, stop_trigger, out=args.out)

    @make_shift('lr')
    def warmup_and_exponential_shift(trainer):
        # Linear warmup over 5 epochs, then step decay at epochs 30/60/80.
        epoch = trainer.updater.epoch_detail
        warmup_epoch = 5
        if epoch < warmup_epoch:
            if lr > 0.1:
                warmup_rate = 0.1 / lr
                rate = warmup_rate \
                    + (1 - warmup_rate) * epoch / warmup_epoch
            else:
                rate = 1
        elif epoch < 30:
            rate = 1
        elif epoch < 60:
            rate = 0.1
        elif epoch < 80:
            rate = 0.01
        else:
            rate = 0.001
        return rate * lr

    trainer.extend(warmup_and_exponential_shift)

    evaluator = chainermn.create_multi_node_evaluator(
        extensions.Evaluator(val_iter, model, device=device), comm)
    trainer.extend(evaluator, trigger=(1, 'epoch'))

    log_interval = 0.1, 'epoch'
    print_interval = 0.1, 'epoch'

    # Only the root rank writes logs, snapshots and the progress bar.
    if comm.rank == 0:
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
        trainer.extend(
            extensions.snapshot_object(
                extractor, 'snapshot_model_{.updater.epoch}.npz'),
            trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.PrintReport(
            ['iteration', 'epoch', 'elapsed_time', 'lr',
             'main/loss', 'validation/main/loss',
             'main/accuracy', 'validation/main/accuracy']
        ), trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
def main():
    """Elastic (echainer) distributed ImageNet training with state recovery.

    NOTE(review): a large span of this function appears to have been
    destroyed by credential redaction — see the inline note below. The code
    after the redacted span references names (lr, comm, device, model,
    optimizer, train_iter, val_iter, updater, trainer, retry) whose
    definitions are missing, and uses ``continue``/``break`` from a retry
    loop whose header is also missing. Restore the original from version
    control before modifying this function.
    """
    archs = {
        'resnet50': {'class': ResNet50, 'score_layer_name': 'fc6',
                     'kwargs': {'arch': 'fb'}},
        'resnet101': {'class': ResNet101, 'score_layer_name': 'fc6',
                      'kwargs': {'arch': 'fb'}},
        'resnet152': {'class': ResNet152, 'score_layer_name': 'fc6',
                      'kwargs': {'arch': 'fb'}}
    }
    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to root of the train dataset')
    parser.add_argument('val', help='Path to root of the validation dataset')
    parser.add_argument('--arch', '-a', choices=archs.keys(),
                        default='resnet50', help='Convnet architecture')
    parser.add_argument('--loaderjob', type=int, default=4)
    parser.add_argument('--batchsize', type=int, default=32,
                        help='Batch size for each worker')
    parser.add_argument('--lr', type=float)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--weight_decay', type=float, default=0.0001)
    parser.add_argument('--out', type=str, default='result')
    parser.add_argument('--epoch', type=int, default=90)
    parser.add_argument('--min', type=int, required=True,
                        help='Minimum number of processes')
    parser.add_argument('--start', type=int, required=True,
                        help='Number of processes to start')
    parser.add_argument('--bind', '-p', type=str, required=True,
                        help='address to bind gRPC server')
    # NOTE(review): everything between this argument's redacted default and
    # the @make_shift decorator below (argument parsing, communicator /
    # dataset / optimizer / trainer construction, and the retry-loop header)
    # was lost to secret scrubbing; the fragment is preserved verbatim.
    parser.add_argument('--etcd', '-c', type=str, default='etcd://*****:*****@make_shift('lr')
    def warmup_and_exponential_shift(trainer):
        # Linear warmup over 5 epochs, then step decay at epochs 30/60/80.
        epoch = trainer.updater.epoch_detail
        warmup_epoch = 5
        if epoch < warmup_epoch:
            if lr > 0.1:
                warmup_rate = 0.1 / lr
                rate = warmup_rate \
                    + (1 - warmup_rate) * epoch / warmup_epoch
            else:
                rate = 1
        elif epoch < 30:
            rate = 1
        elif epoch < 60:
            rate = 0.1
        elif epoch < 80:
            rate = 0.01
        else:
            rate = 0.001
        return rate * lr

    trainer.extend(warmup_and_exponential_shift)

    evaluator = chainermn.create_multi_node_evaluator(
        extensions.Evaluator(val_iter, model, device=device), comm)
    trainer.extend(evaluator, trigger=(1, 'epoch'))
    trainer.extend(comm.get_uninitializer(), trigger=(1, 'iteration'))

    log_interval = 0.1, 'epoch'
    print_interval = 0.5, 'epoch'
    plot_interval = 1, 'epoch'

    if comm.intra_rank == 0:
        # TODO: lr is not properly controlled for accuracy
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
        trainer.extend(echainer.extension.Lineage(comm, trigger=log_interval))
        trainer.extend(extensions.PrintReport(
            ['iteration', 'epoch', 'elapsed_time', 'lr',
             'main/loss', 'validation/main/loss',
             'main/accuracy', 'validation/main/accuracy'],
            log_report='Lineage'), trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(
                extensions.PlotReport(
                    ['main/loss', 'validation/main/loss'],
                    file_name='loss.png', trigger=plot_interval
                ), trigger=plot_interval
            )
            trainer.extend(
                extensions.PlotReport(
                    ['main/accuracy', 'validation/main/accuracy'],
                    file_name='accuracy.png', trigger=plot_interval
                ), trigger=plot_interval
            )

    # Optimizer includes model parameters and other params in optimizer
    comm.register_state('optimizer', optimizer)
    comm.register_state('iterator', train_iter)

    if retry or not comm.initial:
        # Resume from cluster-replicated state after a membership change.
        (iteration, epoch) = comm.fetch_state('optimizer', optimizer)
        # train_iter.epoch = epoch
        comm.fetch_state('iterator', train_iter)
        updater.iteration = iteration
        optimizers = trainer.updater.get_all_optimizers()
        for name in optimizers.keys():
            optimizers[name].reset_prev_params()

    try:
        print('start trainer.run(), ',
              trainer.updater.iteration, trainer.updater.epoch)
        trainer.run()
        done = trainer._done
    except CommException as ce:
        print("Comm exception >>>>>>>>>>>", ce,
              updater.iteration, updater.epoch)
        comm.save_all_states(updater.iteration, updater.epoch)
        # Here comm will be ready to accept fetch state calls and once all
        # nodes got catched up it'll return and continue to run: TODO
        comm.sync_cluster(trainer.updater.get_all_optimizers())
        retry = True
        continue
    except ClusterUpdatedException as ce:
        print("Cluster updated: >>>>>>>>>>>", ce)
        comm.save_all_states(updater.iteration, updater.epoch)
        comm.sync_cluster(trainer.updater.get_all_optimizers())
        retry = True
        continue
    except Exception as e:
        print("Unexpected >>>>>>>>>>>", e)
        break

    comm.leave()
def main():
    """Stratified k-fold training of a stability classifier.

    For each fold: train with cosine-annealed momentum SGD, snapshot the
    best model by validation accuracy, archive logs/snapshots, then emit a
    confusion matrix for the fold's validation split.
    """
    args = parser()
    # Timestamp used to namespace this run's output directories.
    now = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    # Output directory layout.
    save_dir = Path('result') / now
    log_dir = save_dir / 'log'
    model_dir = save_dir / 'model'
    snap_dir = save_dir / 'snap'
    matrix_dir = save_dir / 'matrix'
    # Create the output directories.
    save_dir.mkdir(exist_ok=True, parents=True)
    log_dir.mkdir(exist_ok=True, parents=True)
    model_dir.mkdir(exist_ok=True, parents=True)
    snap_dir.mkdir(exist_ok=True, parents=True)
    matrix_dir.mkdir(exist_ok=True, parents=True)

    # Dataset root; drop the cached mean file from the class listing.
    root = args.dataset
    dir_list = os.listdir(root)
    dir_list.sort()
    if 'mean.npy' in dir_list:
        dir_list.remove('mean.npy')

    # Read image files and labels into the dataset.
    print('dataset loading ...')
    datasets = DirectoryParsingLabelDataset(root)
    print('finish!')

    # Number of classes.
    class_num = len(set(datasets.labels))
    print('class number : {}'.format(class_num))
    # Number of folds.
    k_fold = args.kfold
    print('k_fold : {}'.format(k_fold))

    X = np.array([image_paths for image_paths in datasets.img_paths])
    y = np.array([label for label in datasets.labels])
    # Fixed random_state keeps fold membership reproducible across runs.
    kfold = StratifiedKFold(n_splits=k_fold, shuffle=True,
                            random_state=402).split(X, y)

    for k, (train_idx, val_idx) in enumerate(kfold):
        print("============= {} fold training =============".format(k + 1))
        X_train, y_train = X[train_idx], y[train_idx]
        X_val, y_val = X[val_idx], y[val_idx]

        # Pair image paths with labels for this fold.
        train = LabeledImageDataset(
            [(x, y) for x, y in zip(X_train, y_train)])
        validation = LabeledImageDataset(
            [(x, y) for x, y in zip(X_val, y_val)])
        train, validation, mean = get_dataset(
            train, validation, root, datasets, use_mean=False)

        # model setup
        model = StabilityClassifer(archs[args.arch](output=class_num))
        #model = ABNClassifier(archs[args.arch](output=class_num))
        lr = args.lr
        optimizer = chainer.optimizers.MomentumSGD(lr)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0001))

        # using GPU
        if args.gpu >= 0:
            chainer.cuda.get_device_from_id(args.gpu).use()
            model.to_gpu()

        # setup iterators
        train_iter = chainer.iterators.MultithreadIterator(
            train, args.batchsize, n_threads=8)
        validation_iter = chainer.iterators.MultithreadIterator(
            validation, args.batchsize, repeat=False, shuffle=False,
            n_threads=8)

        # setup updater and trainer
        updater = training.StandardUpdater(
            train_iter, optimizer, device=args.gpu)
        trainer = training.Trainer(
            updater, (args.epoch, 'epoch'), out=save_dir)

        # set extensions
        log_trigger = (1, 'epoch')
        target = 'lr'
        # Cosine-anneal the learning rate over the full epoch budget.
        trainer.extend(CosineShift(target, args.epoch, 1),
                       trigger=(1, "epoch"))
        trainer.extend(
            extensions.Evaluator(validation_iter, model, device=args.gpu),
            trigger=log_trigger)
        # Keep only the snapshot with the best validation accuracy.
        snap_name = '{}-{}_fold_model.npz'.format(k_fold, k+1)
        trainer.extend(
            extensions.snapshot_object(model, str(snap_name)),
            trigger=chainer.training.triggers.MaxValueTrigger(
                key='validation/main/accuracy', trigger=(1, 'epoch')))
        log_name = '{}-{}_fold_log.json'.format(k_fold, k+1)
        trainer.extend(extensions.LogReport(
            log_name=str(log_name), trigger=log_trigger))
        trainer.extend(extensions.observe_lr(), trigger=log_trigger)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration',
            'main/loss','main/lossL2', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy',
            'elapsed_time', 'lr'
        ]), trigger=(1, 'epoch'))
        trainer.extend(extensions.PlotReport(
            ['main/loss', 'validation/main/loss'],
            'epoch',file_name='loss{}.png'.format(k+1)))
        trainer.extend(extensions.PlotReport(
            ['main/accuracy', 'validation/main/accuracy'],
            'epoch', file_name='accuracy{}.png'.format(k+1)))
        trainer.extend(extensions.ProgressBar(update_interval=10))

        #if args.resume:
        #    chainer.serializers.load_npz(args.resume, trainer)

        trainer.run()

        # Archive this fold's best snapshot and its log.
        snap_file = save_dir / snap_name
        shutil.move(str(snap_file), str(snap_dir))
        log_file = save_dir / log_name
        shutil.move(str(log_file), str(log_dir))

        # model save
        save_model = model_dir / "{}_{}-{}_fold.npz".format(
            now, k_fold, k + 1)
        chainer.serializers.save_npz(str(save_model), model)

        print("============= {} fold Evaluation =============".format(k + 1))
        # Class sub-directories under the root provide the label names.
        dnames = glob.glob('{}/*'.format(root))
        labels_list = []
        for d in dnames:
            p_dir = Path(d)
            labels_list.append(p_dir.name)
        if 'mean.npy' in labels_list:
            labels_list.remove('mean.npy')
        # NOTE(review): the class count here is hard-coded to 8 even though
        # ``class_num`` is computed above — confirm whether this should be
        # ``class_num``.
        confusion_matrix_cocoa(validation, args.gpu, 8, model,
                               matrix_dir, k, labels_list)
def load_dataset(path):
    """Parse ``path`` into a preprocessed dataset and split 90/10.

    Returns:
        tuple: ``(train, test)`` datasets.
    """
    base = PreprocessDataset(DirectoryParsingLabelDataset(path))
    # Fixed seed keeps the random split reproducible across runs.
    n_train = int(len(base) * 0.9)
    return split_dataset_random(base, n_train, seed=0)