def get_dataset(dataset): if dataset == "mnist": # label 0 ~ 10 n_class = 10 # mnistのロード train, test = get_mnist(ndim=3) # 本来ならiteratorで回すがわかりやすようにデータとラベルで分割 train_dataset, test_dataset = split_dataset(train, test) elif dataset == "cifar10": # label n_class = 10 # cifar10のロード train, test = get_cifar10() # 本来ならiteratorで回すがわかりやすようにデータとラベルで分割 train_dataset, test_dataset = split_dataset(train, test) elif dataset == "cifar100": # label n_class = 100 # cifar100 train, test = get_cifar100() # 本来ならiteratorで回すがわかりやすようにデータとラベルで分割 train_dataset, test_dataset = split_dataset(train, test) else: raise RuntimeError('Invalid dataset choice.') return n_class, train_dataset, test_dataset
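# The split_dataset helper used above is not part of this snippet. A minimal
# sketch of what it plausibly does (an assumption, not the original code):
# unpack each Chainer TupleDataset into stacked (images, labels) ndarrays.
from chainer.dataset import concat_examples

def split_dataset(train, test):
    # concat_examples turns a list of (image, label) pairs into a tuple of
    # batched arrays: (N, C, H, W) images and (N,) labels.
    return concat_examples(train), concat_examples(test)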
def dataset(name):
    from chainer.datasets import get_mnist, get_cifar10, get_cifar100
    from datasets import get_imagenet

    def_attr = lambda image_colors, class_labels: \
        (image_colors, class_labels)

    sets = {
        "mnist": {
            "attr": def_attr(1, 10),
            "data": lambda: get_mnist(ndim=3)
        },
        "cifar10": {
            "attr": def_attr(3, 10),
            "data": lambda: get_cifar10()
        },
        "cifar100": {
            "attr": def_attr(3, 100),
            "data": lambda: get_cifar100()
        },
        "imagenet": {
            "attr": def_attr(3, 1000),
            "data": lambda: get_imagenet()
        }
    }
    print('using {} dataset.'.format(name))
    if name in sets:
        return sets[name]
    else:
        raise RuntimeError('Invalid dataset choice.')
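# Example use of the registry above. The "data" entry is a thunk, so nothing
# is downloaded until it is called:
entry = dataset("cifar10")
image_colors, class_labels = entry["attr"]
train, test = entry["data"]()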
def get_dataset(dataset): "Get dataset." if dataset == 'cifar10': return D.get_cifar10(scale=255.) if dataset == 'cifar100': return D.get_cifar100(scale=255.) if dataset == 'SVHN': return D.get_svhn(scale=255.) raise RuntimeError('Invalid dataset.')
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--minibatchsize', '-mb', type=int, default=16,
                        help='Number of images in each mini-mini-batch')
    parser.add_argument('--valid', '-v', type=int, default=10,
                        help='Number of validation in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        cifar_train, cifar_test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        cifar_train, cifar_test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    model = models.VGG.VGG16(class_labels)
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    if args.test:
        train = train[:200]
        test = test[:200]

    train_count = len(train)
    test_count = len(test)

    model = L.Classifier(models.VGG.VGG(class_labels))
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    sum_accuracy = 0
    sum_loss = 0

    while train_iter.epoch < args.epoch:
        batch = train_iter.next()
        # Reduce learning rate by 0.5 every 25 epochs.
        if train_iter.epoch % 25 == 0 and train_iter.is_new_epoch:
            optimizer.lr *= 0.5
            print('Reducing learning rate to: ', optimizer.lr)

        x_array, t_array = convert.concat_examples(batch, args.gpu)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array)
        optimizer.update(model, x, t)
        sum_loss += float(model.loss.data) * len(t.data)
        sum_accuracy += float(model.accuracy.data) * len(t.data)

        if train_iter.is_new_epoch:
            print('epoch: ', train_iter.epoch)
            print('train mean loss: {}, accuracy: {}'.format(
                sum_loss / train_count, sum_accuracy / train_count))
            # evaluation
            sum_accuracy = 0
            sum_loss = 0
            model.predictor.train = False
            for batch in test_iter:
                x_array, t_array = convert.concat_examples(batch, args.gpu)
                x = chainer.Variable(x_array)
                t = chainer.Variable(t_array)
                loss = model(x, t)
                sum_loss += float(loss.data) * len(t.data)
                sum_accuracy += float(model.accuracy.data) * len(t.data)
            test_iter.reset()
            model.predictor.train = True
            print('test mean loss: {}, accuracy: {}'.format(
                sum_loss / test_count, sum_accuracy / test_count))
            sum_accuracy = 0
            sum_loss = 0

    # Save the model and the optimizer
    print('save the model')
    serializers.save_npz('mlp.model', model)
    print('save the optimizer')
    serializers.save_npz('mlp.state', optimizer)
def get_chainer_cifar100():
    raw_train, raw_test = datasets.get_cifar100(withlabel=True, ndim=3,
                                                scale=1.)
    return process_data(raw_train, raw_test)
def get_dataset(dataset_type, matrixForData, **kwargs):
    if dataset_type == 'synthetic':
        train = binary_tree.get_data(matrixForData)
        valid = train.copy()
        # name this `test` so the no-noise case (-1) still returns a dataset
        test = train.copy()
        if kwargs['dataset_randomness'] != -1:
            train = binary_tree.ProbabilisticBinaryTreeDataset(
                train, eps=kwargs['dataset_randomness'])
            valid = binary_tree.ProbabilisticBinaryTreeDataset(
                valid, eps=kwargs['dataset_randomness'])
            test = binary_tree.ProbabilisticBinaryTreeDataset(
                test, eps=kwargs['dataset_randomness'])
    elif dataset_type == 'mnist':
        # Load the MNIST dataset
        ndim = kwargs.get('ndim', 1)
        train, test = mnist_activity.get_mnist(withlabel=False, ndim=ndim,
                                               data=matrixForData,
                                               dtype=matrixForData.dtype)
        # train, test = datasets.get_mnist(withlabel=False, ndim=ndim)
        # Binarize dataset
        # train[train >= 0.5] = 1.0
        # train[train < 0.5] = 0.0
        # test[test >= 0.5] = 1.0
        # test[test < 0.5] = 0.0
        size_data = len(train[:, 1])
        upper_part = math.floor(0.8 * size_data)
        train, valid = datasets.split_dataset(train, upper_part)
    elif dataset_type == 'cifar100':
        # Load the Cifar-100 dataset
        train, test = datasets.get_cifar100(withlabel=False)
        train = 2 * (train - 0.5)
        test = 2 * (test - 0.5)
        train, valid = datasets.split_dataset(train, 49000)
    elif dataset_type == 'breakout':
        train, test = breakout.load_dataset(withlabel=False)
        # scaling data from [0, 1] to [-1, 1]
        train = 2 * (train - 0.5)
        test = 2 * (test - 0.5)
        train, valid = datasets.split_dataset(train, 80000)
    elif dataset_type == 'wordnet':
        num_negatives = kwargs['num_negatives']
        symmetrize = kwargs['symmetrize']
        assert num_negatives == 1
        train = wordnet.load_dataset(num_negatives, symmetrize)
        valid = None
        test = None
    elif dataset_type == 'mammal':
        num_negatives = kwargs['num_negatives']
        symmetrize = kwargs['symmetrize']
        assert num_negatives == 1
        train = wordnet.load_dataset(num_negatives, symmetrize, mammal=True)
        valid = None
        test = None
    else:
        raise ValueError('Invalid dataset_type: {}'.format(dataset_type))
    return train, valid, test
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar100',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--model', '-m', default='VGG16',
                        help='The model to use: VGG16 or PreResNet110'
                             ' or WideResNet28x10')
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--lr_init', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=200,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--wd', type=float, default=1e-4,
                        help='weight decay')
    parser.add_argument('--se', action='store_true',
                        help='snapshot ensemble usage flag')
    parser.add_argument('--se_cycle', type=int, default=5,
                        help='split the training process into N cycles, '
                             'each of which starts with a large LR')
    args = parser.parse_args()

    if args.dataset.lower() == 'cifar10':
        print('Using CIFAR10 dataset')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset.lower() == 'cifar100':
        print('Using CIFAR100 dataset')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    print('Using %s model' % args.model)
    if args.model == 'VGG16':
        model_cls = VGG16
    elif args.model == 'PreResNet110':
        model_cls = PreResNet110
    elif args.model == 'WideResNet28x10':
        model_cls = WideResNet28x10
    else:
        raise RuntimeError('Invalid model choice.')

    model = L.Classifier(model_cls(class_labels))
    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(args.lr_init, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(args.wd))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)
    stop_trigger = (args.epoch, 'epoch')

    # Set up a trainer
    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=args.gpu)
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    # Learning rate adjustment (this function is called every epoch).
    # Check the larger milestone first so both drops can take effect.
    def baseline_lr_schedule(trainer):
        epoch = trainer.updater.epoch
        t = epoch / args.epoch
        factor = 1.0
        if t >= 0.75:
            factor = 0.01
        elif t >= 0.5:
            factor = 0.1
        trainer.updater.get_optimizer('main').lr = factor * args.lr_init

    total_iter = len(train) * args.epoch // args.batchsize
    cycle_iter = math.floor(total_iter / args.se_cycle)

    # Learning rate adjustment (this function is called every iteration)
    def cycle_lr_schedule(trainer):
        iteration = trainer.updater.iteration
        lr = args.lr_init * 0.5
        lr *= math.cos(math.pi * ((iteration - 1) % cycle_iter) / cycle_iter) + 1
        trainer.updater.get_optimizer('main').lr = lr

    # Set up extensions
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    if args.se:
        trainer.extend(extensions.snapshot(),
                       trigger=(cycle_iter, 'iteration'))
        trainer.extend(cycle_lr_schedule,
                       trigger=triggers.IntervalTrigger(1, 'iteration'))
    else:
        trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
        trainer.extend(baseline_lr_schedule,
                       trigger=triggers.IntervalTrigger(1, 'epoch'))
    trainer.extend(extensions.observe_lr())
    trainer.extend(extensions.LogReport())
    cols = [
        'epoch', 'lr', 'main/loss', 'main/accuracy',
        'validation/main/loss', 'validation/main/accuracy', 'elapsed_time'
    ]
    trainer.extend(extensions.PrintReport(cols))
    trainer.extend(extensions.ProgressBar())
    trainer.run()
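# A quick sanity check of the cyclic cosine schedule above: within each cycle
# the learning rate restarts at lr_init and decays towards zero (hypothetical
# numbers; lr_init=0.05 and cycle_iter=100 are assumptions for illustration):
import math

lr_init, cycle_iter = 0.05, 100
for it in (1, 50, 100):
    lr = lr_init * 0.5 * (math.cos(math.pi * ((it - 1) % cycle_iter) / cycle_iter) + 1)
    print(it, round(lr, 5))  # 1 -> 0.05, 50 -> ~0.02579, 100 -> ~0.00001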
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--early-stopping', type=str,
                        help='Metric to watch for early stopping')
    parser.add_argument('--model-type', type=str, default='simple_linear',
                        help='Type of model to fit')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    modelsdict = {
        'fitnet1': models.fitnet1.FitNet1(class_labels),
        'simple_linear': models.simple_linear.SimpleLinear(class_labels),
    }
    model = L.Classifier(modelsdict[args.model_type])
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    stop_trigger = (args.epoch, 'epoch')
    # Early stopping option
    if args.early_stopping:
        stop_trigger = triggers.EarlyStoppingTrigger(
            monitor=args.early_stopping, verbose=True,
            max_trigger=(args.epoch, 'epoch'))

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Reduce the learning rate by half every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5),
                   trigger=(25, 'epoch'))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def load_dataset(batchsize, dataset, augment=False, fast=False,
                 old_test_method=False):
    scale = 255.0 if augment else 1.0
    if dataset == 'cifar10':
        train, test = get_cifar10(scale=scale)
        class_labels = 10
    elif dataset == 'cifar100':
        train, test = get_cifar100(scale=scale)
        class_labels = 100
    else:
        raise RuntimeError('Invalid dataset choice.')

    if augment:
        # mean = np.mean(train._datasets[0], axis=(0, 2, 3))
        # std = np.std(train._datasets[0], axis=(0, 2, 3))
        # Pre-calculated from the above:
        mean = np.array([125.30690002, 122.95014954, 113.86599731])
        std = np.array([62.9932518, 62.08860397, 66.70500946])
        train = normalize_dataset(train, mean, std)
        test = normalize_dataset(test, mean, std)

        # Previously pca_sigma was 25.5, i.e. 10% of 255. Now we normalize,
        # so to keep PCA at 10% of the range we use the min and max of the
        # normalized datasets:
        # pca_sigma = 0.1 * (np.max(train._datasets[0]) - np.min(train._datasets[0]))
        # Pre-calculated from the above:
        pca_sigma = 0.1 * ((2.126797) - (-1.9892114))  # = 0.4116

        slow_augment = dict(crop_size=(32, 32), expand_ratio=1.2,
                            pca_sigma=pca_sigma, random_angle=15.0,
                            train=True)
        fast_augment = dict(crop_size=(32, 32), cutout=8, flip=True)
        if fast:
            train = pad_dataset(train, pad=4)
            train_transform = partial(transform_fast, **fast_augment)
            test_transform = lambda x: x  # No augmentation
        else:
            train_transform = partial(transform, **slow_augment)
            test_transform = partial(transform, train=False,
                                     old_test_method=old_test_method)

        train = TransformDataset(train, train_transform)
        test = TransformDataset(test, test_transform)

    train_iter = chainer.iterators.SerialIterator(train, batchsize)
    test_iter = chainer.iterators.SerialIterator(test, batchsize,
                                                 repeat=False, shuffle=False)
    return train_iter, test_iter, class_labels
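# normalize_dataset and pad_dataset are not shown in these snippets. A minimal
# sketch of plausible implementations (assumptions, not the original helpers),
# given that the datasets are Chainer TupleDatasets of CHW float images:
import numpy as np
from chainer.datasets import TupleDataset

def normalize_dataset(dataset, mean, std):
    # Standardize each channel with the given per-channel statistics.
    images, labels = dataset._datasets
    images = (images - mean[None, :, None, None]) / std[None, :, None, None]
    return TupleDataset(images.astype(np.float32), labels)

def pad_dataset(dataset, pad=4):
    # Zero-pad the spatial dimensions so random crops can shift the image.
    images, labels = dataset._datasets
    images = np.pad(images, ((0, 0), (0, 0), (pad, pad), (pad, pad)),
                    mode='constant')
    return TupleDataset(images, labels)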
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--minibatchsize', '-mb', type=int, default=32,
                        help='Number of images in each mini-mini-batch')
    parser.add_argument('--valid', '-v', type=int, default=10,
                        help='Number of validation trials per mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.01,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--select_mode', '-s', type=int, default=1,
                        help='How to select the weight update: 0 = best '
                             'validation accuracy, 1 = sample by softmax '
                             'over validation accuracies')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--decay', '-dy', type=int, default=1,
                        help='learnrate decay')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    args = parser.parse_args()

    save_path = './result_proposal_2/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    if args.test:
        train = train[:200]
        test = test[:200]

    train_count = len(train)
    test_count = len(test)

    model = L.Classifier(models.VGG.VGG(class_labels))
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    sum_accuracy = 0
    sum_loss = 0
    iteration = 0
    temp = 10.0
    thresh = int(float(args.batchsize) / args.minibatchsize)
    train_accuracy_list = np.zeros(args.epoch, dtype=np.float32)
    test_accuracy_list = np.zeros(args.epoch, dtype=np.float32)

    while train_iter.epoch < args.epoch:
        batch = train_iter.next()
        # Reduce learning rate by 0.5 every 25 epochs.
        if args.decay == 1:
            if train_iter.epoch % 25 == 0 and train_iter.is_new_epoch:
                optimizer.lr *= 0.5
                print('Reducing learning rate to: ', optimizer.lr)

        x_array, t_array = convert.concat_examples(batch, args.gpu)
        if iteration == 0:
            x = chainer.Variable(x_array)
            t = chainer.Variable(t_array)
            optimizer.update(model, x, t)
            sum_loss += float(model.loss.data) * len(t.data)
            sum_accuracy += float(model.accuracy.data) * len(t.data)
        else:
            init_weights = cache_weights(model)
            weight_list = []
            valid_accuracy = np.zeros(args.valid, dtype=np.float32)
            valid_loss = np.zeros(args.valid, dtype=np.float32)
            for valid_iter in range(args.valid):
                restore_weights(model, init_weights)
                train_indices = np.random.choice(args.batchsize,
                                                 args.minibatchsize,
                                                 replace=False)
                valid_indices = np.ones(args.batchsize, dtype=bool)
                valid_indices[train_indices] = False
                x_train = x_array[train_indices]
                t_train = t_array[train_indices].astype(np.int32)
                x_valid = x_array[valid_indices]
                t_valid = t_array[valid_indices].astype(np.int32)
                x = chainer.Variable(x_train)
                t = chainer.Variable(t_train)
                optimizer.update(model, x, t)
                update_weights = cache_weights(model)
                x_vld = chainer.Variable(x_valid)
                t_vld = chainer.Variable(t_valid)
                loss = model(x_vld, t_vld)
                accuracy_valid = float(model.accuracy.data)
                weight_list.append(update_weights)
                valid_accuracy[valid_iter] = accuracy_valid
                valid_loss[valid_iter] = float(loss.data)
            if args.select_mode == 0:
                best_index = np.argmax(valid_accuracy)
                best_weight = weight_list[best_index]
                curr_loss = valid_loss[best_index]
                curr_accuracy = valid_accuracy[best_index]
                restore_weights(model, best_weight)
            if args.select_mode == 1:
                select_index = np.random.choice(
                    args.valid,
                    p=np.exp(temp * valid_accuracy) /
                    np.sum(np.exp(temp * valid_accuracy)))
                select_weight = weight_list[select_index]
                curr_loss = valid_loss[select_index]
                curr_accuracy = valid_accuracy[select_index]
                restore_weights(model, select_weight)
            if train_iter.is_new_epoch:
                temp *= 0.99
                print('Reducing temp to: ', temp)
            sum_loss += curr_loss * len(t.data)
            sum_accuracy += curr_accuracy * len(t.data)
        iteration += 1

        if train_iter.is_new_epoch:
            print('epoch: ', train_iter.epoch)
            print('train mean loss: {}, accuracy: {}'.format(
                4.0 * sum_loss / train_count,
                4.0 * sum_accuracy / train_count))
            # epoch is 1-based here; index with epoch - 1 to stay in bounds
            train_accuracy_list[train_iter.epoch - 1] = (
                4.0 * sum_accuracy / train_count)
            # evaluation
            sum_accuracy = 0
            sum_loss = 0
            model.predictor.train = False
            for batch in test_iter:
                x_array, t_array = convert.concat_examples(batch, args.gpu)
                x = chainer.Variable(x_array)
                t = chainer.Variable(t_array)
                loss = model(x, t)
                sum_loss += float(loss.data) * len(t.data)
                sum_accuracy += float(model.accuracy.data) * len(t.data)
            test_iter.reset()
            model.predictor.train = True
            print('test mean loss: {}, accuracy: {}'.format(
                sum_loss / test_count, sum_accuracy / test_count))
            test_accuracy_list[train_iter.epoch - 1] = (
                sum_accuracy / test_count)
            sum_accuracy = 0
            sum_loss = 0

    # Save the per-epoch accuracy curves
    np.save(save_path + 'train_accuracy_lr_{}_decay_{}.npy'.format(
        args.learnrate, args.decay), train_accuracy_list)
    np.save(save_path + 'test_accuracy_lr_{}_decay_{}.npy'.format(
        args.learnrate, args.decay), test_accuracy_list)
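# cache_weights / restore_weights are not shown in these snippets. A minimal
# sketch of what they presumably do -- deep-copying and writing back the
# model's parameter arrays (an assumption, not the original helpers):
def cache_weights(model):
    # Snapshot every parameter array under its dotted path.
    return {name: param.array.copy() for name, param in model.namedparams()}

def restore_weights(model, cached):
    # Write the cached arrays back in place.
    for name, param in model.namedparams():
        param.array[...] = cached[name]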
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--device', '-d', type=str, default='0',
                        help='Device specifier. Either ChainerX device '
                             'specifier or an integer. If non-negative '
                             'integer, CuPy arrays with specified device id '
                             'are used. If negative integer, NumPy arrays '
                             'are used')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu', '-g', dest='device', type=int, nargs='?',
                       const=0,
                       help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    device = chainer.get_device(args.device)
    if device.xp is chainerx:
        sys.stderr.write('This example does not support ChainerX devices.\n')
        sys.exit(1)

    print('Device: {}'.format(device))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    device.use()

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    if args.test:
        train = train[:200]
        test = test[:200]

    train_count = len(train)
    test_count = len(test)

    model = L.Classifier(models.VGG.VGG(class_labels))
    model.to_device(device)

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    sum_accuracy = 0
    sum_loss = 0

    while train_iter.epoch < args.epoch:
        batch = train_iter.next()
        # Reduce learning rate by 0.5 every 25 epochs.
        if train_iter.epoch % 25 == 0 and train_iter.is_new_epoch:
            optimizer.lr *= 0.5
            print('Reducing learning rate to: {}'.format(optimizer.lr))

        x_array, t_array = convert.concat_examples(batch, device)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array, requires_grad=False)
        optimizer.update(model, x, t)
        sum_loss += float(model.loss.array) * len(t)
        sum_accuracy += float(model.accuracy.array) * len(t)

        if train_iter.is_new_epoch:
            print('epoch: {}'.format(train_iter.epoch))
            print('train mean loss: {}, accuracy: {}'.format(
                sum_loss / train_count, sum_accuracy / train_count))
            # evaluation
            sum_accuracy = 0
            sum_loss = 0
            model.predictor.train = False
            # It is good practice to turn off train mode during evaluation.
            with configuration.using_config('train', False):
                for batch in test_iter:
                    x_array, t_array = convert.concat_examples(batch, device)
                    x = chainer.Variable(x_array)
                    t = chainer.Variable(t_array, requires_grad=False)
                    loss = model(x, t)
                    sum_loss += float(loss.array) * len(t)
                    sum_accuracy += float(model.accuracy.array) * len(t)
            test_iter.reset()
            model.predictor.train = True
            print('test mean loss: {}, accuracy: {}'.format(
                sum_loss / test_count, sum_accuracy / test_count))
            sum_accuracy = 0
            sum_loss = 0

    # Save the model and the optimizer
    print('save the model')
    serializers.save_npz('mlp.model', model)
    print('save the optimizer')
    serializers.save_npz('mlp.state', optimizer)
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.add_argument('--resume', '-r', type=str,
                        help='Directory that has `vgg.model` and `vgg.state`')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    if args.test:
        train = train[:200]
        test = test[:200]

    train_count = len(train)
    test_count = len(test)

    model = L.Classifier(models.VGG.VGG(class_labels))
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    if args.resume is not None:
        resume = args.resume
        if os.path.exists(resume):
            serializers.load_npz(os.path.join(resume, 'vgg.model'), model)
            serializers.load_npz(os.path.join(resume, 'vgg.state'), optimizer)
        else:
            raise ValueError(
                '`args.resume` ("{}") is specified,'
                ' but it does not exist.'.format(resume)
            )

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    sum_acc = 0
    sum_loss = 0

    while train_iter.epoch < args.epoch:
        batch = train_iter.next()
        # Reduce learning rate by 0.5 every 25 epochs.
        if train_iter.epoch % 25 == 0 and train_iter.is_new_epoch:
            optimizer.lr *= 0.5
            print('Reducing learning rate to: {}'.format(optimizer.lr))

        x_array, t_array = convert.concat_examples(batch, args.gpu)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array)
        optimizer.update(model, x, t)
        sum_loss += float(model.loss.array) * len(t)
        sum_acc += float(model.accuracy.array) * len(t)

        if train_iter.is_new_epoch:
            print('epoch: {}'.format(train_iter.epoch))
            print('train mean loss: {}, accuracy: {}'.format(
                sum_loss / train_count, sum_acc / train_count))
            sum_acc = 0
            sum_loss = 0
            # Enable evaluation mode.
            with configuration.using_config('train', False):
                # This is optional but can reduce computational overhead.
                with chainer.using_config('enable_backprop', False):
                    for batch in test_iter:
                        x, t = convert.concat_examples(batch, args.gpu)
                        x = chainer.Variable(x)
                        t = chainer.Variable(t)
                        loss = model(x, t)
                        sum_loss += float(loss.array) * len(t)
                        sum_acc += float(model.accuracy.array) * len(t)
            test_iter.reset()
            print('test mean loss: {}, accuracy: {}'.format(
                sum_loss / test_count, sum_acc / test_count))
            sum_acc = 0
            sum_loss = 0

    # Save the model and the optimizer
    out = args.out
    if not os.path.exists(out):
        os.makedirs(out)
    print('save the model')
    serializers.save_npz(os.path.join(out, 'vgg.model'), model)
    print('save the optimizer')
    serializers.save_npz(os.path.join(out, 'vgg.state'), optimizer)
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--pretrain', default=0,
                        help='Pretrain (w/o VD) or not (w/ VD).'
                             ' default is not (0).')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--resume-opt', '-ro', default='',
                        help='Resume optimizer the training from snapshot')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    print('# train:', len(train))
    print('# test :', len(test))

    if args.pretrain:
        model = nets.VGG16(class_labels)

        def calc_loss(x, t):
            model.y = model(x)
            model.loss = F.softmax_cross_entropy(model.y, t)
            reporter.report({'loss': model.loss}, model)
            model.accuracy = F.accuracy(model.y, t)
            reporter.report({'accuracy': model.accuracy}, model)
            return model.loss

        model.calc_loss = calc_loss
        model.use_raw_dropout = True
    elif args.resume:
        model = nets.VGG16VD(class_labels, warm_up=1.)
        model(train[0][0][None, ])  # for setting in_channels automatically
        model.to_variational_dropout()
        chainer.serializers.load_npz(args.resume, model)
    else:
        model = nets.VGG16VD(class_labels, warm_up=0.0001)
        model(train[0][0][None, ])  # for setting in_channels automatically
        model.to_variational_dropout()

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    if args.pretrain:
        # The original Torch code (http://torch.ch/blog/2015/07/30/cifar.html)
        # uses lr=1. However, it doesn't work well, as people say in the post.
        # This follows a version of the Chainer example using lr=0.1.
        optimizer = chainer.optimizers.MomentumSGD(0.1)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))
    elif args.resume:
        optimizer = chainer.optimizers.Adam(1e-5)
        optimizer.setup(model)
    else:
        optimizer = chainer.optimizers.Adam(1e-4)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.GradientClipping(10.))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    if args.resume:
        classifier = L.Classifier(model.copy())
        accuracy = extensions.Evaluator(
            test_iter, classifier, device=args.gpu)()['main/accuracy']
        print('test accuracy VD:', accuracy)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu,
                                       loss_func=model.calc_loss)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(
        extensions.Evaluator(test_iter, L.Classifier(model), device=args.gpu))

    if args.pretrain:
        trainer.extend(extensions.ExponentialShift('lr', 0.5),
                       trigger=(25, 'epoch'))
    elif not args.resume:
        trainer.extend(
            extensions.LinearShift(
                'alpha', (1e-4, 0.),
                (0, args.epoch * len(train) // args.batchsize)))

    # Take a snapshot at each epoch
    # trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    if args.pretrain:
        trainer.extend(extensions.snapshot_object(
            model, 'model_snapshot_{.updater.epoch}'), trigger=(10, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    # trainer.extend(extensions.LogReport())
    per = min(len(train) // args.batchsize // 2, 1000)
    trainer.extend(extensions.LogReport(trigger=(per, 'iteration')))

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'iteration', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy', 'main/class',
            'main/kl', 'main/mean_p', 'main/sparsity', 'main/W/Wnz',
            'main/kl_coef', 'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    # Run the training
    trainer.run()

    print('Measure inference speeds for 1 sample inference...')
    test_iter = chainer.iterators.SerialIterator(test, 1, repeat=False,
                                                 shuffle=False)
    if not args.pretrain:
        if args.gpu >= 0:
            classifier = L.Classifier(model.copy())
            start = time.time()
            accuracy = extensions.Evaluator(
                test_iter, classifier, device=args.gpu)()['main/accuracy']
            print('dense Gpu:', time.time() - start,
                  's/{} imgs'.format(len(test)))
        model.to_cpu()
        classifier = L.Classifier(model.copy())
        start = time.time()
        accuracy = extensions.Evaluator(
            test_iter, classifier, device=-1)()['main/accuracy']
        print('dense Cpu:', time.time() - start,
              's/{} imgs'.format(len(test)))
        model.to_cpu_sparse()
        model.name = None
        classifier = L.Classifier(copy.deepcopy(model))
        start = time.time()
        accuracy = extensions.Evaluator(
            test_iter, classifier, device=-1)()['main/accuracy']
        print('sparse Cpu:', time.time() - start,
              's/{} imgs'.format(len(test)))
from chainer import datasets

train, test = datasets.get_cifar100()
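# Quick shape check for the loaded dataset (sizes per the CIFAR-100 spec:
# 50,000 train and 10,000 test pairs of a 3x32x32 float32 image and a label):
x, t = train[0]
print(len(train), len(test))  # 50000 10000
print(x.shape, x.dtype, t)    # (3, 32, 32) float32 <fine label>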
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--model', '-m', type=str, default=None)
    parser.add_argument('--opt', type=str, default=None)
    parser.add_argument('--epoch', '-e', type=int, default=40)
    parser.add_argument('--looptimes', '-t', type=int, default=5)
    parser.add_argument('--lr', '-l', type=float, default=0.01)
    parser.add_argument('--batch', '-b', type=int, default=128)
    parser.add_argument('--noplot', dest='plot', action='store_false',
                        help='Disable PlotReport extension')
    args = parser.parse_args()

    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    # Set up a neural network to train.
    model = L.Classifier(
        network.LocalPCN(class_labels=class_labels, LoopTimes=args.looptimes))
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = optimizers.NesterovAG(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(1e-3))

    num_train_samples = 45000
    train_iter = iterators.SerialIterator(train[:num_train_samples],
                                          batch_size=args.batch, shuffle=True)
    test_iter = iterators.SerialIterator(train[num_train_samples:],
                                         batch_size=args.batch,
                                         repeat=False, shuffle=False)

    if args.model is not None:
        print("loading model from " + args.model)
        serializers.load_npz(args.model, model)
    if args.opt is not None:
        print("loading opt from " + args.opt)
        serializers.load_npz(args.opt, optimizer)

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out='results')

    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    trainer.extend(extensions.LogReport(trigger=(10, 'iteration')))
    trainer.extend(extensions.observe_lr(), trigger=(10, 'iteration'))

    # Schedule of a learning rate (LinearShift)
    trainer.extend(
        extensions.LinearShift('lr', (args.lr, args.lr * 0.1),
                               (args.epoch * 0.5, args.epoch * 0.5 + 1)),
        trigger=(1, 'epoch'))

    # Save two plot images to the result dir
    if args.plot and extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    trainer.extend(extensions.PrintReport([
        'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy', 'lr', 'elapsed_time'
    ]), trigger=(1, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=1))

    # Plot the computation graph
    trainer.extend(extensions.dump_graph('main/loss'))

    # Train
    trainer.run()

    # Save results
    modelname = "./results/model"
    print("saving model to " + modelname)
    serializers.save_npz(modelname, model)
    optimizername = "./results/optimizer"
    print("saving optimizer to " + optimizername)
    serializers.save_npz(optimizername, optimizer)
#!/usr/bin/python
# -*- coding:utf-8 -*-
import numpy as np
import chainer
from chainer import datasets


def ret_index(val, lis):
    for i, lab in enumerate(lis):
        if val in lab:
            return i, lab.index(val)


print('[INFO] Fetch cifar100 dataset ...')
train, test = datasets.get_cifar100(ndim=1)
print('[INFO] Done fetch!')

new_train = [[] for i in range(20)]
new_test = [[] for i in range(20)]
labels = [[4, 30, 55, 72, 95], [1, 32, 67, 73, 91], [54, 62, 70, 82, 92],
          [9, 10, 16, 28, 61], [0, 51, 53, 57, 83], [22, 39, 40, 86, 87],
          [5, 20, 25, 84, 94], [6, 7, 14, 18, 24], [3, 42, 43, 88, 97],
          [12, 17, 37, 68, 76], [23, 33, 49, 60, 71], [15, 19, 21, 31, 38],
          [34, 63, 64, 66, 75], [26, 45, 77, 79, 99], [2, 11, 35, 46, 98],
          [27, 29, 44, 78, 93], [36, 50, 65, 74, 80], [47, 52, 56, 59, 96],
          [8, 13, 48, 58, 90], [41, 69, 81, 85, 89]]

print('[INFO] Remaking dataset ...')
for t in train:
    # Loop body reconstructed from context (assumption): map each fine label
    # into its coarse-label bucket via ret_index.
    coarse, fine = ret_index(t[1], labels)
    new_train[coarse].append((t[0], fine))
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar100',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--model', '-m', default='VGG16',
                        help='The model to use: VGG16 or PreResNet110'
                             ' or WideResNet28x10')
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory from which to read the snapshot files')
    args = parser.parse_args()

    if args.dataset.lower() == 'cifar10':
        print('Using CIFAR10 dataset')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset.lower() == 'cifar100':
        print('Using CIFAR100 dataset')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    print('Using %s model' % args.model)
    if args.model == 'VGG16':
        model_cls = VGG16
    elif args.model == 'PreResNet110':
        model_cls = PreResNet110
    elif args.model == 'WideResNet28x10':
        model_cls = WideResNet28x10
    else:
        raise RuntimeError('Invalid model choice.')

    model = model_cls(class_labels)

    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)
    t = np.array([data[1] for data in test], np.int32)

    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
        t = cuda.cupy.array(t)

    def predict(model, test_iter):
        probs = []
        test_iter.reset()
        for batch in test_iter:
            in_arrays = convert.concat_examples(batch, args.gpu)
            with chainer.using_config('train', False), \
                    chainer.using_config('enable_backprop', False):
                y = model(in_arrays[0])
                prob = chainer.functions.softmax(y)
                probs.append(prob.data)
        return concat_arrays(probs)

    # gather each model's softmax outputs
    results = []
    for snapshot_path in glob.glob(args.out + '/*snapshot*'):
        serializers.load_npz(snapshot_path, model,
                             path='updater/model:main/predictor/')
        y = predict(model, test_iter)
        acc = F.accuracy(y, t)
        results.append(y[None])
        print('accuracy:', acc.data)

    # compute the average
    results = concat_arrays(results)
    y = results.mean(axis=0)
    acc = F.accuracy(y, t)
    print('-' * 50)
    print('ensemble accuracy:', acc.data)
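# concat_arrays is not shown in these snippets. A minimal sketch, assuming it
# simply stacks a list of NumPy/CuPy arrays along the first axis:
from chainer.backends import cuda

def concat_arrays(arrays):
    # Pick the array module (numpy or cupy) matching the inputs.
    xp = cuda.get_array_module(arrays[0])
    return xp.concatenate(arrays, axis=0)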
def load_cifar100_as_ndarray(ndim):
    train, test = get_cifar100(ndim=ndim)
    train = concat_examples(train)
    test = concat_examples(test)
    return train, test
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    if args.test:
        train = train[:200]
        test = test[:200]

    train_count = len(train)
    test_count = len(test)

    model = L.Classifier(models.VGG.VGG(class_labels))
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    sum_accuracy = 0
    sum_loss = 0

    while train_iter.epoch < args.epoch:
        batch = train_iter.next()
        # Reduce learning rate by 0.5 every 25 epochs.
        if train_iter.epoch % 25 == 0 and train_iter.is_new_epoch:
            optimizer.lr *= 0.5
            print('Reducing learning rate to: {}'.format(optimizer.lr))

        x_array, t_array = convert.concat_examples(batch, args.gpu)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array)
        optimizer.update(model, x, t)
        sum_loss += float(model.loss.data) * len(t.data)
        sum_accuracy += float(model.accuracy.data) * len(t.data)

        if train_iter.is_new_epoch:
            print('epoch: {}'.format(train_iter.epoch))
            print('train mean loss: {}, accuracy: {}'.format(
                sum_loss / train_count, sum_accuracy / train_count))
            # evaluation
            sum_accuracy = 0
            sum_loss = 0
            model.predictor.train = False
            for batch in test_iter:
                x_array, t_array = convert.concat_examples(batch, args.gpu)
                x = chainer.Variable(x_array)
                t = chainer.Variable(t_array)
                loss = model(x, t)
                sum_loss += float(loss.data) * len(t.data)
                sum_accuracy += float(model.accuracy.data) * len(t.data)
            test_iter.reset()
            model.predictor.train = True
            print('test mean loss: {}, accuracy: {}'.format(
                sum_loss / test_count, sum_accuracy / test_count))
            sum_accuracy = 0
            sum_loss = 0

    # Save the model and the optimizer
    print('save the model')
    serializers.save_npz('mlp.model', model)
    print('save the optimizer')
    serializers.save_npz('mlp.state', optimizer)
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--seed', '-s', type=int, default=123,
                        help='seed for split dataset into train & validation')
    # Note: argparse's type=bool turns any non-empty string into True, so
    # these two options are effectively always-on unless passed ''.
    parser.add_argument('--augment', '-a', type=bool, default=True,
                        help='whether augment dataset or not')
    parser.add_argument('--parallel', '-p', type=bool, default=True,
                        help='use multiprocess iterator or not')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    if args.augment:
        train = TransformDataset(train, transform)
    else:
        train, val = split_dataset_random(train, 45000, seed=args.seed)

    model = L.Classifier(DenseNetCifar(n_class=class_labels))
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.NesterovAG(lr=args.learnrate, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)
    if args.parallel:
        train_iter = chainer.iterators.MultiprocessIterator(
            train, args.batchsize, n_processes=2)
        test_iter = chainer.iterators.MultiprocessIterator(
            test, args.batchsize, repeat=False, shuffle=False, n_processes=2)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Reduce the learning rate by half every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5),
                   trigger=(25, 'epoch'))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.snapshot_object(model.predictor,
                                              filename='densenet.model'),
                   trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.observe_lr(), trigger=(1, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'lr', 'elapsed_time']))
    trainer.extend(extensions.PlotReport(
        y_keys=['main/loss', 'validation/main/loss'], file_name='loss.png'))
    trainer.extend(extensions.PlotReport(
        y_keys=['main/accuracy', 'validation/main/accuracy'],
        file_name='accuracy.png'))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    model = L.Classifier(models.VGG.VGG(class_labels))
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(0.1)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Reduce the learning rate by half every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5),
                   trigger=(25, 'epoch'))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--seed', '-s', type=int, default=0,
                        help='seed for random values')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.01,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--aug_method', '-a', default='random_erasing',
                        choices=['none', 'mixup', 'random_erasing', 'both'],
                        help='data augmentation strategy')
    parser.add_argument('--model', '-m', default='pyramid',
                        choices=['resnet50', 'pyramid'],
                        help='model architecture')
    parser.add_argument('--weights', '-w', default='',
                        help='initial weight')
    parser.add_argument('--consistent_weight', type=float, default=10,
                        help='weight of the consistency loss')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print(args)
    print('')

    set_random_seed(args.seed)

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        # Hold out 10000 of the training images for validation; call this
        # "split". The 10000 test images get their labels replaced by -1
        # and are treated as unlabeled data; call this "unlabeled".
        # Questions to answer:
        # 1. Does accuracy on test improve?
        # 2. Does split show an improvement that tracks the one on test?
        train, test = get_cifar10()
        split = train[-10000:]
        train = train[:-10000]
        # treat as data with label = -1
        unlabeled = [(x[0], -1) for x in test]
        print(
            f'train:{len(train)}, unlabeled:{len(unlabeled)}, test:{len(test)}'
        )
        train = chainer.datasets.ConcatenatedDataset(train, unlabeled)
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    if args.model == 'resnet50':
        predictor = ResNet(None)
        predictor.fc6 = L.Linear(2048, class_labels)
        predictor.fc6.name = 'fc6'
        predictor2 = ResNet(None)
        predictor2.fc6 = L.Linear(2048, class_labels)
        predictor2.fc6.name = 'fc6'
    elif args.model == 'pyramid':
        predictor = shaked_pyramid_net.PyramidNet(skip=True)
        predictor2 = shaked_pyramid_net.PyramidNet(skip=True)

    if not args.weights == '':
        print(f'loading weights from {args.weights}')
        chainer.serializers.load_npz(args.weights, predictor)
        chainer.serializers.load_npz(args.weights, predictor2)

    model = mean_teacher_train_chain.MeanTeacherTrainChain(
        predictor, predictor2, args.consistent_weight)
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU
        model.teacher.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    # augment train data
    print('currently, aug_method is ignored')
    train = dataset.SingleCifar10((train, None))
    train = chainer.datasets.transform_dataset.TransformDataset(
        train, transformer.LessonTransform(crop_size=(32, 32)))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize,
                                                  shuffle=True)
    split_iter = chainer.iterators.SerialIterator(split, args.batchsize,
                                                  repeat=False, shuffle=False)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # extension that updates the teacher after each parameter update
    def update_teacher(trainer):
        model.on_update_finished(trainer)

    trainer.extend(update_teacher)

    # Evaluate the model with the test dataset for each epoch
    eval_trigger = (1, 'epoch')
    classifier = chainer.links.Classifier(model.teacher)
    split_evaluator = extensions.Evaluator(split_iter, classifier,
                                           device=args.gpu)
    split_evaluator.name = 'observable_validation'
    trainer.extend(split_evaluator, trigger=eval_trigger)
    truth_evaluator = extensions.Evaluator(test_iter, classifier,
                                           device=args.gpu)
    truth_evaluator.name = 'truth_validation'
    trainer.extend(truth_evaluator, trigger=eval_trigger)

    # Multiply the learning rate by lr_drop_ratio at 50% and 75% of the
    # total number of epochs.
    lr_drop_epoch = [int(args.epoch * 0.5), int(args.epoch * 0.75)]
    lr_drop_ratio = 0.1
    print(f'lr schedule: {lr_drop_ratio}, timing: {lr_drop_epoch}')

    def lr_drop(trainer):
        trainer.updater.get_optimizer('main').lr *= lr_drop_ratio

    trainer.extend(lr_drop,
                   trigger=chainer.training.triggers.ManualScheduleTrigger(
                       lr_drop_epoch, 'epoch'))
    trainer.extend(extensions.observe_lr(), trigger=(1, 'epoch'))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot of the best model under each validation metric
    # trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.snapshot_object(model,
                                              'observable_best_accuracy.npz'),
                   trigger=chainer.training.triggers.MaxValueTrigger(
                       'observable_validation/main/accuracy'))
    trainer.extend(extensions.snapshot_object(model,
                                              'truth_best_accuracy.npz'),
                   trigger=chainer.training.triggers.MaxValueTrigger(
                       'truth_validation/main/accuracy'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'lr', 'main/class_loss', 'main/consistency_loss',
            'main/loss', 'main/teacher_accuracy', 'main/student_accuracy',
            'observable_validation/main/loss',
            'observable_validation/main/accuracy',
            'truth_validation/main/accuracy', 'truth_validation/main/loss',
            'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    # interact with chainerui
    trainer.extend(CommandsExtension(), trigger=(100, 'iteration'))
    # save args
    save_args(args, args.out)

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
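# ----------------------------------------------------------------------
# The script above delegates the teacher update to
# mean_teacher_train_chain.MeanTeacherTrainChain.on_update_finished(),
# whose source is not shown here. As a rough, self-contained sketch of
# what a mean-teacher update usually does (an exponential moving average
# of the student's weights): the function name `ema_update` and the
# decay value 0.99 are illustrative assumptions, not the project's
# actual implementation.
import numpy as np


def ema_update(teacher_params, student_params, decay=0.99):
    """In-place EMA: teacher <- decay * teacher + (1 - decay) * student."""
    for t, s in zip(teacher_params, student_params):
        t *= decay
        t += (1.0 - decay) * s


# usage sketch: call once after every optimizer.update()
teacher_w = [np.zeros((3, 3), dtype=np.float32)]
student_w = [np.ones((3, 3), dtype=np.float32)]
ema_update(teacher_w, student_w)  # teacher_w is now 0.01 everywhere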
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar100',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--model', '-m', default='VGG16',
                        help='The model to use: VGG16 or PreResNet110'
                             ' or WideResNet28x10')
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--lr_init', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=200,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--wd', type=float, default=1e-4,
                        help='weight decay')
    parser.add_argument('--swa', action='store_true',
                        help='swa usage flag')
    parser.add_argument('--swa_start', type=float, default=161,
                        help='SWA start epoch number')
    parser.add_argument('--swa_lr', type=float, default=0.05,
                        help='SWA LR')
    parser.add_argument('--swa_c_epochs', type=int, default=1,
                        help='SWA model collection frequency length in epochs')
    args = parser.parse_args()

    if args.dataset.lower() == 'cifar10':
        print('Using CIFAR10 dataset')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset.lower() == 'cifar100':
        print('Using CIFAR100 dataset')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    print('Using %s model' % args.model)
    if args.model == 'VGG16':
        model_cls = VGG16
    elif args.model == 'PreResNet110':
        model_cls = PreResNet110
    elif args.model == 'WideResNet28x10':
        model_cls = WideResNet28x10
    else:
        raise RuntimeError('Invalid model choice.')

    model = L.Classifier(model_cls(class_labels))
    if args.swa:
        swa_model = L.Classifier(model_cls(class_labels))
        swa_n = 0

    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
        if args.swa:
            swa_model.to_gpu()

    # Data augmentation / preprocess
    train = TransformDataset(train, partial(transform, train=True))
    test = TransformDataset(test, partial(transform, train=False))

    optimizer = chainer.optimizers.MomentumSGD(args.lr_init, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(args.wd))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    swa_train_iter = chainer.iterators.SerialIterator(
        train, args.batchsize, repeat=False, shuffle=False)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    stop_trigger = (args.epoch, 'epoch')

    # Set up a trainer
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    # Learning rate adjustment (this function is called every epoch)
    def lr_schedule(trainer):
        epoch = trainer.updater.epoch
        t = epoch / (args.swa_start if args.swa else args.epoch)
        lr_ratio = args.swa_lr / args.lr_init if args.swa else 0.01
        if t <= 0.5:
            factor = 1.0
        elif t <= 0.9:
            factor = 1.0 - (1.0 - lr_ratio) * (t - 0.5) / 0.4
        else:
            factor = lr_ratio
        trainer.updater.get_optimizer('main').lr = factor * args.lr_init

    # The main function for SWA (this function is called every epoch)
    def avg_weight(trainer):
        epoch = trainer.updater.epoch
        if args.swa and (epoch + 1) >= args.swa_start and \
                (epoch + 1 - args.swa_start) % args.swa_c_epochs == 0:
            nonlocal swa_n
            # moving average
            alpha = 1.0 / (swa_n + 1)
            for param1, param2 in zip(swa_model.params(), model.params()):
                param1.data *= (1.0 - alpha)
                param1.data += param2.data * alpha
            swa_n += 1

    # This function is called before evaluating the SWA model
    # to fix batchnorm's running mean and variance
    def fix_swa_batchnorm(evaluator):
        # Check for batchnorm layers
        bn_flg = False
        for l in swa_model.links():
            if isinstance(l, L.BatchNormalization):
                bn_flg = True
                break
        # Fix batchnorm's running mean and variance
        if bn_flg:
            swa_train_iter.reset()
            with chainer.using_config('train', True):
                for batch in swa_train_iter:
                    in_arrays = evaluator.converter(batch, evaluator.device)
                    with function.no_backprop_mode():
                        swa_model(*in_arrays)

    # Set up extensions
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu),
                   trigger=(5, 'epoch'))
    if args.swa:
        eval_points = [x for x in range(args.epoch + 1)
                       if x > args.swa_start and x % 5 == 0]
        trainer.extend(SwaEvaluator(test_iter, swa_model, device=args.gpu,
                                    eval_hook=fix_swa_batchnorm),
                       trigger=triggers.ManualScheduleTrigger(
                           eval_points, 'epoch'))
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(lr_schedule, trigger=triggers.IntervalTrigger(1, 'epoch'))
    trainer.extend(avg_weight, trigger=triggers.IntervalTrigger(1, 'epoch'))
    trainer.extend(extensions.observe_lr())
    trainer.extend(extensions.LogReport())

    cols = ['epoch', 'lr', 'main/loss', 'main/accuracy',
            'validation/main/loss', 'validation/main/accuracy',
            'elapsed_time']
    if args.swa:
        cols = cols[:-1] + ['swa/main/loss', 'swa/main/accuracy'] + cols[-1:]
    trainer.extend(extensions.PrintReport(cols))

    trainer.extend(extensions.ProgressBar())

    trainer.run()
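# ----------------------------------------------------------------------
# The avg_weight() extension above is the core of Stochastic Weight
# Averaging: after collecting the n-th model it applies
# w_swa <- (1 - 1/(n+1)) * w_swa + 1/(n+1) * w, so w_swa always equals
# the plain mean of all collected weights. A self-contained NumPy check
# of that identity (variable names here are illustrative only):
import numpy as np

w_swa = np.zeros(4, dtype=np.float32)  # running average
collected = [np.full(4, v, dtype=np.float32) for v in (1.0, 2.0, 3.0)]

for n, w in enumerate(collected):
    alpha = 1.0 / (n + 1)
    w_swa *= (1.0 - alpha)
    w_swa += w * alpha

assert np.allclose(w_swa, 2.0)  # equals the mean of 1, 2 and 3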
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--device', '-d', type=str, default='0',
                        help='Device specifier. Either ChainerX device '
                             'specifier or an integer. If non-negative '
                             'integer, CuPy arrays with specified device id '
                             'are used. If negative integer, NumPy arrays '
                             'are used')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu', '-g', dest='device',
                       type=int, nargs='?', const=0,
                       help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    if chainer.get_dtype() == numpy.float16:
        warnings.warn(
            'This example may cause NaN in FP16 mode.', RuntimeWarning)

    device = chainer.get_device(args.device)

    print('Device: {}'.format(device))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    device.use()

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    if args.test:
        train = train[:200]
        test = test[:200]

    train_count = len(train)
    test_count = len(test)

    model = L.Classifier(models.VGG.VGG(class_labels))
    model.to_device(device)

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    if device.xp is not chainerx:
        run_train_loop(
            optimizer, train_iter, test_iter, train_count, test_count,
            args.epoch, device)
    else:
        warnings.warn(
            'Static subgraph optimization does not support ChainerX and will'
            ' be disabled.', UserWarning)
        with chainer.using_config('use_static_graph', False):
            run_train_loop(
                optimizer, train_iter, test_iter, train_count, test_count,
                args.epoch, device)

    # Save the model and the optimizer
    print('save the model')
    serializers.save_npz('mlp.model', model)
    print('save the optimizer')
    serializers.save_npz('mlp.state', optimizer)
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=150,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--augmentation', action='store_true',
                        help='Apply augmentation.')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    chainer.cuda.set_max_workspace_size(512 * 1024 * 1024)
    chainer.config.autotune = True
    chainer.config.cudnn_fast_batch_normalization = True

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    model = L.Classifier(ResNet(class_labels, augmentation=args.augmentation))
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Multiply the learning rate by 0.2 every 60 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.2),
                   trigger=(60, 'epoch'))

    # Take a snapshot every 50 epochs
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'),
        trigger=(50, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--device', '-d', type=str, default='0',
                        help='Device specifier. Either ChainerX device '
                             'specifier or an integer. If non-negative '
                             'integer, CuPy arrays with specified device id '
                             'are used. If negative integer, NumPy arrays '
                             'are used')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--early-stopping', type=str,
                        help='Metric to watch for early stopping')
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu', '-g', dest='device',
                       type=int, nargs='?', const=0,
                       help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    if chainer.get_dtype() == numpy.float16:
        warnings.warn('This example may cause NaN in FP16 mode.',
                      RuntimeWarning)

    device = chainer.get_device(args.device)
    if device.xp is chainerx:
        sys.stderr.write('This example does not support ChainerX devices.\n')
        sys.exit(1)

    print('Device: {}'.format(device))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    device.use()

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    model = L.Classifier(models.VGG.VGG(class_labels))
    model.to_device(device)

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    stop_trigger = (args.epoch, 'epoch')
    # Early stopping option
    if args.early_stopping:
        stop_trigger = triggers.EarlyStoppingTrigger(
            monitor=args.early_stopping, verbose=True,
            max_trigger=(args.epoch, 'epoch'))

    # Set up a trainer
    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=device)
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=device))

    # Reduce the learning rate by half every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5),
                   trigger=(25, 'epoch'))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    # TODO(hvy): Support ChainerX
    if device.xp is not chainerx:
        trainer.extend(extensions.DumpGraph('main/loss'))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--seed', '-s', type=int, default=0,
                        help='seed for random values')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.1,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--aug_method', '-a', default='both',
                        choices=['none', 'mixup', 'random_erasing', 'both'],
                        help='data augmentation strategy')
    parser.add_argument('--model', '-m', default='pyramid',
                        choices=['resnet50', 'pyramid'],
                        help='model architecture')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print(args)
    print('')

    set_random_seed(args.seed)

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
        # for mean-teacher experiment
        # train = train[:-10000]
        # print(len(train))
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    if args.model == 'resnet50':
        predictor = ResNet(None)
        predictor.fc6 = L.Linear(2048, class_labels)
    elif args.model == 'pyramid':
        predictor = shaked_pyramid_net.PyramidNet(skip=True)

    # Note: this duplicates the branching over the train-data transform
    # further below.
    if args.aug_method in ('both', 'mixup'):
        lossfun = soft_label_classification_loss
        accfun = soft_label_classification_acc
    else:
        lossfun = F.softmax_cross_entropy
        accfun = F.accuracy

    model = L.Classifier(predictor, lossfun=lossfun, accfun=accfun)
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    # augment train data
    if args.aug_method == 'none':
        print('no data augmentation')
        train = dataset.SingleCifar10((train, None))
    elif args.aug_method in ('both', 'mixup'):
        use_random_erasing = args.aug_method == 'both'
        train = dataset.PairwiseCifar10((train, None))
        train = chainer.datasets.transform_dataset.TransformDataset(
            train,
            transformer.MixupTransform(
                use_random_erasing=use_random_erasing))
    elif args.aug_method == 'random_erasing':
        train = dataset.SingleCifar10((train, None))
        train = chainer.datasets.transform_dataset.TransformDataset(
            train, transformer.RandomErasingTransform())

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    eval_trigger = (1, 'epoch')
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu),
                   trigger=eval_trigger)

    # Multiply the learning rate by lr_drop_ratio at 50% and 75% of the
    # total number of epochs.
    lr_drop_epoch = [int(args.epoch * 0.5), int(args.epoch * 0.75)]
    lr_drop_ratio = 0.1
    print(f'lr schedule: {lr_drop_ratio}, timing: {lr_drop_epoch}')

    def lr_drop(trainer):
        trainer.updater.get_optimizer('main').lr *= lr_drop_ratio

    trainer.extend(lr_drop,
                   trigger=chainer.training.triggers.ManualScheduleTrigger(
                       lr_drop_epoch, 'epoch'))
    trainer.extend(extensions.observe_lr(), trigger=(1, 'epoch'))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'lr', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    # interact with chainerui
    trainer.extend(CommandsExtension(), trigger=(100, 'iteration'))
    # save args
    save_args(args, args.out)

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
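# ----------------------------------------------------------------------
# transformer.MixupTransform above is project-specific and its source is
# not shown here. A minimal sketch of the mixup idea it presumably
# applies: blend two images and their one-hot labels with a
# Beta-distributed coefficient, yielding the soft labels that
# soft_label_classification_loss expects. The function name and the
# alpha value are illustrative assumptions.
import numpy as np


def mixup_pair(x1, y1, x2, y2, n_class=10, alpha=1.0):
    """Return a convex combination of two labeled examples."""
    lam = np.random.beta(alpha, alpha)
    x = lam * x1 + (1.0 - lam) * x2
    t1 = np.eye(n_class, dtype=np.float32)[y1]
    t2 = np.eye(n_class, dtype=np.float32)[y2]
    t = lam * t1 + (1.0 - lam) * t2  # soft label
    return x.astype(np.float32), t


# usage sketch with random CHW images
a = np.random.rand(3, 32, 32).astype(np.float32)
b = np.random.rand(3, 32, 32).astype(np.float32)
x_mixed, t_soft = mixup_pair(a, 3, b, 7)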
    elif dataset == "cifar10":
        # label
        n_class = 10
        # load cifar10
        train, test = get_cifar10()
        # Normally we would loop with an iterator; split into data and
        # labels here for clarity
        train_dataset, test_dataset = split_dataset(train, test)
        train_x = np.array(train_dataset[0])
        train_y = np.array(train_dataset[1])
    elif dataset == "cifar100":
        # label
        n_class = 100
        # cifar100
        train, test = get_cifar100()
        # Normally we would loop with an iterator; split into data and
        # labels here for clarity
        train_dataset, test_dataset = split_dataset(train, test)
        train_x = np.array(train_dataset[0])
        train_y = np.array(train_dataset[1])
    else:
        raise RuntimeError('Invalid dataset choice.')

    loss = 0
    average_loss = []
    accuracy_list = []
    n_train_data = len(train)
    start_at = time.time()
    cur_at = start_at
def make_datasets():
    Images = []  # 3*32*32
    Nums = []
    Images_test = []  # 3*32*32
    Nums_test = []
    cf100_train, cf100_test = get_cifar100()
    cf10_train, cf10_test = get_cifar10()
    # store the selected cifar100 classes (plus horizontal flips) in lists
    for i in cf100_train:
        if (i[1] == 8 or i[1] == 48 or i[1] == 90):
            # bicycle 8, motorcycle 48, train 90
            Images.append(i[0])
            itrans = i[0][:, :, ::-1]  # horizontal flip
            Images.append(itrans)
            if (i[1] == 8):
                Nums.append(0)
                Nums.append(0)
            elif (i[1] == 48):
                Nums.append(1)
                Nums.append(1)
            else:
                Nums.append(2)
                Nums.append(2)
    for j in cf100_test:
        if (j[1] == 8 or j[1] == 48 or j[1] == 90):
            Images_test.append(j[0])
            jtrans = j[0][:, :, ::-1]
            Images_test.append(jtrans)
            if (j[1] == 8):
                Nums_test.append(0)
                Nums_test.append(0)
            elif (j[1] == 48):
                Nums_test.append(1)
                Nums_test.append(1)
            else:
                Nums_test.append(2)
                Nums_test.append(2)
    for k in cf10_train:
        if (k[1] == 1):  # automobile
            Images.append(k[0])
            Images.append(k[0][:, :, ::-1])
            Nums.append(3)
            Nums.append(3)
            if (len(Images) == 2000 * 2):
                break
    for k in cf10_test:
        if (k[1] == 1):  # automobile
            Images_test.append(k[0])
            Images_test.append(k[0][:, :, ::-1])
            Nums_test.append(3)
            Nums_test.append(3)
            if (len(Images_test) == 400 * 2):
                break
    data_dir_path = u"./dataset/from_vtest/"
    file_list = os.listdir(r'./dataset/from_vtest/')
    for file_name in file_list:
        root, ext = os.path.splitext(file_name)
        if ext in (u'.png', u'.jpeg', u'.jpg'):
            abs_name = data_dir_path + file_name
            im = Image.open(abs_name)
            im = im.resize((32, 32))
            imarray = numpy.asarray(im)
            Images.append(
                imarray.transpose(2, 0, 1).astype(numpy.float32) / 256)
            s = imarray.transpose(2, 0, 1).astype(numpy.float32) / 256
            strans = s[:, :, ::-1]
            Images.append(strans)
            Nums.append(4)
            Nums.append(4)
            if (len(Images) == 2500 * 2 + 2000):
                break
    for i in range(3000, 3200):
        file_name = file_list[i]
        root, ext = os.path.splitext(file_name)
        if ext in (u'.png', u'.jpeg', u'.jpg'):
            abs_name = data_dir_path + file_name
            im = Image.open(abs_name)
            im = im.resize((32, 32))
            imarray = numpy.asarray(im)
            Images_test.append(
                imarray.transpose(2, 0, 1).astype(numpy.float32) / 256)
            t = imarray.transpose(2, 0, 1).astype(numpy.float32) / 256
            ttrans = t[:, :, ::-1]
            Images_test.append(ttrans)
            Nums_test.append(4)
            Nums_test.append(4)
    trains = tuple_dataset.TupleDataset(Images, Nums)
    tests = tuple_dataset.TupleDataset(Images_test, Nums_test)
    return trains, tests
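# ----------------------------------------------------------------------
# The slicing idiom used throughout make_datasets(), arr[:, :, ::-1],
# mirrors a CHW image along its last (width) axis, which is how the
# function doubles every class with horizontal flips. A quick
# self-contained check of that behaviour:
import numpy as np

img = np.arange(2 * 2 * 3).reshape(2, 2, 3)  # toy CHW array
flipped = img[:, :, ::-1]
assert (flipped[:, :, 0] == img[:, :, -1]).all()  # columns reversed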
    elif dataset == "cifar10":
        # label
        n_class = 10
        # load cifar10
        train, test = get_cifar10()
        # Normally we would loop with an iterator; split into data and
        # labels here for clarity
        train_dataset, test_dataset = split_dataset(train, test)
        train_x = xp.array(train_dataset[0])
        train_y = xp.array(train_dataset[1])
    elif dataset == "cifar100":
        # label
        n_class = 100
        # cifar100
        train, test = get_cifar100()
        # Normally we would loop with an iterator; split into data and
        # labels here for clarity
        train_dataset, test_dataset = split_dataset(train, test)
        train_x = xp.array(train_dataset[0])
        train_y = xp.array(train_dataset[1])
    else:
        raise RuntimeError('Invalid dataset choice.')

    loss = 0
    gen_loss = []
    dis_loss = []
    n_train_data = len(train_x)

    # hyperparameters
def main():
    parser = argparse.ArgumentParser(description="Chainer CIFAR example:")
    parser.add_argument("--dataset", "-d", default="cifar10",
                        help="The dataset to use: cifar10 or cifar100")
    parser.add_argument("--batchsize", "-b", type=int, default=128,
                        help="Number of images in each mini-batch")
    parser.add_argument("--epoch", "-e", type=int, default=300,
                        help="Number of sweeps over the dataset to train")
    parser.add_argument("--gpu", "-g", type=int, default=0,
                        help="GPU ID (negative value indicates CPU)")
    parser.add_argument("--out", "-o", default="result",
                        help="Directory to output the result")
    parser.add_argument("--resume", "-r", default="",
                        help="Resume the training from snapshot")
    args = parser.parse_args()

    print("GPU: {}".format(args.gpu))
    print("# Minibatch-size: {}".format(args.batchsize))
    print("# epoch: {}".format(args.epoch))
    print("")

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == "cifar10":
        print("Using CIFAR10 dataset.")
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == "cifar100":
        print("Using CIFAR100 dataset.")
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError("Invalid dataset choice.")

    model = L.Classifier(models.VGG.VGG(class_labels))
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(0.1)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, "epoch"), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(TestModeEvaluator(test_iter, model, device=args.gpu))

    # Reduce the learning rate by half every 25 epochs.
    trainer.extend(extensions.ExponentialShift("lr", 0.5),
                   trigger=(25, "epoch"))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph("main/loss"))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, "epoch"))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport(
            ["epoch", "main/loss", "validation/main/loss", "main/accuracy",
             "validation/main/accuracy", "elapsed_time"]
        )
    )

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
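# ----------------------------------------------------------------------
# TestModeEvaluator is referenced in this and the following scripts but
# not defined in these snippets. A plausible minimal sketch for older
# Chainer versions, where models exposed a `train` attribute; this is an
# assumption about the missing class, not its actual definition. In
# Chainer v2 and later the stock extensions.Evaluator already runs under
# chainer.using_config('train', False), so this subclass is unnecessary.
class TestModeEvaluator(extensions.Evaluator):

    def evaluate(self):
        model = self.get_target('main')
        model.predictor.train = False  # switch off train-time behaviour
        ret = super(TestModeEvaluator, self).evaluate()
        model.predictor.train = True   # restore training mode
        return ret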
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    model = L.Classifier(models.VGG.VGG(class_labels))
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(TestModeEvaluator(test_iter, model, device=args.gpu))

    # Reduce the learning rate by half every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5),
                   trigger=(25, 'epoch'))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--same_batch', '-s', action='store_true',
                        help='if True and using multiple GPUs, use '
                             'batchsize*gpu_num')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu_num', '-gn', type=int, default=1,
                        help='number of GPUs (negative value indicates CPU)')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='main GPU ID (negative value indicates CPU)')
    parser.add_argument('--model', '-m', default='allconvnet',
                        help='choose training model')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    args = parser.parse_args()

    print('# number of GPUs in use: {}'.format(args.gpu_num))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))

    # make dump name with this experiment
    dump_dir = './result/train_log' + '_gpu_num-' + str(
        args.gpu_num) + "_model-" + str(args.model) + '_epoch-' + str(
            args.epoch) + '_batchsize-' + str(
                args.batchsize) + '_dataset-' + str(args.dataset)

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('# Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('# Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    if args.model == 'resnet':
        print('# cnn_model: resnet')
        model = L.Classifier(ResNet(class_labels=class_labels))
    elif args.model == 'allconvnet':
        print('# cnn_model: AllConvNetBN')
        model = L.Classifier(AllConvNetBN(class_labels))
    else:
        raise RuntimeError('Invalid model choice.')

    if args.gpu >= 0 and args.gpu_num >= 1:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current

    # optimizer = chainer.optimizers.MomentumSGD(0.01)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    # optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    # In a multi-GPU setup, i.e. parallel training with ParallelUpdater,
    # each GPU effectively processes batchsize / gpu_num samples.
    batchsize = args.batchsize * args.gpu_num if args.same_batch \
        else args.batchsize
    train_iter = chainer.iterators.SerialIterator(train, batchsize)
    test_iter = chainer.iterators.SerialIterator(test, batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    if args.gpu_num <= 1:
        print("# main gpu: ", args.gpu)
        model.to_gpu()  # Copy the model to the GPU
        updater = training.StandardUpdater(train_iter, optimizer,
                                           device=args.gpu)
    elif args.gpu_num >= 2:
        _devices = {'main': args.gpu}
        print("# main gpu: ", args.gpu)
        for g_idx in range(1, args.gpu_num):
            _devices[str(g_idx)] = g_idx
        print("# using gpus: ", _devices)
        updater = training.ParallelUpdater(
            train_iter,
            optimizer,
            devices=_devices,
        )
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=dump_dir)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(TestModeEvaluator(test_iter, model, device=args.gpu))

    # Halve Adam's alpha every 20 epochs.
    trainer.extend(extensions.ExponentialShift('alpha', 0.5),
                   trigger=(20, 'epoch'))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        print('Resume from a snapshot')
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def train():
    device = chainer.get_device(device_id)
    device.use()

    print('Device: {}'.format(device))
    print('# Minibatch-size: {}'.format(batchsize))
    print('# epoch: {}'.format(epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    model = L.Classifier(models.VGG.VGG(class_labels))
    model.to_device(device)

    optimizer = chainer.optimizers.MomentumSGD(learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, batchsize)
    test_iter = chainer.iterators.SerialIterator(test, batchsize,
                                                 repeat=False, shuffle=False)

    stop_trigger = (epoch, 'epoch')

    # Set up a trainer
    out = './result'
    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=device)
    trainer = training.Trainer(updater, stop_trigger, out=out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=device))

    # Reduce the learning rate by half every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5),
                   trigger=(25, 'epoch'))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    # TODO(imanishi): Support for ChainerX
    if not isinstance(device, backend.ChainerxDevice):
        trainer.extend(extensions.DumpGraph('main/loss'))

    # Take a snapshot at each epoch
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))

    # Run the training
    trainer.run()
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--device', '-d', type=str, default='-1',
                        help='Device specifier. Either ChainerX device '
                             'specifier or an integer. If non-negative '
                             'integer, CuPy arrays with specified device id '
                             'are used. If negative integer, NumPy arrays '
                             'are used')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--early-stopping', type=str,
                        help='Metric to watch for early stopping')
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu', '-g', dest='device',
                       type=int, nargs='?', const=0,
                       help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    device = chainer.get_device(args.device)
    device.use()

    print('Device: {}'.format(device))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    model = L.Classifier(models.VGG.VGG(class_labels))
    model.to_device(device)

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    stop_trigger = (args.epoch, 'epoch')
    # Early stopping option
    if args.early_stopping:
        stop_trigger = triggers.EarlyStoppingTrigger(
            monitor=args.early_stopping, verbose=True,
            max_trigger=(args.epoch, 'epoch'))

    # Set up a trainer
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=device))

    # Reduce the learning rate by half every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5),
                   trigger=(25, 'epoch'))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    # TODO(imanishi): Support for ChainerX
    if not isinstance(device, backend.ChainerxDevice):
        trainer.extend(extensions.DumpGraph('main/loss'))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot(
        filename='snapshot_epoch_{.updater.epoch}'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()