def main():
    """Train a two-process model-parallel (pipelined) MLP on MNIST with ChainerMN.

    Rank 0 runs the first model half (MLP0); rank 1 runs the second half
    (MLP1) -- presumably exchanging activations via ChainerMN point-to-point
    communication inside the model classes; confirm against MLP0/MLP1.
    """
    parser = argparse.ArgumentParser(
        description='ChainerMN example: pipelined neural network')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    # Prepare ChainerMN communicator.
    # 'pure_nccl' requires GPUs; 'naive' is the CPU-capable communicator.
    if args.gpu:
        comm = chainermn.create_communicator('pure_nccl')
        device = comm.intra_rank  # one GPU per process, indexed within the node
    else:
        comm = chainermn.create_communicator('naive')
        device = -1  # CPU

    # The pipeline is hard-wired for exactly two stages.
    if comm.size != 2:
        raise ValueError(
            'This example can only be executed on exactly 2 processes.')

    if comm.rank == 0:
        print('==========================================')
        if args.gpu:
            print('Using GPUs')
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    # Rank 0 hosts the input half (wrapped in a Classifier so loss/accuracy
    # get reported); rank 1 hosts the output half.
    if comm.rank == 0:
        model = L.Classifier(MLP0(comm, args.unit))
    elif comm.rank == 1:
        model = MLP1(comm, args.unit, 10)

    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Iterate dataset only on worker 0.
    train, test = chainer.datasets.get_mnist()
    if comm.rank == 1:
        # Rank 1 receives activations rather than raw samples, so it iterates
        # an empty placeholder dataset to stay in step with rank 0.
        train = chainermn.datasets.create_empty_dataset(train)
        test = chainermn.datasets.create_empty_dataset(test)

    # shuffle=False keeps both ranks aligned on the same sample order.
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize,
                                                  shuffle=False)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.Evaluator(test_iter, model, device=device))

    # Some display and output extensions are necessary only for worker 0.
    if comm.rank == 0:
        trainer.extend(extensions.DumpGraph('main/loss'))
        trainer.extend(extensions.LogReport())
        trainer.extend(
            extensions.PrintReport([
                'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
                'validation/main/accuracy', 'elapsed_time'
            ]))
        trainer.extend(extensions.ProgressBar())

    trainer.run()
def main():
    """Train an MLP on MNIST with data-parallel computation on two GPUs."""
    # This script is almost identical to train_mnist.py. The only difference is
    # that this script uses data-parallel computation on two GPUs.
    # See train_mnist.py for more details.
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=400,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu0', '-g', type=int, default=0,
                        help='First GPU ID')
    parser.add_argument('--gpu1', '-G', type=int, default=1,
                        help='Second GPU ID')
    parser.add_argument('--out', '-o', default='result_parallel',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}, {}'.format(args.gpu0, args.gpu1))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Make the first GPU current; the master copy of the model lives there.
    chainer.cuda.get_device_from_id(args.gpu0).use()

    model = L.Classifier(train_mnist.MLP(args.unit, 10))
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train, test = chainer.datasets.get_mnist()
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # ParallelUpdater implements the data-parallel gradient computation on
    # multiple GPUs. It accepts "devices" argument that specifies which GPU to
    # use.
    updater = training.ParallelUpdater(
        train_iter,
        optimizer,
        # The device of the name 'main' is used as a "master", while others are
        # used as slaves. Names other than 'main' are arbitrary.
        devices={
            'main': args.gpu0,
            'second': args.gpu1
        },
    )
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluation runs on the master GPU only.
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu0))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume trainer state (model, optimizer, extensions) from a snapshot.
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Data-parallel MNIST training across MPI workers with ChainerMN."""
    parser = argparse.ArgumentParser(description='ChainerMN example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--communicator', type=str, default='hierarchical',
                        help='Type of communicator')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    # Prepare ChainerMN communicator.
    if args.gpu:
        # 'naive' is CPU-only; refuse the combination up front.
        if args.communicator == 'naive':
            print("Error: 'naive' communicator does not support GPU.\n")
            exit(-1)
        comm = chainermn.create_communicator(args.communicator)
        device = comm.intra_rank  # one GPU per process within the node
    else:
        if args.communicator != 'naive':
            print('Warning: using naive communicator '
                  'because only naive supports CPU-only execution')
        comm = chainermn.create_communicator('naive')
        device = -1  # CPU

    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    model = L.Classifier(MLP(args.unit, 10))
    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()

    # Create a multi node optimizer from a standard Chainer optimizer.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    # Split and distribute the dataset. Only worker 0 loads the whole dataset.
    # Datasets of worker 0 are evenly split and distributed to all workers.
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Create a multi node evaluator from a standard Chainer evaluator.
    evaluator = extensions.Evaluator(test_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        trainer.extend(extensions.dump_graph('main/loss'))
        trainer.extend(extensions.LogReport())
        trainer.extend(
            extensions.PrintReport([
                'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
                'validation/main/accuracy', 'elapsed_time'
            ]))
        trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
# Build the Chain: a 2-input -> n_units -> 2-output MLP with a tanh hidden layer.
# NOTE(review): `n_units`, `X`, `y`, `MLP`, `n_epoch`, `N`, `batchsize`,
# `x_train` and `y_train` are globals defined outside this view.
class Model(Chain):
    def __init__(self):
        super(Model, self).__init__(
            l1=L.Linear(2, n_units),
            l2=L.Linear(n_units, 2),
        )

    def __call__(self, x_):
        # tanh hidden layer, then a linear output (logits for two classes).
        h1 = F.tanh(self.l1(x_))
        y_ = self.l2(h1)
        return y_


# Build the Classifier chain (adds softmax cross entropy loss and accuracy).
model_ = L.Classifier(Model())

# Build the optimizer.
optimizer = optimizers.Adam()
optimizer.setup(model_)

# Training: full-batch updates for 20000 steps.
x = Variable(X.astype(np.float32))
t = Variable(y.astype(np.int32))
for _ in range(20000):
    optimizer.update(model_, x, t)


def predict(model, x_data):
    # NOTE(review): as written this returns the module-level `y`, not a
    # prediction computed from `x_`; a line such as `y = model.predictor(x_)`
    # was probably lost -- confirm against the original script.
    x_ = Variable(x_data.astype(np.float32))
    return y


class Classifier(Chain):
    # Hand-rolled equivalent of L.Classifier: wraps a predictor and reports
    # softmax cross entropy loss plus accuracy on each call.
    def __init__(self, predictor):
        super(Classifier, self).__init__(predictor=predictor)

    def __call__(self, x, t):
        y = self.predictor(x)
        self.loss = F.softmax_cross_entropy(y, t)
        self.accuracy = F.accuracy(y, t)
        return self.loss


# Setup optimizer
model = L.Classifier(MLP())
optimizer = optimizers.Adam()
optimizer.setup(model)

# Learning loop (Python 2 syntax: print statement and xrange).
# NOTE(review): this chunk ends mid-loop; the minibatch update consuming
# x_batch / y_batch lies beyond this view.
for epoch in xrange(1, n_epoch + 1):
    print 'epoch', epoch
    # Reshuffle the sample order each epoch.
    perm = np.random.permutation(N)
    sum_accuracy = 0
    sum_loss = 0
    for i in xrange(0, N, batchsize):
        x_batch = Variable(x_train[perm[i:i + batchsize]])
        y_batch = Variable(y_train[perm[i:i + batchsize]])
def get_nutrition_model(args, alphabet_size):
    """Build the nutrition model selected by command-line ``args``.

    Regression path (``args.categories is None``): a per-ingredient regression
    network summed over the recipe, wrapped in an RMSE or MAE loss.
    Classification path: a pooled recipe classifier, or -- for the
    'the-five-union' dataset -- a five-task multi-task classifier.

    Returns the ready-to-train chainer link.
    """
    from commoncrawl_dataset import get_fields
    fields, add_recipe_yield, union = get_fields(args.dataset)
    # One extra output field when recipeYield is predicted as well.
    n_fields = len(fields) + (1 if add_recipe_yield else 0)
    network = args.network
    parts = network.split("-")
    # Map the network-name token to a layer type; anything unknown means '3x3'.
    types = {'multiple': 'multiple', 'multiple_wide': 'multiple-wide'}
    layer_type = types.get(parts[-2], '3x3')
    if args.categories is None:
        # Regression over the n_fields nutrition values.
        ingredient = NutritionRegressionIngredient(
            alphabet_size,
            n_fields,
            args.num_ingredients,
            args.num_chars,
            parts[-1],
            layer_type,
            multiplication=args.multiplication)
        recipe = SummedRegressionRecipe(True,
                                        recipe_size=args.num_ingredients,
                                        ingredient=ingredient)
        if not args.mae:
            model = RMSE(recipe, n_fields, calculate_mae=args.validate_on_mae)
        else:
            model = MAE(recipe, n_fields)
    else:
        # Classification into args.categories buckets.
        ing = SimpleIngredient(True,
                               alphabet_size,
                               embed_input=False,
                               depth=parts[-1],
                               width=args.num_chars,
                               inception=True,
                               layer_type=layer_type)
        if args.dataset != 'the-five-union':
            recipe = PooledRecipeCuisine(True, args.num_ingredients, ing,
                                         args.categories, 'both', 1024)
            model = L.Classifier(recipe)
        else:
            # One softmax-cross-entropy task per nutrition target, equally
            # weighted (factor .2 each); accuracy ignores the -1 padding label.
            # (The original listed five identical dict literals; a
            # comprehension keeps them in sync.)
            tasks = [{
                'name': name,
                'factor': .2,
                'loss_fun': F.loss.softmax_cross_entropy.softmax_cross_entropy,
                'acc_fun': F.evaluation.accuracy.Accuracy(ignore_label=-1)
            } for name in ('calories', 'cholesterol', 'protein', 'transFat',
                           'recipeYield')]
            if args.split == 'early':
                recipe = MultiTaskPoolFive(True, args.num_ingredients, ing,
                                           args.categories, 'both', 1024)
            else:
                recipe = MultiTaskPoolFiveLaterSplit(True,
                                                     args.num_ingredients, ing,
                                                     args.categories, 'both',
                                                     1024)
            from links import MultiTaskClassifier
            model = MultiTaskClassifier(recipe, tasks)
    return model
def train(network_object, dataset, testdataset, batchsize=128, gpu_id=0,
          max_epoch=20, postfix='', base_lr=0.01, lr_decay=None):
    """Train ``network_object`` on ``dataset`` with a four-GPU ParallelUpdater.

    Writes snapshots, plots, and the final model/optimizer state under
    '<ClassName>_Honkan_result_<postfix>' and returns the trained
    ``L.Classifier`` wrapper.

    NOTE(review): GPU ids come from command-line flags (--gpu0..--gpu3) parsed
    inside this function, not from ``gpu_id``; confirm this mixed
    configuration is intended.
    """
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=400,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu0', '-g', type=int, default=0,
                        help='First GPU ID')
    parser.add_argument('--gpu1', '-G', type=int, default=1,
                        help='Second GPU ID')
    parser.add_argument('--gpu2', type=int, default=2,
                        help='Third GPU ID')
    parser.add_argument('--gpu3', type=int, default=3,
                        help='Fourth GPU ID')
    parser.add_argument('--out', '-o', default='result_parallel',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    # 1. Prepare datasets.
    # BUGFIX: use the `dataset`/`testdataset` parameters. The original body
    # referenced module-level globals (Honkan_dataset / Honkan_testdataset)
    # and silently ignored the arguments passed by the caller.
    train_size = int(len(dataset) * 1.0)
    train, _ = chainer.datasets.split_dataset_random(dataset, train_size,
                                                     seed=0)
    #train_size = int(len(train_val) * 0.9)
    #train, valid = chainer.datasets.split_dataset_random(train_val, train_size, seed=0)
    test_size = int(len(testdataset) * 0.2)
    test, valid = chainer.datasets.split_dataset_random(testdataset, test_size,
                                                        seed=0)

    # Data augmentation. NOTE(review): the train-time transform is applied to
    # all three splits; valid/test usually use train=False -- confirm.
    train_dataset = TransformDataset(train, partial(transform, train=True))
    valid_dataset = TransformDataset(valid, partial(transform, train=True))
    test_dataset = TransformDataset(test, partial(transform, train=True))

    # 2. Iterators.
    #train_iter = iterators.SerialIterator(train, batchsize)
    train_iter = iterators.MultiprocessIterator(train, batchsize)
    #train_iter = iterators.MultiprocessIterator(train, batchsize, n_processes=1)
    #valid_iter = iterators.SerialIterator(valid, batchsize, False, False)
    valid_iter = iterators.MultiprocessIterator(valid, batchsize, False, False)
    #valid_iter = chainer.iterators.MultiprocessIterator(valid, batchsize, repeat=False,shuffle=False, n_processes=1)

    # 3. Model.
    net = L.Classifier(network_object)
    # if gpu_id >= 0:
    #     cuda.check_cuda_available()
    #     chainer.cuda.get_device(gpu_id).use()
    #     net.to_gpu(gpu_id)

    # 4. Optimizer: momentum SGD with weight decay regularization.
    optimizer = optimizers.MomentumSGD(lr=base_lr).setup(net)
    #optimizer = optimizers.Adam().setup(net)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005))

    # 5. Updater: data-parallel over four GPUs; 'main' is the master device.
    # updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id)
    updater = training.updaters.ParallelUpdater(
        train_iter,
        optimizer,
        devices={
            'main': args.gpu0,
            'second': args.gpu1,
            'third': args.gpu2,
            'fourth': args.gpu3
        },
    )

    # 6. Trainer.
    trainer = training.Trainer(updater, (max_epoch, 'epoch'),
                               out='{}_Honkan_result_{}'.format(
                                   network_object.__class__.__name__,
                                   postfix))

    # 7. Trainer extensions.
    trainer.extend(extensions.LogReport())
    # BUGFIX: the snapshot extension was registered twice with the same
    # filename; one registration is enough.
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch-{.updater.epoch}'))
    #trainer.extend(extensions.Evaluator(valid_iter, net, device=gpu_id), name='val')
    trainer.extend(extensions.Evaluator(valid_iter, net, device=args.gpu0),
                   name='val')
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'main/accuracy', 'val/main/loss',
            'val/main/accuracy', 'l1/W/data/std', 'elapsed_time'
        ]))
    trainer.extend(
        extensions.PlotReport(['l1/W/data/std'],
                              x_key='epoch',
                              file_name='std.png'))
    trainer.extend(
        extensions.PlotReport(['main/loss', 'val/main/loss'],
                              x_key='epoch',
                              file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/accuracy', 'val/main/accuracy'],
                              x_key='epoch',
                              file_name='accuracy.png'))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.ProgressBar())
    # Periodically serialize the bare network weights as well.
    trainer.extend(
        extensions.snapshot_object(net.predictor,
                                   filename='model_epoch-{.updater.epoch}'))

    if lr_decay is not None:
        trainer.extend(extensions.ExponentialShift('lr', 0.1),
                       trigger=lr_decay)
    trainer.run()
    del trainer

    # 8. Final evaluation on the held-out test split.
    test_iter = iterators.SerialIterator(test, batchsize, False, False)
    #test_iter = iterators.MultiprocessIterator(test, batchsize, False, False)
    #test_evaluator = extensions.Evaluator(test_iter, net, device=gpu_id)
    test_evaluator = extensions.Evaluator(test_iter, net, device=args.gpu0)
    results = test_evaluator()
    print('Test accuracy:', results['main/accuracy'])

    # Persist the final model and optimizer state.
    out_put = '{}_Honkan_result_{}'.format(network_object.__class__.__name__,
                                           postfix)
    save_model = out_put + '/model.model'
    save_optimizer = out_put + '/model_optimizer.npz'
    chainer.serializers.save_npz(save_model, net)
    chainer.serializers.save_npz(save_optimizer, optimizer)

    return net
# NOTE(review): fragment -- the enclosing method's `def` line (presumably the
# forward pass of MLP below) is above this view; conv1-conv5 and l1-l4 are
# links defined in its __init__.
h = F.max_pooling_2d(x, 2)
# Five conv + relu + 2x2 max-pool stages, halving spatial size each time.
h1 = F.max_pooling_2d(F.relu(self.conv1(h)), 2)
h2 = F.max_pooling_2d(F.relu(self.conv2(h1)), 2)
h3 = F.max_pooling_2d(F.relu(self.conv3(h2)), 2)
h4 = F.max_pooling_2d(F.relu(self.conv4(h3)), 2)
h5 = F.max_pooling_2d(F.relu(self.conv5(h4)), 2)
# Fully-connected head.
y = F.relu(self.l1(h5))
y1 = F.relu(self.l2(y))
y2 = F.relu(self.l3(y1))
# NOTE(review): l4 is applied twice (inside relu and again for the return);
# the final layer was probably meant to be a separate l5 -- confirm.
y3 = F.relu(self.l4(y2))
return self.l4(y3)

#%%
model0 = MLP()
model = L.Classifier(model0)
# Pin GPU 0 and move the model there.
chainer.cuda.get_device(0).use()
model.to_gpu()

# Setup an optimizer
optimizer = optimizers.Adam()
optimizer.setup(model)

#%%
# NOTE(review): train_iter, test_iter and gpu_flag are defined outside this
# view; trainer.run() presumably follows beyond it.
updater = training.StandardUpdater(train_iter, optimizer, device=gpu_flag)
trainer = training.Trainer(updater, (15, 'epoch'), out='result')
trainer.extend(extensions.Evaluator(test_iter, model, device=gpu_flag))
trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
trainer.extend(
    extensions.PrintReport(
        ['epoch', 'main/accuracy', 'validation/main/accuracy']))
trainer.extend(extensions.ProgressBar())
def main():
    """Train an LSTM language model on Penn Treebank with truncated BPTT and
    report perplexity on the validation and test sets."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=20,
                        help='Number of examples in each mini-batch')
    parser.add_argument('--bproplen', '-l', type=int, default=35,
                        help='Number of words in each mini-batch '
                        '(= length of truncated BPTT)')
    parser.add_argument('--epoch', '-e', type=int, default=39,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--gradclip', '-c', type=float, default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.set_defaults(test=False)
    parser.add_argument('--unit', '-u', type=int, default=650,
                        help='Number of LSTM units in each layer')
    args = parser.parse_args()

    # Load the Penn Tree Bank long word sequence dataset
    train, val, test = chainer.datasets.get_ptb_words()
    n_vocab = max(train) + 1  # train is just an array of integers
    print('#vocab =', n_vocab)

    if args.test:
        # Tiny slices for a quick smoke test.
        train = train[:100]
        val = val[:100]
        test = test[:100]

    train_iter = ParallelSequentialIterator(train, args.batchsize)
    val_iter = ParallelSequentialIterator(val, 1, repeat=False)
    test_iter = ParallelSequentialIterator(test, 1, repeat=False)

    # Prepare an RNNLM model
    rnn = RNNForLM(n_vocab, args.unit)
    model = L.Classifier(rnn)
    model.compute_accuracy = False  # we only want the perplexity

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # make the GPU current
        model.to_gpu()

    # Set up an optimizer with gradient-norm clipping.
    optimizer = chainer.optimizers.SGD(lr=1.0)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    # Set up a trainer driven by the truncated-BPTT updater.
    updater = BPTTUpdater(train_iter, optimizer, args.bproplen, args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    eval_model = model.copy()  # Model with shared params and distinct states
    eval_rnn = eval_model.predictor
    eval_rnn.train = False
    trainer.extend(
        extensions.Evaluator(
            val_iter,
            eval_model,
            device=args.gpu,
            # Reset the RNN state at the beginning of each evaluation
            eval_hook=lambda _: eval_rnn.reset_state()))

    # Report frequently under --test so short runs still print something.
    interval = 10 if args.test else 500
    trainer.extend(
        extensions.LogReport(postprocess=compute_perplexity,
                             trigger=(interval, 'iteration')))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'perplexity', 'val_perplexity']),
        trigger=(interval, 'iteration'))
    trainer.extend(
        extensions.ProgressBar(update_interval=1 if args.test else 10))
    trainer.extend(extensions.snapshot())
    trainer.extend(
        extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'))
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # Evaluate the final model on the test split; perplexity = exp(mean loss).
    print('test')
    eval_rnn.reset_state()
    evaluator = extensions.Evaluator(test_iter, eval_model, device=args.gpu)
    result = evaluator()
    print('test perplexity:', np.exp(float(result['main/loss'])))
# Hyper-parameter definitions.
batchsize = 100  # mini-batch size
epochs = 20  # number of sweeps over the training set
gpuid = 1
outdir = 'result'
unit = 1000  # number of hidden units

print(f'# GPU: {gpuid}')
print(f'# unit: {unit}')
print(f'# Minibatch-size: {batchsize}')
print(f'# epochs: {epochs}')
print('')

# Set up a neural network to train (on the selected GPU).
model = L.Classifier(MLP(unit, 10))
if gpuid >= 0:
    # use GPU
    chainer.backends.cuda.get_device_from_id(gpuid).use()
    # BUGFIX: was `mode.to_gpu()` -- a NameError typo for `model`, hit on
    # every run since gpuid defaults to 1. Copy the model to the GPU.
    model.to_gpu()

# Set an optimizer.
optimizer = chainer.optimizers.Adam()
optimizer.setup(model)

# Load the dataset (train and test splits).
train, test = chainer.datasets.get_fashion_mnist()
train_count, test_count = len(train), len(test)

train_iter = SerialIterator(train, batchsize)
h_pool = [None for _ in self.filter_height] # フィルタ形毎にループを回す for i, filter_size in enumerate(self.filter_height): # Convolition層を通す h_conv[i] = F.relu(self[i](x)) # Pooling層を通す h_pool[i] = F.max_pooling_2d( h_conv[i], (self.max_sentence_len + 1 - filter_size)) # Convolution+Poolingを行った結果を結合する concat = F.concat(h_pool, axis=2) # 結合した結果に対してDropoutをかける h_l1 = F.dropout(F.tanh(self[self.cnv_num + 0](concat)), ratio=0.5, train=train) # Dropoutの結果を出力層まで圧縮する y = self[self.cnv_num + 1](h_l1) return y if __name__ == '__main__': model = L.Classifier( CNNSC(input_channel=1, output_channel=100, filter_height=[3, 4, 5], filter_width=20, n_label=2, max_sentence_len=20)) print('done process')
# NOTE(review): fragment -- this chunk begins mid-call; the first tokens are
# the tail of `optimizer = chainermn.create_multi_node_optimizer(` from above
# this view. `args`, `comm`, `device`, `train_iter`, `valid_iter`, `datasize`,
# `prepare_extensions`, `model`, `main` and `parse_cmd_args` are defined
# outside this view.
chainer.optimizers.SGD(lr=2e-4), comm)
optimizer.setup(model)
# Mild weight-decay regularization.
optimizer.add_hook(chainer.optimizer.WeightDecay(1e-2))

updater = training.StandardUpdater(train_iter, optimizer, device=device)
trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

# Wrap the evaluator so validation is aggregated across all MPI workers.
evaluator = extensions.Evaluator(valid_iter, model, device=device)
evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
prepare_extensions(trainer, evaluator, args, comm)

trainer.run()

# Only rank 0 reports throughput and saves the trained model.
if comm.rank == 0:
    throughput = datasize / trainer.elapsed_time
    print('Throughput: {} [images/sec.] ({} / {})'.format(
        throughput, datasize, trainer.elapsed_time))
    model_filepath = os.path.join(args.out, 'trained.model')
    chainer.serializers.save_npz(model_filepath, model)


if __name__ == '__main__':
    args = parse_cmd_args()
    with open("train.pkl", "rb") as f:
        x, t = pickle.load(f)
    # NOTE(review): here `model` is used as a module (model.CNN3), shadowed
    # elsewhere as a variable -- confirm against the imports.
    M = L.Classifier(model.CNN3())
    serializers.load_npz("cnn3_multi_node_3_/trained.model", M)
    main(args, M, x, t)
def objective(trial, comm):
    """Optuna objective: train a trial-sampled MLP on MNIST with ChainerMN
    and return its final test accuracy (higher is better)."""
    # Sample an architecture.
    model = L.Classifier(create_model(trial))

    # Setup optimizer, wrapped for multi-node gradient aggregation.
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(model)
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)

    # Setup dataset and iterator. Only worker 0 loads the whole dataset.
    # The dataset of worker 0 is evenly split and distributed to all workers.
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
        # Fixed RNG seed so every trial sees the same data subset.
        rng = np.random.RandomState(0)
        train = chainer.datasets.SubDataset(
            train, 0, N_TRAIN_EXAMPLES, order=rng.permutation(len(train)))
        test = chainer.datasets.SubDataset(
            test, 0, N_TEST_EXAMPLES, order=rng.permutation(len(test)))
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm)

    train_iter = chainer.iterators.SerialIterator(train, BATCHSIZE,
                                                  shuffle=True)
    test_iter = chainer.iterators.SerialIterator(test, BATCHSIZE,
                                                 repeat=False, shuffle=False)

    # Setup trainer.
    updater = chainer.training.StandardUpdater(train_iter, optimizer)
    trainer = chainer.training.Trainer(updater, (EPOCH, 'epoch'))

    # Add Chainer extension for pruners.
    trainer.extend(
        optuna.integration.ChainerPruningExtension(
            trial, 'validation/main/accuracy', (PRUNER_INTERVAL, 'epoch')))
    evaluator = chainer.training.extensions.Evaluator(test_iter, model)
    trainer.extend(chainermn.create_multi_node_evaluator(evaluator, comm))
    log_report_extension = chainer.training.extensions.LogReport(log_name=None)
    trainer.extend(log_report_extension)
    if comm.rank == 0:
        trainer.extend(chainer.training.extensions.ProgressBar())

    # Run training.
    # Please set show_loop_exception_msg False to inhibit messages about
    # TrialPruned exception. ChainerPruningExtension raises TrialPruned
    # exception to stop training, and trainer shows some messages every time
    # it receive TrialPruned.
    trainer.run(show_loop_exception_msg=False)

    # Evaluate once more after training; the multi-node evaluator aggregates
    # the metric across workers.
    evaluator = chainer.training.extensions.Evaluator(test_iter, model)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    report = evaluator()
    return report['main/accuracy']
def main(args):
    """End-to-end training entry point: build the network selected by ``args``,
    train it, and write logs/snapshots/the final model under ``args.out_path``.

    NOTE(review): here ``F`` is a project utility module (args2dict /
    dict2json / getFilePath), not chainer.functions -- confirm against the
    file's imports.
    """
    # Timestamp-derived unique tag so every artifact of this run shares a name.
    exec_time = GET.datetimeSHA()

    # Set up a neural network to train
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.

    # Resolve the activation functions from their command-line names.
    actfun1 = GET.actfun(args.actfun1)
    actfun2 = GET.actfun(args.actfun2)

    # Select the network implementation.
    if args.network == 0:
        from Lib.network import JC_DDUU as JC
    else:
        from Lib.network2 import JC_UDUD as JC

    model = L.Classifier(
        JC(n_unit=args.unit,
           layer=args.layer_num,
           rate=args.shuffle_rate,
           actfun1=actfun1,
           actfun2=actfun2,
           dropout=args.dropout,
           view=args.only_check),
        lossfun=GET.lossfun(args.lossfun)
    )
    # Accuracy is not used for this task, so disable it.
    # (A custom accuracy function would probably be needed to re-enable it.)
    model.compute_accuracy = False

    # Setup an optimizer
    optimizer = GET.optimizer(args.optimizer).setup(model)

    # Load dataset
    train, test, _ = GET.imgData(args.in_path)
    train = ResizeImgDataset(train, args.shuffle_rate)
    test = ResizeImgDataset(test, args.shuffle_rate)

    # Remember the parameters predict.py needs to rebuild the model.
    model_param = F.args2dict(args)
    model_param['shape'] = train[0][0].shape

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(
        train_iter, optimizer, device=args.gpu_id
    )
    trainer = training.Trainer(
        updater, (args.epoch, 'epoch'), out=args.out_path
    )

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu_id))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(
        extensions.dump_graph('main/loss', out_name=exec_time + '_graph.dot')
    )

    # Take a snapshot for each specified epoch frequency
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(
        extensions.snapshot(filename=exec_time + '_{.updater.epoch}.snapshot'),
        trigger=(frequency, 'epoch')
    )

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(log_name=exec_time + '.log'))
    # trainer.extend(extensions.observe_lr())

    # Save two plot images to the result dir
    if args.plot and extensions.PlotReport.available():
        trainer.extend(
            PlotReportLog(['main/loss', 'validation/main/loss'],
                          'epoch', file_name='loss.png')
        )
        # trainer.extend(
        #     PlotReportLog(['lr'],
        #                   'epoch', file_name='lr.png', val_pos=(-80, -60))
        # )

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(extensions.PrintReport([
        'epoch',
        'main/loss',
        'validation/main/loss',
        # 'lr',
        'elapsed_time'
    ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    # Resume from a snapshot
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # Set pruning
    # http://tosaka2.hatenablog.com/entry/2017/11/17/194051
    masks = pruning.create_model_mask(model, args.pruning, args.gpu_id)
    trainer.extend(pruning.pruned(model, masks))

    # Make a specified GPU current
    if args.gpu_id >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu_id).use()
        # Copy the model to the GPU
        model.to_gpu()
        chainer.global_config.autotune = True
    else:
        model.to_intel64()

    # Save the model parameters as JSON so predict.py can reload them.
    if args.only_check is False:
        F.dict2json(args.out_path, exec_time + '_train', model_param)

    # Run the training
    trainer.run()

    # Save the final model separately; a snapshot would also work, but
    # snapshot files are much larger.
    chainer.serializers.save_npz(
        F.getFilePath(args.out_path, exec_time, '.model'),
        model
    )
# Reference implementation of the Classifier wrapper, kept for documentation:
# class Classifier(Chain):
#     def __init__(self, predictor):
#         super(Classifier, self).__init__()
#         with self.init_scope():
#             self.predictor = predictor
#
#     def __call__(self, x, t):
#         y = self.predictor(x)
#         loss = F.softmax_cross_entropy(y, t)
#         accuracy = F.accuracy(y, t)
#         report({'loss': loss, 'accuracy': accuracy}, self)
#         return loss

# Build the chain and attach the objective (softmax cross entropy + accuracy).
model = L.Classifier(MLP(100, 10))  # the input size, 784, is inferred

# Plain stochastic gradient descent.
optimizer = optimizers.SGD()
optimizer.setup(model)

# Wire the training loop: updater drives the optimizer, trainer drives epochs.
updater = training.StandardUpdater(train_iter, optimizer)
trainer = training.Trainer(updater, (20, 'epoch'), out='result')

# Register the reporting/evaluation extensions in one pass.
for extension in (
        extensions.Evaluator(test_iter, model),
        extensions.LogReport(),
        extensions.PrintReport(
            ['epoch', 'main/accuracy', 'validation/main/accuracy']),
        extensions.ProgressBar()):
    trainer.extend(extension)

trainer.run()
def main():
    """Load a trained MLP from a trainer snapshot and classify one
    hand-crafted 3-feature sample, printing the predicted label.

    Most of the argparse flags below mirror the training script and are
    unused here except --unit and --snapshot.
    """
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--trainfile', '-t', type=str, default='test',
                        help='training data file path')
    parser.add_argument('--validationfile', '-v', type=str, default='test',
                        help='training validation data file path')
    parser.add_argument('--batchsize', '-b', type=int, default=1000,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=20,
                        help='Number of units')
    parser.add_argument('--noplot', dest='plot', action='store_false',
                        help='Disable PlotReport extension')
    parser.add_argument('--snapshot', '-s', type=str,
                        default='result/snapshot_iter_281',
                        help='use model snapshot')
    args = parser.parse_args()

    # Set up a neural network to train
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    model = L.Classifier(MLP(args.unit, 10))
    '''if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU
    '''
    # Restore the model weights from a trainer snapshot; the `path` points at
    # the updater's 'main' model inside the snapshot archive.
    chainer.serializers.load_npz(args.snapshot, model,
                                 path='updater/model:main/')

    # Inference mode: disable train-time behavior such as dropout.
    with chainer.using_config('train', False):
        x = np.asarray([0.0, -9.1, -0.7], dtype=np.float32).reshape(1, 3)
        #print(np.shape(x))
        y = model.predictor(x).array
        # '予想ラベル' = "predicted label".
        print('予想ラベル:{0}'.format(y.argmax(axis=1)[0]))
def main():
    """Train an MNIST MLP with data-parallel gradient computation.

    This script is almost identical to train_mnist.py; the only difference
    is that it spreads each mini-batch over two devices via ParallelUpdater.
    """
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=400,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--out', '-o', default='result_data_parallel',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    parser.add_argument('--device0', '-d', type=str, default='0',
                        help='Device specifier of the first device.'
                             'Either ChainerX device '
                             'specifier or an integer. If non-negative integer, '
                             'CuPy arrays with specified device id are used. If '
                             'negative integer, NumPy arrays are used')
    parser.add_argument('--device1', '-D', type=str, default='1',
                        help='Device specifier of the second device. '
                             'Either ChainerX device '
                             'specifier or an integer. If non-negative integer, '
                             'CuPy arrays with specified device id are used. If '
                             'negative integer, NumPy arrays are used')
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu0', '-g', dest='device0', type=int,
                       nargs='?', const=0, help='First GPU ID')
    group.add_argument('--gpu1', '-G', dest='device1', type=int,
                       nargs='?', const=1, help='Second GPU ID')
    args = parser.parse_args()

    device0 = chainer.get_device(args.device0)
    device1 = chainer.get_device(args.device1)

    print('Devices: {}, {}'.format(device0, device1))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    device0.use()

    model = L.Classifier(train_mnist.MLP(args.unit, 10))
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train, test = chainer.datasets.get_mnist()
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # ParallelUpdater implements data-parallel gradient computation across
    # the given devices; 'main' acts as the master, other names are
    # arbitrary slave labels.
    updater = training.updaters.ParallelUpdater(
        train_iter,
        optimizer,
        devices={'main': device0, 'second': device1},
    )
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(extensions.Evaluator(test_iter, model, device=device0))
    # TODO(niboshi): Temporarily disabled for chainerx. Fix it.
    if device0.xp is not chainerx:
        trainer.extend(extensions.DumpGraph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train an MLP classifier on MNIST and periodically snapshot/evaluate."""
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which the PrintReport extension below consumes.
    model = L.Classifier(MLP(args.unit, 10))
    if args.gpu >= 0:
        # Select the requested GPU and move the model onto it.
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train, test = chainer.datasets.get_mnist()
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Per-epoch evaluation on the test split.
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    # Dump the computational graph from the 'main' optimizer's loss once.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Snapshot every `frequency` epochs; -1 means once at the final epoch.
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

    trainer.extend(extensions.LogReport())

    # Loss/accuracy curves, when the plotting backend is available.
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    # 'main' = training metrics from the Classifier; 'validation' = the
    # Evaluator extension's default report prefix.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    # ProgressBar intentionally left disabled in this variant.

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
# data is a list of [image, label] pairs; hold out 10% of it for testing.
print(len(data))
random.shuffle(data)
n_test = len(data) // 10
train = chainer.datasets.LabeledImageDataset(data[n_test:])
test = chainer.datasets.LabeledImageDataset(data[:n_test])

# Iterators: shuffled training batches, single-pass ordered test batches.
train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                             repeat=False, shuffle=False)

# Model wrapped in a classifier head, optionally moved to the GPU.
model = L.Classifier(Network(n_class))
if args.gpu >= 0:
    chainer.cuda.get_device(args.gpu).use()
    model.to_gpu()

optimizer = chainer.optimizers.Adam()
optimizer.setup(model)

updater = chainer.training.StandardUpdater(train_iter, optimizer,
                                           device=args.gpu)
trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'), 'result')
def main(arg_list=None):
    """Evaluate a trained acoustic model and print its phone error rate.

    Loads the network (optionally an RPL ensemble of fold models), applies
    the Kaldi feature transform and splicing to the evaluation data, and
    runs the triphone decoder to compute PER.

    Args:
        arg_list: Optional list of CLI arguments (each coerced to ``str``);
            when ``None``, arguments are taken from ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description='Chainer Evaluation')
    parser.add_argument('--network', '-n', default='ff',
                        help='Neural network type, either "ff" or "lstm"')
    parser.add_argument('--model', '-m', default='', help='Path to the model')
    parser.add_argument('--units', '-u', type=int, nargs='+', default=[1024],
                        help='Number of units')
    parser.add_argument('--layers', '-l', type=int, default=2,
                        help='Number of hidden layers')
    parser.add_argument('--activation', '-a', default='relu',
                        help='FF activation function (sigmoid, tanh or relu)')
    parser.add_argument('--tdnn-ksize', type=int, nargs='+', default=[5],
                        help='TDNN kernel size')
    parser.add_argument('--timedelay', type=int, default=0,
                        help='Delay target values by this many time steps')
    parser.add_argument('--splice', type=int, default=0, help='Splicing size')
    parser.add_argument(
        '--dropout', '-d', type=float, nargs='+', default=[0],
        help=
        'Dropout rate (0 to disable). In case of Zoneout LSTM, this parameter has 2 arguments: c_ratio h_ratio'
    )
    parser.add_argument('--tri', action='store_true', help='Use triphones')
    parser.add_argument('--ft', default='final.feature_transform',
                        help='Kaldi feature transform file')
    parser.add_argument(
        '--data-dir', default='data/fmllr',
        help=
        'Data directory, this will be prepended to data files and feature transform'
    )
    parser.add_argument(
        '--offset-dir', default='data',
        help='Data directory, this will be prepended to offset files')
    parser.add_argument(
        '--ivector-dir',
        help='Data directory, this will be prepended to ivector files')
    parser.add_argument('--recog-dir', required=True,
                        help='Directory with recognizer files')
    parser.add_argument('--utt-list-dir', default='data',
                        help='Directory with utterance lists')
    parser.add_argument('--data', default='data_{}.npy', help='Data file')
    parser.add_argument('--offsets', default='offsets_{}.npy',
                        help='Offset file')
    parser.add_argument('--ivectors', default='ivectors_{}.npy',
                        help='ivectors file')
    parser.add_argument('--PIP', type=float, default=20)
    parser.add_argument('--LMW', type=float, default=1)
    parser.add_argument('--ap-coef', type=float, default=1)
    parser.add_argument('--ap-file', default='log_ap_Kaldi1909.npy',
                        help='Path relative to recogdir')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--test-or-dev', default='test', help='Test or dev')
    parser.add_argument('--rpl', action='store_true',
                        help='Use RPL layer with folds')
    parser.add_argument('--no-rpl-layer', action='store_true',
                        help='Disable RPL layer')
    parser.add_argument('--rpl-model', default="result_rpl/model",
                        help='RPL layer model')
    parser.add_argument('--fold-model-dir', default="fold_models",
                        help='Directory with trained fold models')
    parser.add_argument('--fold-network-pattern', default='fold_{0}.npz',
                        help='Filename pattern of each fold network')
    parser.add_argument('--master-network', default="-",
                        help='Master network')
    parser.add_argument('--no-progress', action='store_true',
                        help='Disable progress bar')
    if arg_list is not None:
        args = parser.parse_args(list(map(str, arg_list)))
    else:
        args = parser.parse_args()

    # 1909 tied triphone states vs. 39 monophone classes.
    num_classes = 1909 if args.tri else 39
    chainer.config.train = False

    if args.activation == "sigmoid":
        activation = F.sigmoid
    elif args.activation == "tanh":
        activation = F.tanh
    elif args.activation == "relu":
        activation = F.relu
    else:
        print("Wrong activation function specified")
        return

    if args.rpl:
        # RPL mode: optional master network + all fold networks found on
        # disk + optional RPL correction layer, combined into one model.
        fold = 0
        fold_models = []
        if args.master_network != "-":
            print("Loading master network")
            master_model = get_nn(args.network, args.layers, args.units,
                                  num_classes, activation, args.tdnn_ksize,
                                  args.dropout)
            master_model_cls = L.Classifier(master_model)
            chainer.serializers.load_npz(args.master_network,
                                         master_model_cls)
        else:
            master_model = None
        if args.fold_network_pattern != "-":
            # Load fold_0, fold_1, ... until a file is missing.
            while True:
                model_file = Path(args.fold_model_dir,
                                  args.fold_network_pattern.format(fold))
                if not model_file.is_file():
                    break
                print("Loading fold {} network".format(fold))
                fold_model = get_nn(args.network, args.layers, args.units,
                                    num_classes, activation, args.tdnn_ksize,
                                    args.dropout)
                fold_model_cls = L.Classifier(fold_model)
                chainer.serializers.load_npz(model_file, fold_model_cls)
                fold_models.append(fold_model)
                fold += 1
        if args.rpl_model != "-":
            rpl_model = RPL4(num_classes)
            rpl_model_cls = L.Classifier(rpl_model)
            chainer.serializers.load_npz(args.rpl_model, rpl_model_cls)
        else:
            rpl_model = None
        model = NNWithRPL(master_model, fold_models, rpl_model)
    else:
        model = get_nn(args.network, args.layers, args.units, num_classes,
                       activation, args.tdnn_ksize, args.dropout)
        model_cls = L.Classifier(model)
        chainer.serializers.load_npz(args.model, model_cls)

    # TDNN derives its effective splice from the kernel sizes.
    if args.network == "tdnn":
        splice = (sum(args.tdnn_ksize) - len(args.tdnn_ksize)) // 2
    else:
        splice = args.splice

    # FIX: the original condition was `if not args.ft and args.ft != '-':`,
    # which with the non-empty default never loaded the transform (and would
    # try to load an empty filename when --ft ''). Load it when a real path
    # is given, and treat '-' as "no transform".
    if args.ft and args.ft != '-':
        ft = loadKaldiFeatureTransform(str(Path(args.data_dir, args.ft)))
        if is_nn_recurrent(args.network):
            # Recurrent nets consume one frame at a time: keep only the
            # zero-shift slice of the transform.
            dim = ft["shape"][1]
            zi = ft["shifts"].index(0)
            ft["rescale"] = ft["rescale"][zi * dim:(zi + 1) * dim]
            ft["addShift"] = ft["addShift"][zi * dim:(zi + 1) * dim]
            ft["shape"][0] = dim
            ft["shifts"] = [0]
        elif args.network == "tdnn":
            # TDNN consumes a spliced window: tile the zero-shift slice
            # across the whole window.
            dim = ft["shape"][1]
            zi = ft["shifts"].index(0)
            winlen = 2 * splice + 1
            ft["rescale"] = np.tile(ft["rescale"][zi * dim:(zi + 1) * dim],
                                    winlen)
            ft["addShift"] = np.tile(
                ft["addShift"][zi * dim:(zi + 1) * dim], winlen)
            ft["shape"][0] = dim * winlen
            ft["shifts"] = list(range(-splice, splice + 1))
    else:
        ft = None

    data = np.load(
        str(Path(args.data_dir, args.data.format(args.test_or_dev))))
    if splice > 0:
        data = splicing(data, range(-splice, splice + 1))
    if ft is not None:
        data = applyKaldiFeatureTransform(data, ft)
    offsets = np.load(
        str(Path(args.offset_dir, args.offsets.format(args.test_or_dev))))
    if args.ivector_dir is not None:
        ivectors = np.load(
            str(Path(args.ivector_dir,
                     args.ivectors.format(args.test_or_dev))))
        data = np.concatenate((data, ivectors), axis=1)

    if args.tri:
        ap = args.ap_coef * np.load(str(Path(args.recog_dir, args.ap_file)))
        per = evaluateModelTestTri(model, data, offsets, args.PIP, args.LMW,
                                   ap=ap, testOrDev=args.test_or_dev,
                                   uttlistdir=args.utt_list_dir,
                                   recogdir=args.recog_dir, GPUID=args.gpu,
                                   progress=not args.no_progress,
                                   rnn=is_nn_recurrent(args.network))
    else:
        print("Monophones not implemented")
        return
    print("PER: {0:.2f} %".format(per))
def main():
    """Train an LSTM language model on Penn Tree Bank with truncated BPTT.

    Runs a manual training loop (no Trainer): each iteration accumulates
    loss over ``--bproplen`` time steps, backpropagates, truncates the
    graph, and updates; perplexity is reported every 20 iterations and on
    each new epoch. The final model/optimizer are saved as npz files.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=20,
                        help='Number of examples in each mini-batch')
    parser.add_argument('--bproplen', '-l', type=int, default=35,
                        help='Number of words in each mini-batch '
                             '(= length of truncated BPTT)')
    parser.add_argument('--epoch', '-e', type=int, default=39,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--gradclip', '-c', type=float, default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.set_defaults(test=False)
    parser.add_argument('--unit', '-u', type=int, default=650,
                        help='Number of LSTM units in each layer')
    args = parser.parse_args()

    def evaluate(model, iter):
        # Evaluation routine used for both validation and test: run the
        # dataset once on a state-reset copy of the model (so training
        # state is untouched) and return the perplexity.
        model.predictor.train = False
        evaluator = model.copy()  # to use different state
        evaluator.predictor.reset_state()  # initialize state
        evaluator.predictor.train = False  # dropout does nothing
        sum_perp = 0
        data_count = 0
        # copy.copy so the caller's iterator position is not consumed.
        for batch in copy.copy(iter):
            x, t = convert.concat_examples(batch, args.gpu)
            loss = evaluator(x, t)
            sum_perp += loss.data
            data_count += 1
        model.predictor.train = True
        return np.exp(float(sum_perp) / data_count)

    # Load the Penn Tree Bank long word sequence dataset.
    train, val, test = chainer.datasets.get_ptb_words()
    n_vocab = max(train) + 1  # train is just an array of integers
    print('#vocab = {}'.format(n_vocab))

    if args.test:
        # Tiny slices for a quick smoke test.
        train = train[:100]
        val = val[:100]
        test = test[:100]

    # Create the dataset iterators; validation/test run one sequence at a
    # time without repetition.
    train_iter = train_ptb.ParallelSequentialIterator(train, args.batchsize)
    val_iter = train_ptb.ParallelSequentialIterator(val, 1, repeat=False)
    test_iter = train_ptb.ParallelSequentialIterator(test, 1, repeat=False)

    # Prepare an RNNLM model wrapped in a Classifier for the loss.
    rnn = train_ptb.RNNForLM(n_vocab, args.unit)
    model = L.Classifier(rnn)
    model.compute_accuracy = False  # we only want the perplexity
    if args.gpu >= 0:
        # Make the specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Set up an optimizer with gradient-norm clipping.
    optimizer = chainer.optimizers.SGD(lr=1.0)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    sum_perp = 0
    count = 0
    iteration = 0
    while train_iter.epoch < args.epoch:
        loss = 0
        iteration += 1
        # Progress the dataset iterator for bprop_len words at each
        # iteration, accumulating the loss over the window.
        for i in range(args.bproplen):
            # Get the next batch (a list of tuples of two word IDs)
            batch = train_iter.__next__()
            # Concatenate the word IDs to matrices and send them to the
            # device; self.converter does this job (it is
            # chainer.dataset.concat_examples by default).
            x, t = convert.concat_examples(batch, args.gpu)
            # Compute the loss at this time step and accumulate it
            loss += optimizer.target(chainer.Variable(x),
                                     chainer.Variable(t))
            count += 1
        sum_perp += loss.data
        optimizer.target.cleargrads()  # Clear the parameter gradients
        loss.backward()  # Backprop
        loss.unchain_backward()  # Truncate the graph
        optimizer.update()  # Update the parameters
        if iteration % 20 == 0:
            print('iteration: {}'.format(iteration))
            print('training perplexity: {}'.format(
                np.exp(float(sum_perp) / count)))
            sum_perp = 0
            count = 0
        if train_iter.is_new_epoch:
            print('epoch: {}'.format(train_iter.epoch))
            print('validation perplexity: {}'.format(
                evaluate(model, val_iter)))

    # Evaluate on test dataset
    print('test')
    test_perp = evaluate(model, test_iter)
    print('test perplexity: {}'.format(test_perp))

    # Save the model and the optimizer
    print('save the model')
    serializers.save_npz('rnnlm.model', model)
    print('save the optimizer')
    serializers.save_npz('rnnlm.state', optimizer)
def predict(filter_num=5, inpaint=1, save_file=""):
    """Slide a 50x50 window over each .bmp and mark CNN-positive patches.

    Loads a trained CNN classifier from a snapshot, cuts every image in the
    input directory into overlapping 50x50 patches, classifies each patch,
    and writes an overlay image (red channel boosted on positive patches)
    to ``CNN_output/``.

    Args:
        filter_num: Index selecting which snapshot file to load.
        inpaint: When 1, read the inpainted variants of the input images.
        save_file: Unused in the live code path (kept for the disabled
            F1-measure evaluation).

    Returns:
        0 on completion.
    """
    filter_str = str(filter_num)
    seg = 0  # only consumed by the disabled F1-measure evaluation
    model = L.Classifier(CNN())
    # The snapshot was written by a Trainer, hence the 'updater/model:main/'
    # path inside the npz archive.
    serializers.load_npz(
        "./snap_shot/medi41/median41_" + (filter_str) + "snapshot_epoch-50",
        model, path='updater/model:main/')

    # Accumulators for the disabled F1-measure evaluation.
    TP = 0.0
    FP = 0.0
    FN = 0.0
    TN = 0.0

    # 1 = plain grayscale; other values (3/13/21/33) select the alternative
    # input encodings handled below.
    data_channels = 1
    data_dir_path1 = u"./data/2.5m_median41"
    data_dir_path2 = u"./data/2.5m_half"
    file_list = os.listdir(r'./data/2.5m_half/')
    nnum = 0
    for file_name in file_list:
        root, ext = os.path.splitext(file_name)
        if ext == u'.bmp':
            nnum = nnum + 1
            print(file_name)
            abs_name1 = data_dir_path1 + '/' + file_name
            abs_name2 = data_dir_path2 + '/' + file_name
            file_name = file_name[:-4]  # strip '.bmp'
            if data_channels == 3 or data_channels == 33:
                src_img = cv2.imread(abs_name1)
                if inpaint == 1:
                    src_img = cv2.imread(
                        "./data/inpaint_area/" + file_name + ".bmp")
                height, width, channela = src_img.shape
            if data_channels == 1 or data_channels == 13:
                src_img = cv2.imread(abs_name1, 0)
                if inpaint == 1:
                    src_img = cv2.imread(
                        "./data/2.5m_inpaint/" + file_name + ".bmp", 0)
                    print("read inpaint")
                height, width = src_img.shape
            if data_channels == 21:
                src_img41 = cv2.imread(abs_name1, 0)
                src_img21 = cv2.imread(
                    "./data/median21/" + file_name + ".bmp", 0)
                if inpaint == 1:
                    src_img41 = cv2.imread(
                        "./data/inpaint_median41/" + file_name + ".bmp", 0)
                    src_img21 = cv2.imread(
                        "./data/inpaint_median21/" + file_name + ".bmp", 0)
                    print("read inpaint")
                # NOTE(review): src_img is not assigned in this branch —
                # this probably should be src_img41.shape; confirm before
                # using data_channels == 21.
                height, width = src_img.shape
            dst_img = cv2.imread(abs_name2)
            f1_img = cv2.imread(abs_name2)
            # FIX: mask was commented out in the original but is written in
            # the positive-patch branch below, which raised NameError.
            mask = np.zeros((height, width), np.uint8)

            # Slide a 50x50 window with a 25px step (50% overlap).
            over_rap = 25
            new_img_height = 50
            new_img_width = 50
            width_split = int(width / (new_img_width - over_rap)) - 1
            height_split = int(height / (new_img_height - over_rap)) - 1
            a1, b1, c1 = 0, 0, 0
            num = 0
            for h in range(height_split):
                height_start = h * over_rap
                height_end = height_start + new_img_height
                for w in range(width_split):
                    width_start = w * over_rap
                    width_end = width_start + new_img_width
                    num = num + 1
                    clp1 = src_img[height_start:height_end,
                                   width_start:width_end]
                    PIL_data = Image.fromarray(clp1)
                    if data_channels == 3:
                        r, g, b = PIL_data.split()
                        rImgData = np.asarray(np.float32(r) / 255.0)
                        gImgData = np.asarray(np.float32(g) / 255.0)
                        bImgData = np.asarray(np.float32(b) / 255.0)
                        imgData = np.asarray([rImgData, gImgData, bImgData])
                        x = imgData
                    if data_channels == 33:
                        # RGB plus three segmentation masks (missing mask
                        # files default to all-255).
                        r, g, b = PIL_data.split()
                        rImgData = np.asarray(np.float32(r) / 255.0)
                        gImgData = np.asarray(np.float32(g) / 255.0)
                        bImgData = np.asarray(np.float32(b) / 255.0)
                        seg_n = "seg"
                        if os.path.isfile("./data/" + seg_n + "_hall_batch/" +
                                          file_name + "_" + str(num) +
                                          ".bmp") == True:
                            seg_img1 = np.array(
                                Image.open("./data/" + seg_n +
                                           "_hall_batch/" + file_name + "_" +
                                           str(num) + ".bmp").convert('L'))
                            a1 = a1 + 1
                        else:
                            seg_img1 = np.array(
                                np.full((50, 50), 255, dtype=np.uint8))
                        if os.path.isfile("./data/" + seg_n +
                                          "_shadow_batch/" + file_name + "_" +
                                          str(num) + ".bmp") == True:
                            seg_img2 = np.array(
                                Image.open("./data/" + seg_n +
                                           "_shadow_batch/" + file_name +
                                           "_" + str(num) +
                                           ".bmp").convert('L'))
                            b1 = b1 + 1
                        else:
                            seg_img2 = np.array(
                                np.full((50, 50), 255, dtype=np.uint8))
                        if os.path.isfile("./data/" + seg_n +
                                          "_hyouzi_batch/" + file_name + "_" +
                                          str(num) + ".bmp") == True:
                            seg_img3 = np.array(
                                Image.open("./data/" + seg_n +
                                           "_hyouzi_batch/" + file_name +
                                           "_" + str(num) +
                                           ".bmp").convert('L'))
                            c1 = c1 + 1
                        else:
                            seg_img3 = np.array(
                                np.full((50, 50), 255, dtype=np.uint8))
                        # FIX: seg1 was commented out but used in imgData
                        # below (NameError when this branch ran).
                        seg1 = np.asarray(np.float32(seg_img1) / 255.0)
                        seg2 = np.asarray(np.float32(seg_img2) / 255.0)
                        seg3 = np.asarray(np.float32(seg_img3) / 255.0)
                        imgData = np.asarray(
                            [bImgData, gImgData, rImgData, seg1, seg2, seg3])
                        x = imgData
                    if data_channels == 1:
                        grayImgData = np.asarray(np.float32(PIL_data) / 255.0)
                        x = grayImgData[None, ...]
                    if data_channels == 13:
                        # Grayscale plus three segmentation masks (missing
                        # mask files default to all-zero).
                        grayImgData = np.asarray(np.float32(PIL_data) / 255.0)
                        if os.path.isfile("./data/2.5m_hall_batch/" +
                                          file_name + "_" + str(num) +
                                          ".bmp") == True:
                            seg_img1 = np.array(
                                Image.open("./data/2.5m_hall_hall/" +
                                           file_name + "_" + str(num) +
                                           ".bmp").convert('L'))
                            a1 = a1 + 1
                        else:
                            seg_img1 = np.array(
                                np.full((50, 50), 0, dtype=np.uint8))
                        if os.path.isfile("./data/2.5m_shadow_batch/" +
                                          file_name + "_" + str(num) +
                                          ".bmp") == True:
                            seg_img2 = np.array(
                                Image.open("./data/2.5m_shadow_batch/" +
                                           file_name + "_" + str(num) +
                                           ".bmp").convert('L'))
                            b1 = b1 + 1
                        else:
                            seg_img2 = np.array(
                                np.full((50, 50), 0, dtype=np.uint8))
                        if os.path.isfile("./data/2.5m_hyouzi_batch/" +
                                          file_name + "_" + str(num) +
                                          ".bmp") == True:
                            seg_img3 = np.array(
                                Image.open("./data/2.5m_hyouzi_batch/" +
                                           file_name + "_" + str(num) +
                                           ".bmp").convert('L'))
                            c1 = c1 + 1
                        else:
                            seg_img3 = np.array(
                                np.full((50, 50), 0, dtype=np.uint8))
                        # FIX: seg1 restored (was commented out, used below).
                        seg1 = np.asarray(np.float32(seg_img1) / 255.0)
                        seg2 = np.asarray(np.float32(seg_img2) / 255.0)
                        seg3 = np.asarray(np.float32(seg_img3) / 255.0)
                        imgData = np.asarray([grayImgData, seg1, seg2, seg3])
                        x = imgData
                    if data_channels == 21:
                        clp41 = src_img41[height_start:height_end,
                                          width_start:width_end]
                        PIL_data41 = Image.fromarray(clp41)
                        clp21 = src_img21[height_start:height_end,
                                          width_start:width_end]
                        PIL_data21 = Image.fromarray(clp21)
                        grayImgData41 = np.asarray(
                            np.float32(PIL_data41) / 255.0)
                        grayImgData21 = np.asarray(
                            np.float32(PIL_data21) / 255.0)
                        imgData = np.asarray([grayImgData41, grayImgData21])
                        x = imgData
                    with chainer.using_config('train', False), \
                            chainer.using_config('enable_backprop', False):
                        y = model.predictor(x[None, ...]).data.argmax(
                            axis=1)[0]
                        yy = model.predictor(x[None, ...])
                        rate = F.softmax(yy.data)[0][1]
                    if y == 1:
                        # Positive patch: mark the mask and boost the red
                        # channel of the overlay, clamped below 255.
                        # NOTE(review): these loop variables shadow the
                        # prediction x/y above; harmless here since both are
                        # reassigned next patch, but worth renaming.
                        for y in range(height_start, height_end):
                            for x in range(width_start, width_end):
                                mask[y][x] = 255
                                dst_img[y][x][2] = dst_img[y][x][2] + 20
                                if dst_img[y][x][2] >= 255:
                                    dst_img[y][x][2] = 254
            print(a1, b1, c1)
            # The F1-measure evaluation (Precision/Recall/Specificity dump)
            # is intentionally disabled in this revision.
            cv2.imwrite('CNN_output/' + file_name + '.bmp', dst_img)
    return 0
def __call__(self, x):
    """Forward pass: one ReLU hidden layer followed by a linear output."""
    return self.l2(F.relu(self.l1(x)))


# --- Toy logic-gate dataset -------------------------------------------------
# The four 2-bit inputs; label 1 only for [1, 1].
trainx = np.array(([0, 0], [0, 1], [1, 0], [1, 1]), dtype=np.float32)
trainy = np.array([0, 0, 0, 1], dtype=np.int32)
train = chainer.datasets.TupleDataset(trainx, trainy)
test = chainer.datasets.TupleDataset(trainx, trainy)

# Model: classifier head with an explicit softmax-cross-entropy loss.
model = L.Classifier(MyChain(), lossfun=F.softmax_cross_entropy)
optimizer = chainer.optimizers.Adam()
optimizer.setup(model)

# Iterators: the entire 4-sample dataset in each batch.
batchsize = 4
train_iter = chainer.iterators.SerialIterator(train, batchsize)
test_iter = chainer.iterators.SerialIterator(test, batchsize,
                                             repeat=False, shuffle=False)

# Updater and trainer registration.
updater = training.StandardUpdater(train_iter, optimizer)
epoch = 500
trainer = training.Trainer(updater, (epoch, 'epoch'))
def main():
    """Train an MNIST model, then export it as WebDNN graph descriptors.

    After the Chainer training loop finishes, the model is converted to a
    WebDNN intermediate representation, descriptors are generated for each
    backend, and a handful of test samples are dumped as JSON for the demo.
    """
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument("--model", default="mlp", choices=["mlp", "conv"])
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=5,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument(
        '--out', '-o', default='output_chainer',
        help='Directory to output the graph descriptor and sample test data')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    os.makedirs(args.out, exist_ok=True)

    # Classifier reports softmax cross entropy loss and accuracy; the model
    # class is chosen by name from the `models` registry.
    model = L.Classifier(models[args.model](10))
    if args.gpu >= 0:
        # Select the requested GPU and move the model onto it.
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # ndim=3 keeps images as (1, 28, 28) for the conv model.
    train, test = chainer.datasets.get_mnist(ndim=3)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'),
                               out=os.path.join(args.out, 'chainer_model'))

    # Per-epoch evaluation on the test split.
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    # Dump the computational graph from the 'main' optimizer's loss once.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Snapshot every `frequency` epochs; -1 means once at the final epoch.
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

    trainer.extend(extensions.LogReport())

    # Loss/accuracy curves, when the plotting backend is available.
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    # 'main' = training metrics from the Classifier; 'validation' = the
    # Evaluator extension's default report prefix.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # --- Conversion to WebDNN -----------------------------------------------
    print('Transpiling model to WebDNN graph descriptor')
    # Any example input works; shape is (batch_size, 784).
    example_input = numpy.expand_dims(train[0][0], axis=0)
    x = chainer.Variable(example_input)
    y = F.softmax(model.predictor(x))  # run model
    # Convert the traced graph to WebDNN's intermediate representation.
    graph = ChainerConverter().convert([x], [y])
    for backend in ["webgpu", "webassembly", "fallback"]:
        try:
            exec_info = generate_descriptor(backend, graph)
            exec_info.save(args.out)
        except Exception as ex:
            print(
                f"Failed generating descriptor for backend {backend}: {str(ex)}\n"
            )
        else:
            print(f"Backend {backend} ok\n")

    # Export a few test samples for the browser demo.
    print('Exporting test samples (for demo purpose)')
    test_samples_json = []
    for i in range(10):
        image, label = test[i]
        test_samples_json.append({
            'x': image.flatten().tolist(),
            'y': int(label)
        })
    with open(os.path.join(args.out, 'test_samples.json'), 'w') as f:
        json.dump(test_samples_json, f)
# Normalize features to [0, 1] and cast for Chainer.
test_x = test_x / np.max(test_x)
test_x = test_x.astype(np.float32)

# Labels: four classes, 93 samples each, stacked in order.
test_y = np.vstack((np.full((93, 1), 0), np.full((93, 1), 1)))
test_y = np.vstack((test_y, np.full((93, 1), 2)))
test_y = np.vstack((test_y, np.full((93, 1), 3)))
test_y = test_y.astype(np.int32)

# Reshape to (N, channels, height, width), the layout the CNN expects.
train_x = train_x.reshape([2248, 1, 10, 513])
test_x = test_x.reshape([372, 1, 10, 513])
train_y = train_y.reshape(2248)
test_y = test_y.reshape(372)

train = tuple_dataset.TupleDataset(train_x, train_y)
test = tuple_dataset.TupleDataset(test_x, test_y)

# NOTE(review): sys.exit() aborts here, so the training pipeline below
# never runs — presumably a leftover debugging stop; confirm before
# removing it.
sys.exit()

model = L.Classifier(CNN())
optimizer = optimizers.Adam()
optimizer.setup(model)

train_iter = iterators.SerialIterator(train, batch_size=100)
test_iter = iterators.SerialIterator(test, batch_size=20,
                                     repeat=False, shuffle=False)

updater = training.StandardUpdater(train_iter, optimizer)
trainer = training.Trainer(updater, (150, 'epoch'), out='result')
trainer.extend(extensions.Evaluator(test_iter, model))
trainer.extend(extensions.LogReport())
trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
trainer.extend(extensions.PrintReport(
    ['epoch', 'main/accuracy', 'validation/main/accuracy']))
trainer.extend(extensions.ProgressBar())
trainer.run()
def main():
    """Predict horse-race finishing order with a pre-trained MLP.

    Loads model weights from ``my_mnist.npz``, then for each race CSV in
    ``fname_list`` builds a feature vector, runs the network, ranks the
    horses by network output, and appends the ranking to a per-date output
    CSV named ``<yyyymmdd>_DL.csv``.
    """
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=40,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=600,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Build the same network topology as at training time (18 output nodes,
    # one per possible post position), then load the saved parameters.
    model = L.Classifier(MLP(args.unit, 18))
    serializers.load_npz("my_mnist.npz", model)

    # Categorical encodings used at training time.
    # Key 1: race-course name -> id; key 3: track type -> id.
    # TODO: replace these hard-coded tables with a shared definition.
    dataMap = {
        1: {"札幌": 0, "函館": 1, "福島": 2, "東京": 3, "中山": 4,
            "京都": 5, "新潟": 6, "阪神": 7, "中京": 8, "小倉": 9},
        3: {"芝": 0, "ダ": 1, "障": 2, "直": 3},
    }

    # Input race CSVs (Shift_JIS encoded).
    fname_list = ["00_test01.csv", "00_test02.csv", "00_test03.csv",
                  "00_test04.csv", "00_test05.csv", "00_test06.csv",
                  "00_test07.csv", "00_test08.csv", "00_test09.csv",
                  "00_test10.csv", "00_test11.csv", "00_test12.csv"]
    for fname in fname_list:
        df = pd.read_csv(fname, encoding="shift_jis")
        row, col = df.shape
        print("row = {}, col = {}".format(row, col))
        print(df.head())

        # Header cells: column 1 = course name, column 3 = track type.
        hedder_data = df.iloc[0, [1, 3]]
        for course_idx, course in enumerate(dataMap[1]):
            if course == hedder_data[0]:
                break
        for track_idx, track in enumerate(dataMap[3]):
            if track == hedder_data[1]:
                break

        # Keep a real copy for the output file.  (Bug fix: this was
        # ``dfcpy = df``, which only aliased the frame, so the -1 padding
        # rows added below leaked into the output CSV.)
        dfcpy = df.copy()

        # The network expects exactly 18 horses: pad missing rows with -1.
        for idx in range(row, 18):
            df.loc[idx] = -1
        diff = 18 - row
        print(diff)

        yyyymmdd = df.iloc[0, 0]  # race date, used for the output file name
        tmp = df["馬番"]  # horse numbers, re-attached to the output below
        # Feature columns fed to the network, in training order.
        df = df[["馬番", "展開順位", "展開ゴール差", "先行力", "追走力",
                 "持久力", "持続力", "瞬発力", "ST指数", "仕上指数",
                 "合計値", "合計値順位", "基準人気順位"]]

        # Input vector = [course id, track id] + flattened feature matrix.
        data = np.array([course_idx, track_idx], dtype=np.float32)
        dfary = np.array(df.iloc[:, :]).astype(np.float32)
        data = np.append(data, dfary)
        print("data length={}".format(len(data)))
        data = data.reshape(1, len(data))

        # One output score per horse; drop the scores of the padding rows.
        outputArray = model.predictor(data).data
        outputArray = outputArray.reshape(18, 1)
        outputDF = pd.DataFrame(outputArray[0:18 - diff], columns=["機械"])
        # Higher score = better predicted finish (rank 1 is best).
        outputDF["予想着順"] = outputDF["機械"].rank(ascending=False,
                                                 method='min')
        outputDF["馬番"] = tmp
        # Put the horse-number column first.
        cols = ['馬番'] + [col for col in outputDF if col != '馬番']
        outputDF = outputDF[cols]

        # Append the prediction to the original rows; mode="a" lets several
        # races of the same day accumulate into one file.
        dfcpy["予想着順"] = outputDF["予想着順"]
        dfcpy.to_csv(str(yyyymmdd) + "_DL.csv", encoding="shift_jis",
                     index=False, mode="a")
def pretraining():
    """Greedy layer-wise pretraining of a stacked denoising autoencoder
    on MNIST, followed by end-to-end fine-tuning of the whole stack.

    Saves the fine-tuned predictor to
    ``StackedDenoisingAutoEncoder-seed<seed>.model``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--batchsize', type=int, default=256)
    args = parser.parse_args()

    xp = np  # array module; switched to cupy below when a GPU is used
    gpu_id = args.gpu
    seed = args.seed

    # Images only; labels are discarded (autoencoder training).
    train, _ = mnist.get_mnist()
    train, _ = convert.concat_examples(train, device=gpu_id)
    batchsize = args.batchsize
    model = StackedDenoisingAutoEncoder(input_dim=train.shape[1])
    if chainer.cuda.available and args.gpu >= 0:
        xp = cp
        model.to_gpu(gpu_id)
    xp.random.seed(seed)

    # Layer-Wise Pretrain: each DAE is trained to reconstruct the codes
    # produced by the already-trained layers below it.
    print("Layer-Wise Pretrain")
    for i, dae in enumerate(model.children()):
        print("Layer {}".format(i + 1))
        # Input == target: plain reconstruction objective.
        train_tuple = tuple_dataset.TupleDataset(train, train)
        train_iter = iterators.SerialIterator(train_tuple, batchsize)
        clf = L.Classifier(dae, lossfun=mean_squared_error)
        clf.compute_accuracy = False  # regression loss; accuracy is meaningless
        if chainer.cuda.available and args.gpu >= 0:
            clf.to_gpu(gpu_id)
        optimizer = optimizers.MomentumSGD(lr=0.1)
        optimizer.setup(clf)
        updater = training.StandardUpdater(train_iter, optimizer,
                                           device=gpu_id)
        trainer = training.Trainer(updater, (50000, "iteration"),
                                   out="mnist_result")
        trainer.extend(extensions.LogReport())
        trainer.extend(
            extensions.PrintReport(['iteration', 'main/loss',
                                    'elapsed_time']))
        trainer.extend(ChangeLearningRate(), trigger=(20000, "iteration"))
        trainer.run()
        # Feed this layer's learned codes to the next layer as its input.
        train = dae.encode(train).data

    # Finetuning: train the whole stack end-to-end on the raw images.
    print("fine tuning")
    # NOTE(review): fine-tuning runs under config train=False — presumably
    # to disable the per-layer input corruption while weights are still
    # updated; confirm against StackedDenoisingAutoEncoder's implementation.
    with chainer.using_config("train", False):
        train, _ = mnist.get_mnist()
        train, _ = convert.concat_examples(train, device=gpu_id)
        train_tuple = tuple_dataset.TupleDataset(train, train)
        train_iter = iterators.SerialIterator(train_tuple, batchsize)
        model = L.Classifier(model, lossfun=mean_squared_error)
        model.compute_accuracy = False
        if chainer.cuda.available and args.gpu >= 0:
            model.to_gpu(gpu_id)
        optimizer = optimizers.MomentumSGD(lr=0.1)
        optimizer.setup(model)
        updater = training.StandardUpdater(train_iter, optimizer,
                                           device=gpu_id)
        trainer = training.Trainer(updater,
                                   (100000, "iteration"),
                                   out="mnist_result")
        trainer.extend(extensions.LogReport())
        trainer.extend(
            extensions.PrintReport(['iteration', 'main/loss',
                                    'elapsed_time']))
        trainer.extend(ChangeLearningRate(), trigger=(20000, "iteration"))
        trainer.run()

    outfile = "StackedDenoisingAutoEncoder-seed{}.model".format(seed)
    serializers.save_npz(outfile, model.predictor)
def main():
    """Train an MLP classifier on MNIST, optionally compiling the model
    with chainer_compiler, and report/plot training statistics."""
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--device', '-d', type=str, default='-1',
                        help='Device specifier. Either ChainerX device '
                        'specifier or an integer. If non-negative integer, '
                        'CuPy arrays with specified device id are used. If '
                        'negative integer, NumPy arrays are used')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    parser.add_argument('--noplot', dest='plot', action='store_false',
                        help='Disable PlotReport extension')
    deprecated = parser.add_argument_group('deprecated arguments')
    deprecated.add_argument('--gpu', '-g', type=int, nargs='?', const=0,
                            help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--compile', action='store_true',
                        help='Compile the model')
    parser.add_argument('--dump_onnx', action='store_true',
                        help='Dump ONNX model after optimization')
    args = parser.parse_args()

    device = parse_device(args)

    print('Device: {}'.format(device))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # The predictor network; Classifier wraps it with softmax cross entropy
    # loss and accuracy reporting at every iteration.
    predictor = MLP(args.unit, 10)
    if args.compile:
        predictor = chainer_compiler.compile(predictor,
                                             dump_onnx=args.dump_onnx)
    model = L.Classifier(predictor)
    model.to_device(device)
    device.use()

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train, test = chainer.datasets.get_mnist()
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Run validation over the test set once per epoch.
    trainer.extend(extensions.Evaluator(test_iter, model, device=device))

    # Dump the computational graph from the first iteration's 'main/loss'.
    # TODO(niboshi): Temporarily disabled for chainerx. Fix it.
    if device.xp is not chainerx:
        trainer.extend(extensions.dump_graph('main/loss'))

    # Snapshot every `frequency` epochs (default: once, at the end).
    if args.frequency == -1:
        frequency = args.epoch
    else:
        frequency = max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

    # Per-epoch log of evaluation statistics.
    trainer.extend(extensions.LogReport())

    if args.plot and extensions.PlotReport.available():
        # Loss and accuracy curves written into the output directory.
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    # Console report ('main' = training link, 'validation' = Evaluator).
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a previously taken snapshot.
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
# NOTE(review): fragment of a CIFAR-10 CNN benchmarking loop; `train`,
# `test`, `batch_size`, `b`, `net_config`, `cnn_config` and `times` are
# defined outside this chunk, and the surrounding control flow continues
# past its end (the multi-gpu branch is cut off).

train_iter = iterators.SerialIterator(train, batch_size=batch_size,
                                      shuffle=True)
test_iter = iterators.SerialIterator(test, batch_size=batch_size,
                                     repeat=False, shuffle=False)

# One timing series per (network alias, batch size) combination.
key = "{}-batch{}".format(net_config['alias'], b)
times[key] = []
for t in range(cnn_config['test_times']):
    print("Cifar10 Current process: CNN {}, test {}".format(
        key, t))
    # Fresh model and optimizer per repetition so the runs are independent.
    model = L.Classifier(CNN(net_config))
    if cnn_config['context'] == 'gpu':
        model.to_gpu()
    optimizer = optimizers.Adam()
    optimizer.setup(model)
    # Rewind the shared iterators instead of rebuilding them.
    train_iter.reset()
    test_iter.reset()
    if cnn_config['context'] == 'gpu':
        updater = training.StandardUpdater(train_iter, optimizer,
                                           device=0)
    elif cnn_config['context'] == 'multi-gpu':
        assert cnn_config['gpus'] > 1
        device_dict = {'main': 0}
class MLP(chainer.Chain):
    """Three-layer fully connected network.

    The input size of the first layer is inferred from the first batch
    (``L.Linear(None, ...)``).
    """

    def __init__(self, n_mid_units=100, n_out=10):
        super(MLP, self).__init__()
        with self.init_scope():
            self.l1 = L.Linear(None, n_mid_units)
            self.l2 = L.Linear(n_mid_units, n_mid_units)
            self.l3 = L.Linear(n_mid_units, n_out)

    def __call__(self, x):
        # Two ReLU-activated hidden layers, linear output layer.
        hidden = F.relu(self.l1(x))
        hidden = F.relu(self.l2(hidden))
        return self.l3(hidden)


# Wrap the predictor so loss and accuracy are computed by the link itself.
net = L.Classifier(MLP())

# CPU-only ChainerMN communicator; rank 0 loads the dataset and every rank
# (including rank 0) receives its shard via scatter_dataset.
comm = chainermn.create_communicator('naive')
if comm.rank == 0:
    train, test = mnist.get_mnist(withlabel=True, ndim=1)
else:
    train, test = None, None

batchsize = 64
train = chainermn.scatter_dataset(train, comm, shuffle=False)
test = chainermn.scatter_dataset(test, comm, shuffle=False)
train_iter = iterators.SerialIterator(train, batchsize)
test_iter = iterators.SerialIterator(test, batchsize,
                                     repeat=False, shuffle=False)