def main():
    """Train a convolutional network on MNIST and print one row per epoch.

    Command-line options select the mini-batch size, the number of epochs
    and the GPU ID (a negative ID keeps everything on the CPU). After every
    epoch a tab-separated row is printed with the epoch number, the loss of
    the last training mini-batch and the accuracy measured on the whole
    test split.
    """
    # get args
    parser = argparse.ArgumentParser(description='ConvolutionNN')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # print config
    print("#GPU : {}".format(args.gpu))
    print("#batchsize : {}".format(args.batchsize))
    print("#epoch : {}".format(args.epoch))

    # Model
    model = Net(50, 10)

    # setting for using GPU
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    # Optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Data
    train, test = chainer.datasets.get_mnist()

    # convert to 3 dim tensor (channel, height, width)
    X_train = [(t[0].reshape(1, 28, 28), t[1]) for t in train]
    X_test = [(t[0].reshape(1, 28, 28), t[1]) for t in test]

    train_iter = chainer.iterators.SerialIterator(X_train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        X_test, args.batchsize, repeat=False, shuffle=False)

    # print header
    print("Epoch\tloss(train)\taccuracy(test)")

    # train
    while train_iter.epoch < args.epoch:
        # next batch data; it is sent to the device selected by --gpu so
        # that a model living on the GPU is not fed CPU arrays
        train_batch = train_iter.next()
        x, t = convert.concat_examples(train_batch, args.gpu)

        # calculate loss and optimize params
        model.cleargrads()
        loss = model.forward(x, t)
        loss.backward()
        optimizer.update()

        # log every epoch
        if train_iter.is_new_epoch:
            # calculate accuracy on the full test split in one forward pass
            x, t = convert.concat_examples(test_iter.dataset, args.gpu)
            model.forward(x, t)
            # float() makes the formatting independent of the array backend
            print("%d\t%f\t%f" % (train_iter.epoch,
                                  float(loss.data),
                                  float(model.accuracy.data)))
def main():
    """Train an autoencoder on MNIST and save sample plots after each epoch.

    The training images are used as both input and target. At the end of
    every epoch the loss over the complete training set is printed, and
    two figures are written for one fixed test image under ``result/``:
    the output of ``model.predictor`` and the output of calling the model
    with ``True`` as second argument (the original labels this the hidden
    node plot).
    """
    # configuration
    n_epochs = 20
    n_per_batch = 100
    sample_index = 22

    # output directory for the figures
    os.makedirs('result', exist_ok=True)

    # model and optimizer
    model = AutoEncoder(784, 64, 784)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # dataset: every image is paired with itself
    train, test = chainer.datasets.get_mnist()
    images = [example[0] for example in train]
    pairs = tuple_dataset.TupleDataset(images, images)
    train_iter = chainer.iterators.SerialIterator(pairs, n_per_batch)

    print("Epoch\tloss(train)")

    while train_iter.epoch < n_epochs:
        inputs, targets = convert.concat_examples(train_iter.next())
        model.cleargrads()
        batch_loss = model.loss(inputs, targets)
        batch_loss.backward()
        optimizer.update()

        if not train_iter.is_new_epoch:
            continue

        epoch = train_iter.epoch

        # loss over the whole training set
        inputs, targets = convert.concat_examples(train_iter.dataset)
        epoch_loss = model.loss(inputs, targets)
        print("%d\t%f" % (epoch, epoch_loss.data))

        # reconstruction of the fixed test sample
        sample, sample_label = test[sample_index]
        single = np.array([sample])
        reconstructed = model.predictor(single).data
        plot_mnist_data(reconstructed, sample_label,
                        'result/epoch_{}.png'.format(epoch))

        # second plot, drawn on an 8x8 layout
        hidden = model(np.array([sample]), True).data
        plot_mnist_data(hidden, sample_label,
                        'result/epoch_{}_hidden.png'.format(epoch), (8, 8))
def extract(self, images, layers=None, size=(224, 224), test=True,
            volatile=flag.OFF):
    """Extracts all the feature maps of given images.

    The difference of directly executing ``__call__`` is that it directly
    accepts images as an input and automatically transforms them to a
    proper variable. That is, it is also interpreted as a shortcut method
    that implicitly calls ``prepare`` and ``__call__`` functions.

    Args:
        images (iterable of PIL.Image or numpy.ndarray): Input images.
        layers (list of str): The list of layer names you want to extract.
            ``None`` (the default) is equivalent to ``['fc7']``.
        size (pair of ints): The resolution of resized images used as
            an input of CNN. All the given images are not resized
            if this argument is ``None``, but the resolutions of
            all the images should be the same.
        test (bool): If ``True``, dropout runs in test mode.
        volatile (~chainer.Flag): Volatility flag used for input variables.

    Returns:
        Dictionary of ~chainer.Variable: A dictionary in which
        the key contains the layer name and the value contains
        the corresponding feature map variable.

    """
    # A list literal as a default value would be shared between calls;
    # the default is materialised per call instead.
    if layers is None:
        layers = ['fc7']
    x = concat_examples([prepare(img, size=size) for img in images])
    x = Variable(self.xp.asarray(x), volatile=volatile)
    return self(x, layers=layers, test=test)
def predict(self, images, oversample=True):
    """Computes all the probabilities of given images.

    Every image is prepared at 256x256. With ``oversample`` the batch is
    expanded by ``imgproc.oversample`` into 224x224 crops and the ``prob``
    output is averaged over each group of ten consecutive rows; without
    it only the central 224x224 window is evaluated.

    Args:
        images (iterable of PIL.Image or numpy.ndarray): Input images.
        oversample (bool): If ``True``, it averages results across
            center, corners, and mirrors. Otherwise, it uses only the
            center.

    Returns:
        ~chainer.Variable: Output that contains the class probabilities
        of given images.

    """
    prepared = [prepare(img, size=(256, 256)) for img in images]
    x = concat_examples(prepared)
    x = (imgproc.oversample(x, crop_dims=(224, 224)) if oversample
         else x[:, :, 16:240, 16:240])

    # Use no_backprop_mode to reduce memory consumption
    with function.no_backprop_mode():
        outputs = self(Variable(self.xp.asarray(x)), layers=['prob'])
        y = outputs['prob']
        if oversample:
            n_images = y.data.shape[0] // 10
            grouped = reshape(y, (n_images, 10) + y.data.shape[1:])
            # ``sum`` takes ``axis=`` here, so it is the array-aware sum
            # brought into scope by the module, not the builtin.
            y = sum(grouped, axis=1) / 10
    return y
def seq2seq_pad_concat_convert(xy_batch, device, eos_id=0):
    """Pad a minibatch of (source, target) id sequences into three blocks.

    Args:
        xy_batch (list of tuple of two numpy.ndarray-s or cupy.ndarray-s):
            ``xy_batch[i][0]`` is an array of token ids of the i-th input
            sentence and ``xy_batch[i][1]`` an array of token ids of the
            i-th target sentence, each of shape ``(sentence length, )``.
        device (int or None): Device ID to which an array is sent. If it
            is negative value, an array is sent to CPU. If it is positive,
            an array is sent to GPU with the given ID. If it is ``None``,
            an array is left in the original device.
        eos_id (int): Id written directly after the last token of each
            sentence; also used as the first token of the decoder input.

    Returns:
        tuple: ``(x_block, y_in_block, y_out_block)``. Each block is one
        column wider than the longest sentence of its side, and unused
        positions hold -1. ``y_in_block`` is the target block shifted
        right by one ``eos_id`` column, ``y_out_block`` is the target
        block with ``eos_id`` appended to every sentence.

    """
    sources, targets = zip(*xy_batch)

    src = convert.concat_examples(sources, device, padding=-1)
    tgt = convert.concat_examples(targets, device, padding=-1)
    xp = cuda.get_array_module(src)

    keep_rows = (0, 0)
    append_col = (keep_rows, (0, 1))
    prepend_col = (keep_rows, (1, 0))

    # source side: widen by one column, then mark the end of each sentence
    src = xp.pad(src, append_col, 'constant', constant_values=-1)
    for row, sentence in enumerate(sources):
        src[row, len(sentence)] = eos_id

    # decoder output: same treatment as the source side
    tgt_out = xp.pad(tgt, append_col, 'constant', constant_values=-1)
    for row, sentence in enumerate(targets):
        tgt_out[row, len(sentence)] = eos_id

    # decoder input: the raw target block preceded by an eos_id column
    tgt_in = xp.pad(tgt, prepend_col, 'constant', constant_values=eos_id)

    return (src, tgt_in, tgt_out)
def __fit(self, train_data, valid_data, test_data, callback):
    """Run the training loop for ``self.epoch_num`` epochs.

    Mini-batches are drawn from a shuffled, repeating ``SerialIterator``.
    At the end of every epoch the running training loss/accuracy are
    averaged, the optional validation and test sets are evaluated, and
    ``callback`` (if given) is invoked.

    Learning-rate control depends on ``self.lr_shape``:

    * ``'cosine'``: before every iteration the rate is set to a half
      cosine of ``self.initial_lr`` over the planned number of iterations.
    * ``'multistep'``: at the end of an epoch the rate is multiplied by
      0.1 when the epoch is listed in ``self.lr_decay``, or, if
      ``lr_decay`` holds a single positive value, every that many epochs.

    The rate is written to ``optimizer.alpha`` when the optimizer has such
    an attribute and to ``optimizer.lr`` otherwise.

    Args:
        train_data: Training dataset (must support ``len``).
        valid_data: Validation dataset or ``None`` to skip validation.
        test_data: Test dataset or ``None`` to skip testing.
        callback: ``None`` or a callable invoked once per epoch as
            ``callback(epoch, net, optimizer, train_loss, train_acc,
            valid_loss, valid_acc, test_loss, test_acc, test_time)``.
            Skipped evaluations are reported as ``None`` (and
            ``test_time`` as 0).
    """
    train_iterator = chainer.iterators.SerialIterator(
        train_data, self.batch_size, repeat=True, shuffle=True)
    train_loss = 0
    train_acc = 0
    num = 0
    iteration = 0
    # At least 1 so the cosine schedule never divides by zero when the
    # dataset is smaller than a single mini-batch.
    iteration_num = max(
        len(train_data) * self.epoch_num // self.batch_size, 1)

    while train_iterator.epoch < self.epoch_num:
        if self.lr_shape == 'cosine':
            lr = 0.5 * self.initial_lr * (
                1 + math.cos(math.pi * iteration / iteration_num))
            if hasattr(self.optimizer, 'alpha'):
                self.optimizer.alpha = lr
            else:
                self.optimizer.lr = lr

        batch = train_iterator.next()
        x_batch, y_batch = convert.concat_examples(batch, self.device_id)
        loss, acc = self.__forward(x_batch, y_batch)
        self.net.cleargrads()
        loss.backward()
        self.optimizer.update()

        # accumulate sample-weighted statistics for the epoch average
        train_loss += float(loss.data) * len(x_batch)
        train_acc += float(acc.data) * len(x_batch)
        num += len(x_batch)
        iteration += 1
        if not train_iterator.is_new_epoch:
            continue

        # ---- end of epoch ----
        train_loss /= num
        train_acc /= num

        valid_loss = None
        valid_acc = None
        if valid_data is not None:
            valid_loss, valid_acc = self.__evaluate(valid_data)

        test_loss = None
        test_acc = None
        test_time = 0
        if test_data is not None:
            # time.clock() no longer exists on Python >= 3.8;
            # perf_counter() measures elapsed wall-clock time.
            start_clock = time.perf_counter()
            test_loss, test_acc = self.__evaluate(test_data)
            test_time = time.perf_counter() - start_clock

        epoch = train_iterator.epoch
        if callback is not None:
            callback(epoch, self.net, self.optimizer,
                     train_loss, train_acc,
                     valid_loss, valid_acc,
                     test_loss, test_acc, test_time)

        train_loss = 0
        train_acc = 0
        num = 0

        if self.lr_shape == 'multistep':
            lr_decay = self.lr_decay
            periodic = (len(lr_decay) == 1 and lr_decay[0] > 0
                        and epoch % lr_decay[0] == 0)
            if periodic or epoch in lr_decay:
                if hasattr(self.optimizer, 'alpha'):
                    self.optimizer.alpha *= 0.1
                else:
                    self.optimizer.lr *= 0.1

    train_iterator.finalize()
def extract(self, images, layers=None, size=(224, 224), **kwargs):
    """extract(self, images, layers=['pool5'], size=(224, 224))

    Extracts all the feature maps of given images.

    Compared with calling ``forward`` directly, this method takes raw
    images, runs ``prepare`` on each of them and wraps the result in a
    variable before forwarding it.

    Unlike ``predict`` method, this method does not override
    ``chainer.config.train`` and ``chainer.config.enable_backprop``
    configuration. If you want to extract features without updating
    model parameters, you need to manually set configuration when
    calling this method as follows:

     .. code-block:: python

         # model is an instance of ResNetLayers (50 or 101 or 152 layers)
         with chainer.using_config('train', False):
             with chainer.using_config('enable_backprop', False):
                 feature = model.extract([image])

    Args:
        images (iterable of PIL.Image or numpy.ndarray): Input images.
        layers (list of str): The list of layer names you want to extract.
        size (pair of ints): The resolution of resized images used as
            an input of CNN. All the given images are not resized
            if this argument is ``None``, but the resolutions of
            all the images should be the same.

    Returns:
        Dictionary of ~chainer.Variable: A dictionary in which
        the key contains the layer name and the value contains
        the corresponding feature map variable.

    """
    wanted = ['pool5'] if layers is None else layers

    if kwargs:
        # keyword arguments that were dropped from the API get a hint
        removed = {
            'test': 'test argument is not supported anymore. '
                    'Use chainer.using_config',
            'volatile': 'volatile argument is not supported anymore. '
                        'Use chainer.using_config',
        }
        argument.check_unexpected_kwargs(kwargs, **removed)
        argument.assert_kwargs_empty(kwargs)

    prepared = [prepare(img, size=size) for img in images]
    batch = Variable(self.xp.asarray(concat_examples(prepared)))
    return self(batch, layers=wanted)
def train(args):
    """Set up and run a Chainer ``Trainer`` for the Japanese CCG tagger.

    Reads from ``args``: ``model`` (model directory, also used as the
    trainer output directory), ``word_emb_size``, ``char_emb_size``,
    ``initmodel``, ``pretrained``, ``train``, ``val``, ``batchsize`` and
    ``epoch``. Validation and snapshots run every 1000 iterations and
    logging every 200 iterations.
    """
    model = JaCCGEmbeddingTagger(
        args.model, args.word_emb_size, args.char_emb_size)

    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    train_set = JaCCGTaggerDataset(args.model, args.train)
    train_iter = chainer.iterators.SerialIterator(train_set, args.batchsize)
    val_set = JaCCGTaggerDataset(args.model, args.val)
    val_iter = chainer.iterators.SerialIterator(
        val_set, args.batchsize, repeat=False, shuffle=False)

    optimizer = chainer.optimizers.AdaGrad()
    optimizer.setup(model)
    # NOTE: a WeightDecay(1e-8) hook was left disabled in the original.

    def pad_converter(examples, dev):
        # only the second element of each example is padded (with -1)
        return convert.concat_examples(
            examples, dev, (None, -1, None, None))

    updater = training.StandardUpdater(
        train_iter, optimizer, converter=pad_converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    every_1000 = (1000, 'iteration')
    every_200 = (200, 'iteration')

    # separate copy of the model used only for evaluation
    eval_model = model.copy()
    eval_model.train = False

    report_columns = [
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy',
    ]
    scheduled = [
        (extensions.Evaluator(val_iter, eval_model, pad_converter),
         {'trigger': every_1000}),
        (extensions.dump_graph('main/loss'), {}),
        (extensions.snapshot(), {'trigger': every_1000}),
        (extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}'),
         {'trigger': every_1000}),
        (extensions.LogReport(trigger=every_200), {}),
        (extensions.PrintReport(report_columns), {'trigger': every_200}),
        (extensions.ProgressBar(update_interval=10), {}),
    ]
    for extension, options in scheduled:
        trainer.extend(extension, **options)

    trainer.run()
def extract(self, images, layers=None, size=(224, 224), **kwargs):
    """extract(self, images, layers=['pool5'], size=(224, 224))

    Extracts all the feature maps of given images.

    This is a convenience wrapper: each image goes through ``prepare``,
    the results are stacked into one variable and the model is called on
    it, so callers do not have to invoke ``prepare`` and ``forward``
    themselves.

    Unlike ``predict`` method, this method does not override
    ``chainer.config.train`` and ``chainer.config.enable_backprop``
    configuration. If you want to extract features without updating
    model parameters, you need to manually set configuration when
    calling this method as follows:

     .. code-block:: python

         # model is an instance of `GoogLeNet`
         with chainer.using_config('train', False):
             with chainer.using_config('enable_backprop', False):
                 feature = model.extract([image])

    Args:
        images (iterable of PIL.Image or numpy.ndarray): Input images.
        layers (list of str): The list of layer names you want to extract.
        size (pair of ints): The resolution of resized images used as
            an input of CNN. All the given images are not resized
            if this argument is ``None``, but the resolutions of
            all the images should be the same.

    Returns:
        Dictionary of ~chainer.Variable: A dictionary in which
        the key contains the layer name and the value contains
        the corresponding feature map variable.

    """
    if layers is None:
        layers = ['pool5']

    if len(kwargs) > 0:
        train_hint = ('train argument is not supported anymore. '
                      'Use chainer.using_config')
        volatile_hint = ('volatile argument is not supported anymore. '
                         'Use chainer.using_config')
        argument.check_unexpected_kwargs(
            kwargs, train=train_hint, volatile=volatile_hint)
        argument.assert_kwargs_empty(kwargs)

    stacked = concat_examples([prepare(img, size=size) for img in images])
    return self(Variable(self.xp.asarray(stacked)), layers=layers)
def source_pad_concat_convert(x_seqs, device, eos_id=0, bos_id=2):
    """Pad source sequences into one block framed by BOS and EOS ids.

    Args:
        x_seqs: Sequence of id arrays, one per sentence.
        device: Forwarded to ``convert.concat_examples``.
        eos_id (int): Id stored directly after the last token of every
            sentence.
        bos_id (int): Id filling the column prepended to the block.

    Returns:
        The padded block. It is two columns wider than the longest
        sentence; positions after each sentence's ``eos_id`` hold -1.
    """
    block = convert.concat_examples(x_seqs, device, padding=-1)
    xp = cuda.get_array_module(block)

    # one spare column at the end so that even the longest sentence has
    # room for its EOS marker
    block = xp.pad(block, ((0, 0), (0, 1)),
                   'constant', constant_values=-1)
    for row, sentence in enumerate(x_seqs):
        block[row, len(sentence)] = eos_id

    # leading BOS column
    return xp.pad(block, ((0, 0), (1, 0)),
                  'constant', constant_values=bos_id)
def kdnet_converter(batch, device=None, padding=None):
    """Concatenate a batch, moving only non-object arrays to ``device``.

    The examples are first concatenated without any device transfer.
    Every resulting array whose dtype is not ``object`` is then sent to
    ``device``; ``object`` arrays are returned untouched.

    Args:
        batch: Minibatch to concatenate.
        device: Target device passed to ``to_device``.
        padding: Forwarded to ``concat_examples``.

    Returns:
        tuple: The converted arrays, in the order produced by
        ``concat_examples``.
    """
    # concat_examples to CPU at first.
    host_arrays = concat_examples(batch, device=None, padding=padding)
    return tuple(
        # Do NOT send dtype=object arrays to the device.
        arr if arr.dtype == object else to_device(device, arr)
        for arr in host_arrays
    )
def source_pad_concat_convert(x_seqs, device, eos_id=1, bos_id=3):
    """Pad source sequences into one block framed by BOS and EOS ids.

    Args:
        x_seqs: Sequence of id arrays, one per sentence.
        device: Forwarded to ``convert.concat_examples``; decides on which
            device the padded block lives.
        eos_id (int): Id stored directly after the last token of every
            sentence.
        bos_id (int): Id filling the column prepended to the block.

    Returns:
        The padded block. It is two columns wider than the longest
        sentence; positions after each sentence's ``eos_id`` hold 0.
    """
    # Imported locally so the block does not rely on a module-level alias.
    from chainer import cuda

    x_block = convert.concat_examples(x_seqs, device, padding=0)
    # Use the array module that matches the block: ``np.pad`` cannot
    # operate on the GPU array produced when ``device`` selects a GPU.
    xp = cuda.get_array_module(x_block)

    # add eos
    x_block = xp.pad(x_block, ((0, 0), (0, 1)),
                     'constant', constant_values=0)
    for i_batch, seq in enumerate(x_seqs):
        x_block[i_batch, len(seq)] = eos_id

    # add bos
    x_block = xp.pad(x_block, ((0, 0), (1, 0)),
                     'constant', constant_values=bos_id)
    return x_block
def run_train_loop(optimizer, train_iter, test_iter, test_count, epoch,
                   device):
    """Train ``optimizer.target`` and report train/test metrics per epoch.

    Args:
        optimizer: Optimizer whose ``target`` is the model; the model
            exposes ``loss`` and ``accuracy`` after being called.
        train_iter: Repeating iterator over the training data.
        test_iter: Iterator over the test data; it is reset after each
            evaluation pass.
        test_count (int): Divisor used to average the test metrics.
        epoch (int): Number of epochs to train for.
        device: Device specifier forwarded to ``convert.concat_examples``.
    """
    model = optimizer.target

    seen = 0
    loss_total = 0
    accuracy_total = 0

    while train_iter.epoch < epoch:
        x_array, t_array = convert.concat_examples(
            train_iter.next(), device)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array, requires_grad=False)
        optimizer.update(model, x, t)

        n = len(t)
        seen += n
        loss_total += float(model.loss.array) * n
        accuracy_total += float(model.accuracy.array) * n

        if not train_iter.is_new_epoch:
            continue

        print('epoch: ', train_iter.epoch)
        print('train mean loss: {}, accuracy: {}'.format(
            loss_total / seen, accuracy_total / seen))

        # evaluation starts from clean accumulators
        seen = 0
        loss_total = 0
        accuracy_total = 0

        # It is good practice to turn off train mode during evaluation.
        with configuration.using_config('train', False):
            for test_batch in test_iter:
                x_array, t_array = convert.concat_examples(
                    test_batch, device)
                x = chainer.Variable(x_array)
                t = chainer.Variable(t_array, requires_grad=False)
                test_loss = model(x, t)
                n = len(t)
                loss_total += float(test_loss.array) * n
                accuracy_total += float(model.accuracy.array) * n

        test_iter.reset()
        print('test mean loss: {}, accuracy: {}'.format(
            loss_total / test_count, accuracy_total / test_count))

        loss_total = 0
        accuracy_total = 0
def main():
    """Visualise which input pixels influence the centre of ``model.feat``.

    A snapshot is loaded into either ``DeformableConvnet`` or ``Convnet``
    (selected by ``--deformable``). For the first MNIST test image the
    sum of the centre position of ``model.feat`` is back-propagated to the
    input; pixels whose normalised absolute gradient exceeds a threshold
    are drawn as red dots on top of the image.
    """
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--resume', '-r', default='result/model_iter_9',
                        help='Resume the training from snapshot')
    parser.add_argument('--deformable', '-d', type=int, default=1,
                        help='use deformable convolutions')
    args = parser.parse_args()

    if args.deformable == 1:
        model = DeformableConvnet(10)
    else:
        model = Convnet(10)
    chainer.serializers.load_npz(args.resume, model)

    train, test = chainer.datasets.get_mnist(ndim=3)
    test = TransformDataset(test, transform)
    test_iter = chainer.iterators.SerialIterator(
        test, batch_size=1, repeat=False, shuffle=False)

    threshold = 1
    for _ in range(1):
        batch = test_iter.next()
        in_arrays = concat_examples(batch, device=None)
        in_vars = tuple(chainer.Variable(x) for x in in_arrays)
        img, _label = in_vars

        model(img)
        feat = model.feat
        H, W = feat.shape[2:]
        # Floor division: under Python 3 ``H / 2`` is a float and cannot
        # be used as an index.
        center = F.sum(feat[:, :, H // 2, W // 2])
        center.grad = np.ones_like(center.data)

        model.zerograds()
        img.zerograd()
        center.backward(retain_grad=True)

        img_grad = img.grad[0]  # (1, 28, 28)
        # scale |gradient| to 0..255 and drop the channel axis -> (28, 28)
        img_grad_abs = (np.abs(img_grad) /
                        np.max(np.abs(img_grad)) * 255)[0]
        # an all-zero gradient yields 0/0 = NaN; treat it as no influence
        img_grad_abs[np.isnan(img_grad_abs)] = 0

        y_indices, x_indices = np.where(img_grad_abs > threshold)
        plt.scatter(x_indices, y_indices, c='red')

        vis_img = transforms.chw_to_pil_image(255 * img.data[0])[:, :, 0]
        plt.imshow(vis_img, interpolation='nearest', cmap='gray')
        plt.show()
def predict(model, test_iter):
    """Return softmax outputs of ``model`` for every batch of ``test_iter``.

    The iterator is reset first. Each batch is converted for the device
    given by the module-level ``args.gpu``; only the first converted array
    is fed to the model, with training mode and back-propagation disabled.

    Returns:
        The per-batch probability arrays combined by ``concat_arrays``.
    """
    test_iter.reset()

    collected = []
    for minibatch in test_iter:
        inputs = convert.concat_examples(minibatch, args.gpu)[0]
        with chainer.using_config('train', False), \
                chainer.using_config('enable_backprop', False):
            scores = model(inputs)
            collected.append(chainer.functions.softmax(scores).data)

    return concat_arrays(collected)
def evaluate(model, it, device):
    """Evaluate ``model`` over ``it`` and plot the mean loss and accuracy.

    The model output and ``model.accuracy`` of every batch are weighted by
    the batch length, summed, and divided by ``len(it.dataset)``. The two
    means are handed to ``logger.plot`` under the names ``'test loss'``
    and ``'test accuracy'``.

    Args:
        model: Callable as ``model(x, t)``; exposes ``accuracy`` afterwards.
        it: Iterator over the evaluation batches.
        device: Device specifier forwarded to ``convert.concat_examples``.
    """
    loss_total = 0
    accuracy_total = 0

    for minibatch in it:
        x, t = convert.concat_examples(minibatch, device)
        weight = len(minibatch)
        loss_total += model(x, t) * weight
        accuracy_total += model.accuracy * weight

    n_examples = len(it.dataset)
    logger.plot('test loss', loss_total / n_examples)
    logger.plot('test accuracy', accuracy_total / n_examples)
def __init__(self, dataset, batch_size, repeat=True, shuffle=True):
    """Initialise the iterator and index the labels found in ``dataset``.

    Besides the base-class initialisation this stores:

    * ``self.labels``: sorted unique values of the second array produced
      by ``convert.concat_examples(dataset)``;
    * ``self.label_cnt``: ordered mapping from each of those labels to
      the number of examples carrying it.
    """
    super(custom_iterator, self).__init__(
        dataset, batch_size, repeat=repeat, shuffle=shuffle)

    all_labels = convert.concat_examples(dataset)[1]
    self.labels = numpy.sort(numpy.unique(all_labels))
    self.label_cnt = collections.OrderedDict(
        (label, len(numpy.where(all_labels == label)[0]))
        for label in self.labels
    )
def resize_converter(batch, device=None, padding=None):
    """Crop, resize and randomly augment images, then concatenate them.

    For every image of ``batch`` (channel-first array):

    1. a fourth channel, if present, is dropped;
    2. the longer of the two spatial axes is centre-cropped to the
       length of the shorter one;
    3. the image is resized to ``args.image_size`` squared with bilinear
       interpolation and divided by 255;
    4. with probability 0.5 it is mirrored along its last axis;
    5. one of four tone corrections is drawn uniformly: none, a gamma
       curve, a ``tanh`` curve or a ``sinh`` curve. The ``tanh``/``sinh``
       curves are rescaled by their own values at the ends of [0, 1];
    6. the result is mapped with ``2 * image - 1`` and cast to float32.

    Args:
        batch: Iterable of 3-dimensional image arrays.
        device: Forwarded to ``concat_examples``.
        padding: Forwarded to ``concat_examples``.

    Returns:
        The output of ``concat_examples`` for the processed images.
    """
    new_batch = []
    for image in batch:
        # NOTE(review): the second/third axes are called W/H here; the
        # crop below is symmetric, so it is correct whichever is which.
        C, W, H = image.shape

        if C == 4:
            image = image[:3, :, :]

        if W < H:
            offset = (H - W) // 2
            image = image[:, :, offset:offset + W]
        elif W > H:
            offset = (W - H) // 2
            image = image[:, offset:offset + H, :]

        # imresize is called on a channel-last array
        image = image.transpose(1, 2, 0)
        image = imresize(image, (args.image_size, args.image_size),
                         interp='bilinear')
        image = image.transpose(2, 0, 1)

        image = image / 255.  # 0. ~ 1.

        # Augmentation... random mirror along the last axis
        if np.random.rand() < 0.5:
            image = image[:, :, ::-1]

        # Augmentation... tone correction
        mode = np.random.randint(4)
        # mode == 0 -> no correction
        if mode == 1:
            gain = 0.2 * np.random.rand() + 0.9  # 0.9 ~ 1.1
            image = np.power(image, gain)
        elif mode == 2:
            gain = 1.5 * np.random.rand() + 1e-10  # 0 ~ 1.5
            image = np.tanh(gain * (image - 0.5))
            range_min = np.tanh(gain * (-0.5))  # curve value at x = 0.0
            range_max = np.tanh(gain * 0.5)  # curve value at x = 1.0
            image = (image - range_min) / (range_max - range_min)
        elif mode == 3:
            gain = 2.0 * np.random.rand() + 1e-10  # 0 ~ 2.0
            image = np.sinh(gain * (image - 0.5))
            # The bounds must come from sinh itself; using the tanh
            # bounds here pushed the result outside [0, 1].
            range_min = np.sinh(gain * (-0.5))  # curve value at x = 0.0
            range_max = np.sinh(gain * 0.5)  # curve value at x = 1.0
            image = (image - range_min) / (range_max - range_min)

        image = 2. * image - 1.
        new_batch.append(image.astype(np.float32))

    return concat_examples(new_batch, device=device, padding=padding)
def __call__(self, trainer):
    """Embed every batch of the iterator and save the features to disk.

    A shallow copy of ``self.iterator`` is walked batch by batch. Only the
    first converted array of each batch is passed to ``forward`` together
    with ``self.embed_func``; the first output of each call is collected.
    The concatenated result is written with ``np.save`` to
    ``self.filename`` inside ``trainer.out``.
    """
    feats = []
    for minibatch in copy.copy(self.iterator):
        self._check_type_dataset(minibatch[0])
        target_device = chainer.cuda.get_device(self.target)
        arrays = convert.concat_examples(minibatch, device=target_device)
        outputs = forward(self.target, arrays[0:1],
                          forward_func=self.embed_func)
        feats.append(outputs[0])

    stacked = np.concatenate(feats, axis=0)
    np.save(osp.join(trainer.out, self.filename), stacked)
def concat_examples_one(batch, device=None, padding=None):
    """Concatenate a minibatch and return it as a single torch tensor.

    :param batch: The batch to concatenate
    :param int device: CUDA device index; the tensor is moved to it only
        when the value is not ``None`` and not negative
    :param padding: Padding value forwarded to ``convert.concat_examples``
    :return: the concatenated batch
    :rtype: torch.Tensor
    """
    stacked = torch.from_numpy(
        convert.concat_examples(batch, padding=padding))
    wants_gpu = device is not None and device >= 0
    return stacked.cuda(device) if wants_gpu else stacked
def make_preview(trainer):
    """Write image and audio previews of both conversion directions.

    One batch is taken from each of ``iterator_a`` and ``iterator_b`` and
    pushed through ``g_a``/``g_b`` once and twice (with training mode and
    back-propagation disabled). For each of the six resulting tensors a
    ``.jpg`` is saved under ``<dst>/image`` and a ``.wav`` under
    ``<dst>/preview``; file names start with the current updater epoch.
    """
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        x_a = chainer.Variable(
            convert.concat_examples(iterator_a.next(), device))
        x_b = chainer.Variable(
            convert.concat_examples(iterator_b.next(), device))

        x_ab = g_a(x_a)
        x_ba = g_b(x_b)
        x_bab = g_a(x_ba)
        x_aba = g_b(x_ab)

    preview_dir = '{}/preview'.format(dst)
    image_dir = '{}/image'.format(dst)
    for directory in (preview_dir, image_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    epoch = trainer.updater.epoch
    outputs = (
        ('a', x_a), ('ab', x_ab), ('aba', x_aba),
        ('b', x_b), ('ba', x_ba), ('bab', x_bab),
    )
    for tag, variable in outputs:
        # strip ``padding`` entries from both ends of the third axis and
        # flatten to rows of 128 values
        host = cp.asnumpy(variable.data)
        spec = host[:, :, padding:-padding, :].reshape(1, -1, 128)
        image.save(image_dir + '/{}{}.jpg'.format(epoch, tag), spec)

        wave = np.concatenate(
            [gla.inverse(frame) for frame in dataset.reverse(spec)])
        dataset.save(
            preview_dir + '/{}{}.wav'.format(epoch, tag), 16000, wave)
def make_image(trainer):
    """Save a preview grid of real and converted images for this epoch.

    One batch from each of ``iterator_a`` and ``iterator_b`` is converted
    with ``generator_ab`` / ``generator_ba`` in test mode. The four
    batches (a, a->b, b, b->a) form the four rows of the grid, one image
    per column. Pixel values are mapped from [-1, 1] to [0, 255] and the
    grid is written to ``<dst>/preview/image<epoch>.png``.

    The batch size and image geometry are taken from the data instead of
    being fixed to ten 3x64x64 images.
    """
    # read data
    batch_a = iterator_a.next()
    x_a = convert.concat_examples(batch_a, device)
    x_a = chainer.Variable(x_a, volatile='on')

    batch_b = iterator_b.next()
    x_b = convert.concat_examples(batch_b, device)
    x_b = chainer.Variable(x_b, volatile='on')

    # conversion
    x_ab = generator_ab(x_a, test=True)
    x_ba = generator_ba(x_b, test=True)

    # to cpu
    x_a = chainer.cuda.to_cpu(x_a.data)
    x_b = chainer.cuda.to_cpu(x_b.data)
    x_ab = chainer.cuda.to_cpu(x_ab.data)
    x_ba = chainer.cuda.to_cpu(x_ba.data)

    # reshape: (4 * N, C, H, W) -> (4 * H, N * W, C)
    x = np.concatenate((x_a, x_ab, x_b, x_ba), 0)
    n, c, h, w = x_a.shape
    x = x.reshape(4, n, c, h, w)
    x = x.transpose(0, 3, 1, 4, 2)
    x = x.reshape((4 * h, n * w, c))

    # to [0, 255]; the factor is spelled as a float so that it is 127.5
    # even where ``255 / 2`` would be integer division
    x += 1
    x *= 127.5
    x = np.asarray(np.clip(x, 0, 255), dtype=np.uint8)

    preview_dir = '{}/preview'.format(dst)
    preview_path = preview_dir +\
        '/image{:0>5}.png'.format(trainer.updater.epoch)
    if not os.path.exists(preview_dir):
        os.makedirs(preview_dir)
    Image.fromarray(x).save(preview_path)
def train(x):
    """Train an ``MLP`` with the given hyper-parameters; return test accuracy.

    Args:
        x: Triple ``(lr, momentum, h_units)``.

    Returns:
        Mean per-batch accuracy over the test iterator, measured after the
        final epoch (0 if no epoch ran).

    The model is trained with MomentumSGD for 50 epochs; the learning rate
    is multiplied by 0.1 once epoch 40 has completed. ``train_iter``,
    ``test_iter``, ``device`` and ``args`` are read from the enclosing
    module.
    """
    lr, momentum, h_units = x

    epochs = 50
    stepsizes = [40]
    gamma = 0.1

    model = MLP(h_units)
    if args.gpu > -1:
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=lr, momentum=momentum)
    optimizer.setup(model)

    test_accuracy = 0
    for epoch in range(epochs):
        train_iter.reset()
        accuracy = []

        # training pass over a copy of the iterator
        for batch in copy.copy(train_iter):
            inputs, labels = concat_examples(batch, device=device)
            optimizer.update(model, inputs, labels)
            accuracy.append(float(model.accuracy.data))
        train_accuracy = np.mean(accuracy)

        # the same list is reused for the test pass
        del accuracy[:]
        for batch in copy.copy(test_iter):
            inputs, labels = concat_examples(batch, device=device)
            model(inputs, labels)
            accuracy.append(float(model.accuracy.data))

        if (epoch + 1) in stepsizes:
            optimizer.lr *= gamma

        test_accuracy = np.mean(accuracy)
        # print(epoch, train_accuracy, test_accuracy)

    return test_accuracy
def extract(self, images, layers=None, size=(224, 224), **kwargs):
    """extract(self, images, layers=['pool5'], size=(224, 224))

    Extracts all the feature maps of given images.

    Instead of a ready-made variable, this method accepts raw images,
    applies ``prepare`` to each of them and then calls the model, i.e. it
    is a shortcut for calling ``prepare`` and ``__call__`` by hand.

    .. warning::

       ``train`` and ``volatile`` arguments are not supported anymore since
       v2.
       Instead, use ``chainer.using_config('train', train)`` and
       ``chainer.using_config('enable_backprop', not volatile)``
       respectively.
       See :func:`chainer.using_config`.

    Args:
        images (iterable of PIL.Image or numpy.ndarray): Input images.
        layers (list of str): The list of layer names you want to extract.
        size (pair of ints): The resolution of resized images used as
            an input of CNN. All the given images are not resized
            if this argument is ``None``, but the resolutions of
            all the images should be the same.

    Returns:
        Dictionary of ~chainer.Variable: A dictionary in which
        the key contains the layer name and the value contains
        the corresponding feature map variable.

    """
    selected = layers if layers is not None else ['pool5']

    # reject the removed keyword arguments with a helpful message, and
    # anything else that is left over
    hints = dict(
        train='train argument is not supported anymore. '
              'Use chainer.using_config',
        volatile='volatile argument is not supported anymore. '
                 'Use chainer.using_config',
    )
    argument.check_unexpected_kwargs(kwargs, **hints)
    argument.assert_kwargs_empty(kwargs)

    array = concat_examples([prepare(img, size=size) for img in images])
    variable = Variable(self.xp.asarray(array))
    return self(variable, layers=selected)
def eval(loss_function, iterator):
    """
    Average a loss function over all batches of an iterator.

    The iterator is reset, every batch is converted with
    ``convert.concat_examples`` and unpacked into ``loss_function``; the
    ``data`` of each result is collected. The returned value is the plain
    (unweighted) mean of those per-batch values.

    :param loss_function: The loss function to evaluate
    :param iterator: The iterator over the evaluation data set
    :return: The mean loss value
    """
    iterator.reset()
    per_batch = [
        loss_function(*convert.concat_examples(batch)).data
        for batch in iterator
    ]
    return np.mean(per_batch)
def fact_pad_concat_convert(fact_batch, device, test=False):
    """Convert a batch of facts into padded head/relation/tail/label blocks.

    Args:
        fact_batch: List of tuples of heads, relations, tails, and labels.
        device: Device ID to which an array is sent.
        test: Only the exact value ``True`` selects test behaviour, in
            which the labels of the batch are used as they are. Otherwise
            negative examples are appended: each block is followed by an
            independently shuffled copy of itself, and the labels become
            ones for the original half and zeros for the shuffled half.

    Returns:
        Tuple of Converted array ``(h_block, r_block, t_block, y_block)``.

    """
    heads, relations, tails, labels = zip(*fact_batch)

    h_block, r_block, t_block = (
        convert.concat_examples(column, device, padding=PAD)
        for column in (heads, relations, tails)
    )

    if test is True:
        y_block = convert.concat_examples(labels, device, padding=PAD)
        return (h_block, r_block, t_block, y_block)

    # add negative example
    xp = cuda.get_array_module(h_block)

    def with_shuffled_copy(block):
        negative = block.copy()
        xp.random.shuffle(negative)
        return xp.concatenate((block, negative))

    h_block = with_shuffled_copy(h_block)
    r_block = with_shuffled_copy(r_block)
    t_block = with_shuffled_copy(t_block)

    n_facts = len(heads)
    targets = xp.concatenate(
        (xp.ones(n_facts, 'i'), xp.zeros(n_facts, 'i')), )
    y_block = convert.concat_examples(targets, device, padding=PAD)

    return (h_block, r_block, t_block, y_block)
def evaluate(model, iter):
    """Return ``exp`` of the mean batch loss over ``iter``.

    Evaluation routine to be used for validation and test. The model is
    copied so that evaluation uses its own recurrent state; the copy's
    predictor state is reset and its ``train`` flag cleared. The original
    model's ``train`` flag is cleared before copying and set back to
    ``True`` afterwards.

    Args:
        model: Model with a ``predictor`` attribute; callable as
            ``model(x, t)``.
        iter: Iterator over evaluation batches (a shallow copy is used).
            Batches are converted for the module-level ``args.gpu``.
    """
    model.predictor.train = False

    evaluator = model.copy()  # to use different state
    evaluator.predictor.reset_state()  # initialize state
    evaluator.predictor.train = False  # dropout does nothing

    loss_total = 0
    n_batches = 0
    for minibatch in copy.copy(iter):
        x, t = convert.concat_examples(minibatch, args.gpu)
        loss_total += evaluator(x, t).data
        n_batches += 1

    model.predictor.train = True
    mean_loss = float(loss_total) / n_batches
    return np.exp(mean_loss)
def seq2seq_pad_concat_convert(xy_batch, device, eos_id=0, bos_id=2):
    """Pad (source, target) id sequences into encoder/decoder blocks.

    Args:
        xy_batch: List of ``(source_ids, target_ids)`` pairs.
        device: Forwarded to ``convert.concat_examples``.
        eos_id (int): Id stored right after the last token of a sentence.
        bos_id (int): Id used for the leading column.

    Returns:
        tuple: ``(x_block, y_in_block, y_out_block)``.

        * ``x_block``: sources with ``eos_id`` appended to every sentence
          and a ``bos_id`` column in front;
        * ``y_in_block``: targets with a ``bos_id`` column in front;
        * ``y_out_block``: targets with ``eos_id`` appended to every
          sentence.

        Unused positions hold -1.
    """
    sources, targets = zip(*xy_batch)

    src = convert.concat_examples(sources, device, padding=-1)
    tgt = convert.concat_examples(targets, device, padding=-1)
    xp = cuda.get_array_module(src)

    at_end = ((0, 0), (0, 1))
    at_start = ((0, 0), (1, 0))

    def append_eos(block, sentences):
        # widen by one -1 column, then overwrite the slot that follows
        # the last real token of each sentence
        widened = xp.pad(block, at_end, 'constant', constant_values=-1)
        for row, sentence in enumerate(sentences):
            widened[row, len(sentence)] = eos_id
        return widened

    src = append_eos(src, sources)
    src = xp.pad(src, at_start, 'constant', constant_values=bos_id)

    tgt_out = append_eos(tgt, targets)
    tgt_in = xp.pad(tgt, at_start, 'constant', constant_values=bos_id)

    return (src, tgt_in, tgt_out)
def __evaluate(self, data):
    """Return the sample-weighted mean loss and accuracy over ``data``.

    ``data`` is traversed once, in order, in batches of
    ``self.batch_size`` with back-propagation and training mode switched
    off. The iterator is finalized before returning.

    Returns:
        tuple: ``(mean_loss, mean_accuracy)``.
    """
    batches = chainer.iterators.SerialIterator(
        data, self.batch_size, repeat=False, shuffle=False)

    loss_sum = 0
    acc_sum = 0
    n_seen = 0

    with chainer.using_config('enable_backprop', False), \
            chainer.using_config('train', False):
        for minibatch in batches:
            x_batch, y_batch = convert.concat_examples(
                minibatch, self.device_id)
            loss, acc = self.__forward(x_batch, y_batch)
            weight = len(x_batch)
            loss_sum += float(loss.data) * weight
            acc_sum += float(acc.data) * weight
            n_seen += weight

    batches.finalize()
    return loss_sum / n_seen, acc_sum / n_seen
def evaluate(model, iter):
    """Return ``exp`` of the mean batch loss over ``iter``.

    Evaluation routine to be used for validation and test. A copy of the
    model with a freshly reset predictor state is run over a shallow copy
    of the iterator, with training mode and back-propagation disabled.
    Batches are converted for the module-level ``args.gpu``.
    """
    evaluator = model.copy()  # to use different state
    evaluator.predictor.reset_state()  # initialize state

    loss_total = 0
    n_batches = 0

    # Evaluation mode; disabling backprop is optional but can reduce
    # computational overhead.
    with configuration.using_config('train', False), \
            chainer.using_config('enable_backprop', False):
        for minibatch in copy.copy(iter):
            x, t = convert.concat_examples(minibatch, args.gpu)
            loss_total += evaluator(x, t).array
            n_batches += 1

    mean_loss = float(loss_total) / n_batches
    return np.exp(mean_loss)
def __call__(self, trainer):
    """Compute recall@k from saved features and report it.

    The feature matrix is loaded from ``self.features_file`` inside
    ``trainer.out``; the class of every sample is collected from the
    second array of each batch of ``self.iterator``. ``self.nbrs`` is
    fitted on all features and queried window by window. A sample counts
    as a hit for ``k`` when any of its first ``k`` neighbours (after
    dropping the nearest one) shares its class. The mean hit rate is
    reported as ``recall@<k>`` for every ``k`` in ``self.ks``.

    Does nothing when the optional dependency flag ``_available`` is
    false.

    Raises:
        ValueError: If the number of feature rows differs from the number
            of collected classes.
    """
    if not _available:
        return

    features_file = osp.join(trainer.out, self.features_file)
    iterator = copy.copy(self.iterator)
    features = np.load(features_file)
    optimizer = trainer.updater.get_optimizer('main')

    classes = np.concatenate(
        [convert.concat_examples(v)[1] for v in iterator], axis=0)

    if features.shape[0] != classes.shape[0]:
        raise ValueError(
            'batch size of features and the class array differ')

    n = features.shape[0]
    n_match = {k: [] for k in self.ks}
    self.nbrs.fit(features)

    # ``start`` already advances by ``window_size``; multiplying it by the
    # window size again skipped most samples and produced empty windows.
    for start in six.moves.range(0, n, self.window_size):
        the_slice = slice(start, start + self.window_size)
        src_features = features[the_slice]
        src_classes = classes[the_slice]

        indices = self.nbrs.kneighbors(
            src_features, n_neighbors=self.max_k + 1,
            return_distance=False)
        # drop the first neighbour (presumably the query sample itself,
        # since the index was fitted on the same features)
        indices = indices[:, 1:]

        knbr_classes = classes[indices]  # (window, max_k)
        match = knbr_classes == src_classes[:, None]
        for k in self.ks:
            n_match_k = np.any(match[:, :k], axis=1)
            n_match[k].append(n_match_k)

    for k in self.ks:
        n_match[k] = np.concatenate(n_match[k])
        reporter.report({'recall@{}'.format(k): np.mean(n_match[k])},
                        optimizer.target)
def evaluate(model, iter, bproplen=100):
    """Return ``exp`` of the mean batch loss for a state-passing model.

    Evaluation routine to be used for validation and test. A copy of the
    model is called as ``evaluator(state, x, t)`` and returns the next
    state together with the loss; the state starts as ``None``. Every
    ``bproplen`` batches (starting with the first) the loss is unchained
    to truncate the graph. The ``train`` flag of the original predictor is
    cleared for the duration and set back to ``True`` at the end.
    Batches are converted for the module-level ``gpu_id``.
    """
    model.predictor.train = False

    evaluator = model.copy()  # to use different state
    evaluator.predictor.train = False  # dropout does nothing

    state = None
    loss_total = 0
    n_batches = 0
    for step, minibatch in enumerate(copy.copy(iter)):
        x, t = convert.concat_examples(minibatch, gpu_id)
        state, loss = evaluator(state, x, t)
        loss_total += loss.data
        if step % bproplen == 0:
            loss.unchain_backward()  # Truncate the graph
        n_batches = step + 1

    model.predictor.train = True
    return np.exp(float(loss_total) / n_batches)
def test_realvideo(self):
    """Check the batch shape of real videos and the discriminator output.

    Files in ``self.dataset`` whose name contains ``mp4`` are loaded
    through ``PreprocessedDataset``. The first batch must have shape
    ``(batchsize, 3, frame, height, width)`` and the discriminator must
    map it to ``(batchsize, 1)``.
    """
    dis = Discriminator()

    video_files = [name for name in os.listdir(self.dataset)
                   if 'mp4' in name]
    train = PreprocessedDataset(paths=video_files, root=self.dataset)
    train_iter = chainer.iterators.SerialIterator(train, self.batchsize)

    first_batch = train_iter.next()
    x_real = Variable(convert.concat_examples(first_batch, self.gpu))
    expected_input = (self.batchsize, 3,
                      self.frame, self.height, self.width)
    self.assertEqual(expected_input, x_real.shape)

    y_real = dis(x_real)
    self.assertEqual((self.batchsize, 1), y_real.shape)
def seq_pad_concat(batch, device):
    """Turn a list of (label, word_ids) pairs into a padded ``Batch``.

    The word-id sequences are padded with ``preprocess.Vocab_Pad.PAD`` and
    converted, like the labels, into ``LONG_TYPE`` tensors wrapped in
    ``Variable`` without gradient. The word tensor is transposed before
    it is stored, so its first two axes are swapped relative to the
    padded block.

    Args:
        batch: Iterable of ``(label, word_ids)`` pairs.
        device: Forwarded to ``convert.concat_examples``.

    Returns:
        Batch: With fields ``batch_size``, ``word_ids``, ``labels`` and
        ``sent_len`` (array of the unpadded sequence lengths).
    """
    labels, word_ids = zip(*batch)

    padded = convert.concat_examples(
        word_ids, device, padding=preprocess.Vocab_Pad.PAD)
    sent_len = np.array([len(ids) for ids in word_ids])

    # Converting from numpy format to Torch Tensor
    word_tensor = Variable(
        torch.LongTensor(padded).type(LONG_TYPE), requires_grad=False)
    label_tensor = Variable(
        torch.LongTensor(labels).type(LONG_TYPE), requires_grad=False)

    return Batch(batch_size=len(label_tensor),
                 word_ids=word_tensor.transpose(0, 1).contiguous(),
                 labels=label_tensor,
                 sent_len=sent_len)
def validation_measure(data_index, which_measure):
    """Score the model on one validation set with the requested measure.

    Predictions come from ``model.forward_2`` on the inputs of
    ``validation[data_index]``. For the data sets named ``cocnas``,
    ``maxwell`` and ``opens`` both predictions and targets are mapped
    through ``e ** value`` before scoring.

    Args:
        data_index: Index into ``validation`` and ``data_set_name``.
        which_measure (str): One of ``'Pred(25)'``, ``'MdAE'``, ``'SA'``
            or ``'RE*'``.

    Returns:
        The value of the matching ``criteria`` function, or ``None`` for
        an unknown measure name.
    """
    x_array, t_array = convert.concat_examples(validation[data_index])
    predicted = model.forward_2(chainer.Variable(x_array)).data

    if data_set_name[data_index] in ('cocnas', 'maxwell', 'opens'):
        predicted = np.power(math.e, predicted)
        t_array = np.power(math.e, t_array)

    # measure name -> name of the function in ``criteria``
    scorer_names = {
        'Pred(25)': 'pred25',
        'MdAE': 'mae',
        'SA': 'sa',
        'RE*': 're',
    }
    scorer_name = scorer_names.get(which_measure)
    if scorer_name is None:
        return None
    return getattr(criteria, scorer_name)(t_array, predicted)
def __evaluate(self, data):
    """Return the example-weighted mean loss and accuracy over ``data``.

    Args:
        data: Dataset iterated once, in order, in batches of
            ``self.batch_size``.

    Returns:
        Tuple ``(mean_loss, mean_accuracy)``.
    """
    iterator = chainer.iterators.SerialIterator(
        data, self.batch_size, repeat=False, shuffle=False)

    loss_sum = 0
    acc_sum = 0
    n_examples = 0
    # Test mode and no graph construction for the whole pass.
    with chainer.using_config('enable_backprop', False), \
            chainer.using_config('train', False):
        for minibatch in iterator:
            x_batch, y_batch = convert.concat_examples(
                minibatch, self.device_id)
            loss, acc = self.__forward(x_batch, y_batch)
            # Weight by batch size so a short final batch is not
            # over-represented in the mean.
            batch_len = len(x_batch)
            loss_sum += float(loss.data) * batch_len
            acc_sum += float(acc.data) * batch_len
            n_examples += batch_len
    iterator.finalize()

    return loss_sum / n_examples, acc_sum / n_examples
def evaluate(self):
    """Run the target over the validation iterator and report its loss.

    Each batch is padded (inputs with 0, targets with -1) and processed one
    position at a time, starting from ``state = None`` for every batch.  The
    loss of each position is weighted by the number of non-zero inputs at
    that position -- presumably the non-padding entries; TODO confirm.

    Returns:
        The observation dict filled by the reporter; it carries the
        weighted mean under ``'loss'``.
    """
    val_iter = self.get_iterator('main')
    target = self.get_target('main')

    weighted_loss = 0
    n_counted = 0
    for minibatch in copy.copy(val_iter):
        x, t = convert.concat_examples(
            minibatch, device=self.device, padding=(0, -1))
        xp = chainer.backends.cuda.get_array_module(x)
        state = None
        seq_len = len(x[0])
        for pos in six.moves.range(seq_len):
            state, step_loss = target(state, x[:, pos], t[:, pos])
            n_active = xp.count_nonzero(x[:, pos])
            weighted_loss += step_loss.data * n_active
            n_counted += int(n_active)

    # report validation loss
    observation = {}
    with reporter.report_scope(observation):
        reporter.report({'loss': float(weighted_loss / n_counted)}, target)
    return observation
def main():
    """Train ``Net`` on MNIST with a manual loop, logging once per epoch.

    After every epoch the loss and accuracy on the full training set and the
    accuracy on the full test set are printed as one tab-separated row.
    """
    # config
    n_epochs = 20
    minibatch_size = 100

    # Model
    model = Net(100, 10)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Data
    train, test = chainer.datasets.get_mnist()
    train_iter = chainer.iterators.SerialIterator(train, minibatch_size)
    test_iter = chainer.iterators.SerialIterator(
        test, minibatch_size, repeat=False, shuffle=False)

    # print header
    print("Epoch\tloss(train)\taccuracy(train)\taccuracy(test)")

    # train
    while train_iter.epoch < n_epochs:
        inputs, targets = convert.concat_examples(train_iter.next())

        model.cleargrads()
        batch_loss = model.loss(inputs, targets)
        batch_loss.backward()
        optimizer.update()

        # log every epoch
        if not train_iter.is_new_epoch:
            continue

        # Accuracy is computed on the complete datasets in one call each,
        # using the TupleDataset's underlying arrays.
        loss_train, accuracy_train = model.loss_with_accuracy(
            *train._datasets)
        _, accuracy_test = model.loss_with_accuracy(*test._datasets)
        print("%d\t%f\t%f\t%f" % (train_iter.epoch, loss_train.data,
                                  accuracy_train, accuracy_test))
def evaluate(model, iter):
    """Return the perplexity of the unrolled model over ``iter``.

    Evaluation routine to be used for validation and test.  All batches are
    collected first and handed to a copy of the model in a single call; the
    per-step losses are then summed.

    Args:
        model: Model with an ``rnn`` attribute supporting ``reset_state()``;
            calling it with a list of word batches returns one output per
            batch.
        iter: Batch iterator; it is reset before being consumed.

    Returns:
        ``exp`` of the summed loss divided by the number of batches.
    """
    evaluator = model.copy()  # to use different state
    evaluator.rnn.reset_state()  # initialize state

    total_loss = 0
    n_batches = 0
    word_batches = []
    label_batches = []
    lossfun = softmax_cross_entropy.softmax_cross_entropy

    with configuration.using_config('train', False):
        iter.reset()
        for minibatch in iter:
            # ``args`` comes from the enclosing scope.
            word, label = convert.concat_examples(minibatch, args.gpu)
            word_batches.append(word)
            label_batches.append(label)
            n_batches += 1

        outputs = evaluator(word_batches)
        for step, y in enumerate(outputs):
            total_loss += lossfun(y, label_batches[step]).array

    return np.exp(float(total_loss) / n_batches)
def main():
    """Train an MLP classifier on MNIST with a hand-written training loop.

    Command-line options select mini-batch size, epoch count, device, hidden
    unit count, an output directory and an optional directory to resume
    from.  After each epoch the mean train and test loss/accuracy are
    printed; at the end the model and optimizer are saved as
    ``<out>/mlp.model`` and ``<out>/mlp.state``.
    """
    # Local import so this function does not depend on the file's import
    # block for the output-directory handling below.
    import os

    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--device', '-d', type=str, default='-1',
                        help='Device specifier. Either ChainerX device '
                        'specifier or an integer. If non-negative integer, '
                        'CuPy arrays with specified device id are used. If '
                        'negative integer, NumPy arrays are used')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot using model '
                             'and state files in the specified directory')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu', '-g', type=int, nargs='?', const=0,
                       help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    device = parse_device(args)

    print('Device: {}'.format(device))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    model = L.Classifier(train_mnist.MLP(args.unit, 10))
    model.to_device(device)
    device.use()

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    if args.resume:
        # Resume from a snapshot
        serializers.load_npz('{}/mlp.model'.format(args.resume), model)
        serializers.load_npz('{}/mlp.state'.format(args.resume), optimizer)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()

    train_count = len(train)
    test_count = len(test)

    with SerialIterator(train, args.batchsize) as train_iter, \
        SerialIterator(
            test, args.batchsize, repeat=False, shuffle=False) as test_iter:

        sum_accuracy = 0
        sum_loss = 0

        while train_iter.epoch < args.epoch:
            batch = train_iter.next()
            x, t = convert.concat_examples(batch, device)
            optimizer.update(model, x, t)
            # Weight by batch size so the epoch mean is per example.
            sum_loss += float(model.loss.array) * len(t)
            sum_accuracy += float(model.accuracy.array) * len(t)

            if train_iter.is_new_epoch:
                print('epoch: {}'.format(train_iter.epoch))
                print('train mean loss: {}, accuracy: {}'.format(
                    sum_loss / train_count, sum_accuracy / train_count))
                # evaluation
                sum_accuracy = 0
                sum_loss = 0
                # Enable evaluation mode.
                with configuration.using_config('train', False):
                    # This is optional but can reduce computational overhead.
                    with chainer.using_config('enable_backprop', False):
                        for batch in test_iter:
                            x, t = convert.concat_examples(batch, device)
                            loss = model(x, t)
                            sum_loss += float(loss.array) * len(t)
                            sum_accuracy += float(
                                model.accuracy.array) * len(t)

                # The test iterator does not repeat; rewind it for the next
                # epoch's evaluation.
                test_iter.reset()
                print('test mean loss: {}, accuracy: {}'.format(
                    sum_loss / test_count, sum_accuracy / test_count))
                sum_accuracy = 0
                sum_loss = 0

        # The snapshot files are written inside ``args.out``; create the
        # directory first so a finished training run is not lost to a
        # missing-directory error.
        if not os.path.exists(args.out):
            os.makedirs(args.out)

        # Save the model and the optimizer
        print('save the model')
        serializers.save_npz('{}/mlp.model'.format(args.out), model)
        print('save the optimizer')
        serializers.save_npz('{}/mlp.state'.format(args.out), optimizer)
def main():
    """Train a VGG classifier on CIFAR-10/100 with a manual training loop.

    The learning rate is halved whenever an epoch whose number is a multiple
    of 25 has just been completed.  Mean train/test loss and accuracy are
    printed after every epoch, and the model and optimizer are saved as
    ``mlp.model`` / ``mlp.state`` in the working directory at the end.

    Raises:
        RuntimeError: If ``--dataset`` is neither ``cifar10`` nor
            ``cifar100``.
    """
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Pick the dataset; the number of classes sizes the network output.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    if args.test:
        train = train[:200]
        test = test[:200]

    n_train = len(train)
    n_test = len(test)

    # Classifier wraps the network and exposes ``loss`` and ``accuracy``.
    model = L.Classifier(models.VGG.VGG(class_labels))
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    acc_total = 0
    loss_total = 0

    while train_iter.epoch < args.epoch:
        minibatch = train_iter.next()

        # Reduce learning rate by 0.5 every 25 epochs.
        if train_iter.epoch % 25 == 0 and train_iter.is_new_epoch:
            optimizer.lr *= 0.5
            print('Reducing learning rate to: ', optimizer.lr)

        x_array, t_array = convert.concat_examples(minibatch, args.gpu)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array)
        optimizer.update(model, x, t)
        loss_total += float(model.loss.data) * len(t.data)
        acc_total += float(model.accuracy.data) * len(t.data)

        if not train_iter.is_new_epoch:
            continue

        print('epoch: ', train_iter.epoch)
        print('train mean loss: {}, accuracy: {}'.format(
            loss_total / n_train, acc_total / n_train))

        # evaluation
        acc_total = 0
        loss_total = 0
        model.predictor.train = False
        for minibatch in test_iter:
            x_array, t_array = convert.concat_examples(minibatch, args.gpu)
            x = chainer.Variable(x_array)
            t = chainer.Variable(t_array)
            loss = model(x, t)
            loss_total += float(loss.data) * len(t.data)
            acc_total += float(model.accuracy.data) * len(t.data)

        # Rewind the non-repeating test iterator for the next epoch.
        test_iter.reset()
        model.predictor.train = True
        print('test mean loss: {}, accuracy: {}'.format(
            loss_total / n_test, acc_total / n_test))
        acc_total = 0
        loss_total = 0

    # Save the model and the optimizer
    print('save the model')
    serializers.save_npz('mlp.model', model)
    print('save the optimizer')
    serializers.save_npz('mlp.state', optimizer)
def main():
    """Train a VAE on MNIST with a manual loop, then save model and images.

    After every epoch the mean loss and mean reconstruction loss over the
    training set and the test set are printed.  When training ends the model
    and optimizer are written to ``<out>/mlp.model`` and ``<out>/mlp.state``,
    and image grids of originals, reconstructions and random samples are
    saved in the same directory.
    """
    parser = argparse.ArgumentParser(description='Chainer example: VAE')
    parser.add_argument('--initmodel', '-m', default='',
                        help='Initialize the model from given file')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the optimization from snapshot')
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--epoch', '-e', default=100, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--dimz', '-z', default=20, type=int,
                        help='dimention of encoded vector')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='learning minibatch size')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# dim z: {}'.format(args.dimz))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Prepare VAE model, defined in net.py
    model = net.VAE(784, args.dimz, 500)
    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Initialize / Resume
    if args.initmodel:
        chainer.serializers.load_npz(args.initmodel, model)
    if args.resume:
        chainer.serializers.load_npz(args.resume, optimizer)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist(withlabel=False)
    if args.test:
        train, _ = chainer.datasets.split_dataset(train, 100)
        test, _ = chainer.datasets.split_dataset(test, 100)

    train_count = len(train)
    test_count = len(test)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # The accumulators live outside the loop so they cover a whole epoch;
    # zeroing them on every iteration would make the reported training mean
    # reflect only the final mini-batch.
    sum_loss = 0
    sum_rec_loss = 0

    while train_iter.epoch < args.epoch:
        batch = train_iter.next()
        x_array = convert.concat_examples(batch, args.gpu)
        x = chainer.Variable(x_array)
        # Update model based on the loss function
        # defined by model.get_loss_func()
        optimizer.update(model.get_loss_func(), x)

        sum_loss += float(model.loss.data) * len(x.data)
        sum_rec_loss += float(model.rec_loss.data) * len(x.data)

        if train_iter.is_new_epoch:
            print('train mean loss={}, mean reconstruction loss={}'
                  .format(sum_loss / train_count, sum_rec_loss / train_count))

            # evaluation
            sum_loss = 0
            sum_rec_loss = 0
            for batch in test_iter:
                x_array = convert.concat_examples(batch, args.gpu)
                x = chainer.Variable(x_array)
                loss_func = model.get_loss_func(k=10)
                loss_func(x)
                sum_loss += float(model.loss.data) * len(x.data)
                sum_rec_loss += float(model.rec_loss.data) * len(x.data)

            # Rewind the non-repeating test iterator for the next epoch.
            test_iter.reset()
            print('test mean loss={}, mean reconstruction loss={}'
                  .format(sum_loss / test_count, sum_rec_loss / test_count))

            # Start the next epoch's training statistics from zero.
            sum_loss = 0
            sum_rec_loss = 0

    # Note that os.makedirs(path, exist_ok=True) can be used
    # if this script only supports python3
    if not os.path.exists(args.out):
        os.mkdir(args.out)

    # Save the model and the optimizer
    print('save the model')
    chainer.serializers.save_npz(
        os.path.join(args.out, 'mlp.model'), model)
    print('save the optimizer')
    chainer.serializers.save_npz(
        os.path.join(args.out, 'mlp.state'), optimizer)

    # Visualize the results
    def save_images(x, filename):
        """Draw the first nine rows of ``x`` as 28x28 images on a 3x3 grid."""
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(3, 3, figsize=(9, 9), dpi=100)
        for ai, xi in zip(ax.flatten(), x):
            ai.imshow(xi.reshape(28, 28))
        fig.savefig(filename)

    # Plotting works on host arrays, so bring the model back to the CPU.
    model.to_cpu()
    train_ind = [1, 3, 5, 10, 2, 0, 13, 15, 17]
    x = chainer.Variable(np.asarray(train[train_ind]))
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        x1 = model(x)
    save_images(x.data, os.path.join(args.out, 'train'))
    save_images(x1.data, os.path.join(args.out, 'train_reconstructed'))

    test_ind = [3, 2, 1, 18, 4, 8, 11, 17, 61]
    x = chainer.Variable(np.asarray(test[test_ind]))
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        x1 = model(x)
    save_images(x.data, os.path.join(args.out, 'test'))
    save_images(x1.data, os.path.join(args.out, 'test_reconstructed'))

    # draw images from randomly sampled z
    z = chainer.Variable(
        np.random.normal(0, 1, (9, args.dimz)).astype(np.float32))
    x = model.decode(z)
    save_images(x.data, os.path.join(args.out, 'sampled'))
def main():
    """Train an LSTM language model on Penn Tree Bank with truncated BPTT.

    Training perplexity is printed every 20 iterations and validation
    perplexity after every epoch.  At the end the test perplexity is printed
    and the model and optimizer are saved as ``rnnlm.model`` /
    ``rnnlm.state`` inside ``--out``.

    Raises:
        ValueError: If ``--resume`` is given but the path does not exist.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=20,
                        help='Number of examples in each mini-batch')
    parser.add_argument('--bproplen', '-l', type=int, default=35,
                        help='Number of words in each mini-batch '
                             '(= length of truncated BPTT)')
    parser.add_argument('--epoch', '-e', type=int, default=39,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--gradclip', '-c', type=float, default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', type=str,
                        help='Directory that has `rnnln.model`'
                        ' and `rnnlm.state`')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.set_defaults(test=False)
    parser.add_argument('--unit', '-u', type=int, default=650,
                        help='Number of LSTM units in each layer')
    args = parser.parse_args()

    def evaluate(model, iter):
        """Return the perplexity of ``model`` over ``iter`` (used for
        validation and test)."""
        evaluator = model.copy()  # to use different state
        evaluator.predictor.reset_state()  # initialize state
        total_loss = 0
        n_batches = 0
        # Evaluation mode; disabling backprop is optional but can reduce
        # computational overhead.
        with configuration.using_config('train', False), \
                chainer.using_config('enable_backprop', False):
            iter.reset()
            for minibatch in iter:
                x, t = convert.concat_examples(minibatch, args.gpu)
                total_loss += evaluator(x, t).array
                n_batches += 1
        return np.exp(float(total_loss) / n_batches)

    # Load the Penn Tree Bank long word sequence dataset
    train, val, test = chainer.datasets.get_ptb_words()
    n_vocab = max(train) + 1  # train is just an array of integers
    print('#vocab = {}'.format(n_vocab))

    if args.test:
        train = train[:100]
        val = val[:100]
        test = test[:100]

    # Create the dataset iterators
    train_iter = train_ptb.ParallelSequentialIterator(train, args.batchsize)
    val_iter = train_ptb.ParallelSequentialIterator(val, 1, repeat=False)
    test_iter = train_ptb.ParallelSequentialIterator(test, 1, repeat=False)

    # Prepare an RNNLM model
    rnn = train_ptb.RNNForLM(n_vocab, args.unit)
    model = L.Classifier(rnn)
    model.compute_accuracy = False  # we only want the perplexity
    if args.gpu >= 0:
        # Make the specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Set up an optimizer
    optimizer = chainer.optimizers.SGD(lr=1.0)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    # Load model and optimizer
    if args.resume is not None:
        resume = args.resume
        if not os.path.exists(resume):
            raise ValueError(
                '`args.resume` ("{}") is specified,'
                ' but it does not exist'.format(resume)
            )
        serializers.load_npz(os.path.join(resume, 'rnnlm.model'), model)
        serializers.load_npz(
            os.path.join(resume, 'rnnlm.state'), optimizer)

    perp_total = 0
    n_steps = 0
    iteration = 0
    while train_iter.epoch < args.epoch:
        loss = 0
        iteration += 1
        # Progress the dataset iterator for bprop_len words at each
        # iteration.
        for _ in range(args.bproplen):
            # Get the next batch (a list of tuples of two word IDs)
            minibatch = train_iter.__next__()
            # Concatenate the word IDs to matrices and send them to the
            # device.
            x, t = convert.concat_examples(minibatch, args.gpu)
            # Compute the loss at this time step and accumulate it
            loss += optimizer.target(chainer.Variable(x), chainer.Variable(t))
            n_steps += 1

        perp_total += loss.array
        optimizer.target.cleargrads()  # Clear the parameter gradients
        loss.backward()  # Backprop
        loss.unchain_backward()  # Truncate the graph
        optimizer.update()  # Update the parameters

        if iteration % 20 == 0:
            print('iteration: {}'.format(iteration))
            print('training perplexity: {}'.format(
                np.exp(float(perp_total) / n_steps)))
            perp_total = 0
            n_steps = 0

        if train_iter.is_new_epoch:
            print('epoch: {}'.format(train_iter.epoch))
            print('validation perplexity: {}'.format(
                evaluate(model, val_iter)))

    # Evaluate on test dataset
    print('test')
    test_perp = evaluate(model, test_iter)
    print('test perplexity: {}'.format(test_perp))

    # Save the model and the optimizer
    out = args.out
    if not os.path.exists(out):
        os.makedirs(out)
    print('save the model')
    serializers.save_npz(os.path.join(out, 'rnnlm.model'), model)
    print('save the optimizer')
    serializers.save_npz(os.path.join(out, 'rnnlm.state'), optimizer)
def main():
    """Train an MLP on MNIST using ``MultiprocessIterator`` data loading.

    Mean train and test loss/accuracy are printed after each epoch; at the
    end the model and optimizer are saved as ``<out>/mlp.model`` and
    ``<out>/mlp.state``.
    """
    # Local import so this function does not depend on the file's import
    # block for the output-directory handling below.
    import os

    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot using model '
                             'and state files in the specified directory')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    model = L.Classifier(train_mnist.MLP(args.unit, 10))
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    if args.resume:
        # Resume from a snapshot
        serializers.load_npz('{}/mlp.model'.format(args.resume), model)
        serializers.load_npz('{}/mlp.state'.format(args.resume), optimizer)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()

    train_count = len(train)
    test_count = len(test)

    with MultiprocessIterator(train, args.batchsize) as train_iter, \
        MultiprocessIterator(test, args.batchsize,
                             repeat=False, shuffle=False) as test_iter:

        sum_accuracy = 0
        sum_loss = 0

        while train_iter.epoch < args.epoch:
            batch = train_iter.next()
            x, t = convert.concat_examples(batch, args.gpu)
            optimizer.update(model, x, t)
            # Weight by batch size so the epoch mean is per example.
            sum_loss += float(model.loss.data) * len(t)
            sum_accuracy += float(model.accuracy.data) * len(t)

            if train_iter.is_new_epoch:
                print('epoch: {}'.format(train_iter.epoch))
                print('train mean loss: {}, accuracy: {}'.format(
                    sum_loss / train_count, sum_accuracy / train_count))
                # evaluation
                sum_accuracy = 0
                sum_loss = 0
                # Enable evaluation mode.
                with configuration.using_config('train', False):
                    # This is optional but can reduce computational overhead.
                    with chainer.using_config('enable_backprop', False):
                        for batch in test_iter:
                            x, t = convert.concat_examples(batch, args.gpu)
                            loss = model(x, t)
                            sum_loss += float(loss.data) * len(t)
                            sum_accuracy += float(model.accuracy.data) * len(t)

                # Rewind the non-repeating test iterator for the next epoch.
                test_iter.reset()
                print('test mean loss: {}, accuracy: {}'.format(
                    sum_loss / test_count, sum_accuracy / test_count))
                sum_accuracy = 0
                sum_loss = 0

        # The snapshot files are written inside ``args.out``; create the
        # directory first so a finished training run is not lost to a
        # missing-directory error.
        if not os.path.exists(args.out):
            os.makedirs(args.out)

        # Save the model and the optimizer
        print('save the model')
        serializers.save_npz('{}/mlp.model'.format(args.out), model)
        print('save the optimizer')
        serializers.save_npz('{}/mlp.state'.format(args.out), optimizer)
def main():
    """Train ``Net`` on the circle-motion dataset and save one prediction.

    With ``--make-dataset`` the dataset is generated first.  The loss of the
    last mini-batch is printed once per epoch.  After training, the model's
    output for the first training sample is passed to ``save_result``.
    """
    # get args
    parser = argparse.ArgumentParser(description='ConvolutionNN')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=10,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--make-dataset', '-m', action='store_const',
                        const=True, default=False,
                        help='make dataset in circle_motion directory')
    args = parser.parse_args()

    if args.make_dataset:
        print('Generate dataset.')
        d = Dataset()
        d.make()
        print('Finish. Saved in circle_motion directory.')
        print('--------')
        print('')

    # print config
    print("#GPU : {}".format(args.gpu))
    print("#batchsize : {}".format(args.batchsize))
    print("#epoch : {}".format(args.epoch))

    # Model
    model = Net(50, 10)

    # setting for using GPU
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    # Optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Data
    train = make_dataset()
    print("train shape:", train[0][0].shape)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

    # print header
    print("Epoch\tloss(train)")

    # train
    while train_iter.epoch < args.epoch:
        # next batch data; pass the device so the arrays live where the
        # model's parameters do (a negative id keeps them on the CPU)
        train_batch = train_iter.next()
        (x, t) = convert.concat_examples(train_batch, args.gpu)

        # calculate loss and optimize params
        model.cleargrads()
        loss = model.forward(x, t)
        loss.backward()
        optimizer.update()

        # log every epoch
        if train_iter.is_new_epoch:
            print("%d\t%f" % (train_iter.epoch, loss.data))

    # Save the generated result
    idx = 0  # index of the sample to run through the model
    # ``model.xp`` is numpy on CPU and cupy on GPU, so the input matches the
    # model's device.
    x = model.xp.asarray(np.array([train[idx][0]]))
    y = model(x)
    if args.gpu >= 0:
        y.to_cpu()
    result = y.data[0][0]
    print(result.shape)
    save_result(result)
def main():
    """Train ``MLP`` or ``MLPSideEffect`` on MNIST with a manual loop.

    ``--model`` selects the network; any value other than ``MLP`` or
    ``MLPSideEffect`` is rejected by the argument parser.  Mean train and
    test loss/accuracy are printed after each epoch, and the model and
    optimizer are saved as ``mlp.model`` / ``mlp.state`` in the working
    directory at the end.
    """
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    # ``choices`` makes argparse reject unknown names up front; without it
    # an unrecognised value would leave ``model`` unbound and fail later.
    parser.add_argument('--model', '-m', default='MLP',
                        choices=['MLP', 'MLPSideEffect'],
                        help='Choose the model: MLP or MLPSideEffect')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    if args.model == 'MLP':
        model = L.Classifier(train_mnist.MLP(args.unit, 10))
    else:  # 'MLPSideEffect' -- guaranteed by ``choices`` above
        model = L.Classifier(train_mnist.MLPSideEffect(args.unit, 10))

    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()

    train_count = len(train)
    test_count = len(test)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    sum_accuracy = 0
    sum_loss = 0

    while train_iter.epoch < args.epoch:
        batch = train_iter.next()
        x_array, t_array = convert.concat_examples(batch, args.gpu)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array)
        optimizer.update(model, x, t)
        # Weight by batch size so the epoch mean is per example.
        sum_loss += float(model.loss.data) * len(t.data)
        sum_accuracy += float(model.accuracy.data) * len(t.data)

        if train_iter.is_new_epoch:
            print('epoch: ', train_iter.epoch)
            print('train mean loss: {}, accuracy: {}'.format(
                sum_loss / train_count, sum_accuracy / train_count))
            # evaluation
            sum_accuracy = 0
            sum_loss = 0
            # It is good practice to turn off train mode during evaluation.
            with configuration.using_config('train', False):
                for batch in test_iter:
                    x_array, t_array = convert.concat_examples(
                        batch, args.gpu)
                    x = chainer.Variable(x_array)
                    t = chainer.Variable(t_array)
                    loss = model(x, t)
                    sum_loss += float(loss.data) * len(t.data)
                    sum_accuracy += float(model.accuracy.data) * len(t.data)

            # Rewind the non-repeating test iterator for the next epoch.
            test_iter.reset()
            print('test mean loss: {}, accuracy: {}'.format(
                sum_loss / test_count, sum_accuracy / test_count))
            sum_accuracy = 0
            sum_loss = 0

    # Save the model and the optimizer
    print('save the model')
    serializers.save_npz('mlp.model', model)
    print('save the optimizer')
    serializers.save_npz('mlp.state', optimizer)
def main():
    """Train the unrolled RNN language model on Penn Tree Bank.

    Each iteration collects ``--bproplen`` consecutive batches, runs the
    model on all of them in one call and sums the per-step losses.  Training
    perplexity is printed every 20 iterations, validation perplexity after
    every epoch and test perplexity at the end; the model and optimizer are
    then saved as ``rnnlm.model`` / ``rnnlm.state`` in the working
    directory.
    """
    # Fixed seeds for both random number generators used below.
    np.random.seed(0)
    random.seed(1)

    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=20,
                        help='Number of examples in each mini-batch')
    parser.add_argument('--bproplen', '-l', type=int, default=25,
                        help='Number of words in each mini-batch '
                             '(= length of truncated BPTT)')
    parser.add_argument('--epoch', '-e', type=int, default=39,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--gradclip', '-c', type=float, default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.set_defaults(test=False)
    parser.add_argument('--unit', '-u', type=int, default=650,
                        help='Number of LSTM units in each layer')
    args = parser.parse_args()

    def evaluate(model, iter):
        """Return the perplexity of ``model`` over ``iter`` (used for
        validation and test)."""
        evaluator = model.copy()  # to use different state
        evaluator.rnn.reset_state()  # initialize state
        total_loss = 0
        n_batches = 0
        word_batches = []
        label_batches = []
        lossfun = softmax_cross_entropy.softmax_cross_entropy
        with configuration.using_config('train', False):
            iter.reset()
            for minibatch in iter:
                word, label = convert.concat_examples(minibatch, args.gpu)
                word_batches.append(word)
                label_batches.append(label)
                n_batches += 1
            outputs = evaluator(word_batches)
            for step, y in enumerate(outputs):
                total_loss += lossfun(y, label_batches[step]).array
        return np.exp(float(total_loss) / n_batches)

    # Load the Penn Tree Bank long word sequence dataset
    train, val, test = chainer.datasets.get_ptb_words()
    n_vocab = max(train) + 1  # train is just an array of integers
    print('#vocab =', n_vocab)

    if args.test:
        train = train[:100]
        val = val[:100]
        test = test[:100]

    # Create the dataset iterators
    train_iter = ParallelSequentialIterator(train, args.batchsize)
    val_iter = ParallelSequentialIterator(val, 1, repeat=False)
    test_iter = ParallelSequentialIterator(test, 1, repeat=False)

    # Prepare an RNNLM model
    model = RNNForLMUnrolled(n_vocab, args.unit)
    lossfun = softmax_cross_entropy.softmax_cross_entropy
    if args.gpu >= 0:
        # Make the specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Set up an optimizer
    optimizer = chainer.optimizers.SGD(lr=1.0)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    perp_total = 0
    n_steps = 0
    iteration = 0
    while train_iter.epoch < args.epoch:
        iteration += 1
        word_batches = []
        label_batches = []
        # Progress the dataset iterator for bprop_len words at each
        # iteration.
        for _ in range(args.bproplen):
            # Get the next batch (a list of tuples of two word IDs)
            minibatch = train_iter.__next__()
            # Concatenate the word IDs to matrices and send them to the
            # device.
            word, label = convert.concat_examples(minibatch, args.gpu)
            word_batches.append(word)
            label_batches.append(label)
            n_steps += 1

        outputs = model(word_batches)
        loss = 0
        for step, y in enumerate(outputs):
            loss += lossfun(y, label_batches[step])

        perp_total += loss.array
        optimizer.target.cleargrads()  # Clear the parameter gradients
        loss.backward()  # Backprop
        loss.unchain_backward()  # Truncate the graph
        optimizer.update()  # Update the parameters

        if iteration % 20 == 0:
            print('iteration: ', iteration)
            print('training perplexity: ',
                  np.exp(float(perp_total) / n_steps))
            perp_total = 0
            n_steps = 0

        if train_iter.is_new_epoch:
            print('Evaluating model on validation set...')
            print('epoch: ', train_iter.epoch)
            print('validation perplexity: ', evaluate(model, val_iter))

    # Evaluate on test dataset
    print('test')
    test_perp = evaluate(model, test_iter)
    print('test perplexity:', test_perp)

    # Save the model and the optimizer
    print('save the model')
    serializers.save_npz('rnnlm.model', model)
    print('save the optimizer')
    serializers.save_npz('rnnlm.state', optimizer)
def main():
    """Train an LSTM language model on Penn Tree Bank (legacy train flag).

    Evaluation toggles ``predictor.train`` on the model instead of using the
    configuration context.  Training perplexity is printed every 20
    iterations, validation perplexity after each epoch and test perplexity at
    the end; the model and optimizer are then saved as ``rnnlm.model`` /
    ``rnnlm.state`` in the working directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=20,
                        help='Number of examples in each mini-batch')
    parser.add_argument('--bproplen', '-l', type=int, default=35,
                        help='Number of words in each mini-batch '
                             '(= length of truncated BPTT)')
    parser.add_argument('--epoch', '-e', type=int, default=39,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--gradclip', '-c', type=float, default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.set_defaults(test=False)
    parser.add_argument('--unit', '-u', type=int, default=650,
                        help='Number of LSTM units in each layer')
    args = parser.parse_args()

    def evaluate(model, iter):
        """Return the perplexity of ``model`` over ``iter`` (used for
        validation and test)."""
        # The train flag is switched off on the original as well as on the
        # copy, and switched back on for the original before returning.
        model.predictor.train = False
        evaluator = model.copy()  # to use different state
        evaluator.predictor.reset_state()  # initialize state
        evaluator.predictor.train = False  # dropout does nothing
        total_loss = 0
        n_batches = 0
        for minibatch in copy.copy(iter):
            x, t = convert.concat_examples(minibatch, args.gpu)
            total_loss += evaluator(x, t).data
            n_batches += 1
        model.predictor.train = True
        return np.exp(float(total_loss) / n_batches)

    # Load the Penn Tree Bank long word sequence dataset
    train, val, test = chainer.datasets.get_ptb_words()
    n_vocab = max(train) + 1  # train is just an array of integers
    print('#vocab =', n_vocab)

    if args.test:
        train = train[:100]
        val = val[:100]
        test = test[:100]

    # Create the dataset iterators
    train_iter = train_ptb.ParallelSequentialIterator(train, args.batchsize)
    val_iter = train_ptb.ParallelSequentialIterator(val, 1, repeat=False)
    test_iter = train_ptb.ParallelSequentialIterator(test, 1, repeat=False)

    # Prepare an RNNLM model
    rnn = train_ptb.RNNForLM(n_vocab, args.unit)
    model = L.Classifier(rnn)
    model.compute_accuracy = False  # we only want the perplexity
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # make the GPU current
        model.to_gpu()

    # Set up an optimizer
    optimizer = chainer.optimizers.SGD(lr=1.0)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    perp_total = 0
    n_steps = 0
    iteration = 0
    while train_iter.epoch < args.epoch:
        loss = 0
        iteration += 1
        # Progress the dataset iterator for bprop_len words at each
        # iteration.
        for _ in range(args.bproplen):
            # Get the next batch (a list of tuples of two word IDs)
            minibatch = train_iter.__next__()
            # Concatenate the word IDs to matrices and send them to the
            # device.
            x, t = convert.concat_examples(minibatch, args.gpu)
            # Compute the loss at this time step and accumulate it
            loss += optimizer.target(chainer.Variable(x), chainer.Variable(t))
            n_steps += 1

        perp_total += loss.data
        optimizer.target.cleargrads()  # Clear the parameter gradients
        loss.backward()  # Backprop
        loss.unchain_backward()  # Truncate the graph
        optimizer.update()  # Update the parameters

        if iteration % 20 == 0:
            print('iteration: ', iteration)
            print('training perplexity: ',
                  np.exp(float(perp_total) / n_steps))
            perp_total = 0
            n_steps = 0

        if train_iter.is_new_epoch:
            print('epoch: ', train_iter.epoch)
            print('validation perplexity: ', evaluate(model, val_iter))

    # Evaluate on test dataset
    print('test')
    test_perp = evaluate(model, test_iter)
    print('test perplexity:', test_perp)

    # Save the model and the optimizer
    print('save the model')
    serializers.save_npz('rnnlm.model', model)
    print('save the optimizer')
    serializers.save_npz('rnnlm.state', optimizer)
def extract(self, images, layers=None, size=(224, 224), **kwargs):
    """extract(self, images, layers=['pool5'], size=(224, 224))

    Extracts all the feature maps of given images.

    Compared with calling ``forward`` directly, this method takes raw
    images and converts them into a suitable input variable by itself.
    In other words, it is a shortcut that implicitly runs ``prepare``
    followed by ``forward``.

    Unlike the ``predict`` method, this method does not override the
    ``chainer.config.train`` and ``chainer.config.enable_backprop``
    configuration. To extract features without updating model parameters,
    set the configuration manually around the call:

    .. code-block:: python

        # model is an instance of ResNetLayers (50 or 101 or 152 layers)
        with chainer.using_config('train', False):
            with chainer.using_config('enable_backprop', False):
                feature = model.extract([image])

    .. warning::

       ``test`` and ``volatile`` arguments are not supported anymore since
       v2. Instead, users should configure training and volatile modes
       with ``train`` and ``enable_backprop``, respectively.

       Note that the default behavior of this method differs between v1
       and later versions. Specifically, the default value of ``test`` in
       v1 was ``True`` (test mode), whereas the default of
       ``chainer.config.train`` is also ``True`` (train mode). Therefore,
       users need to explicitly switch ``train`` to ``False`` to run the
       code in test mode, and ``enable_backprop`` to ``False`` to turn off
       computational graph construction.

       See the `upgrade guide <https://docs.chainer.org/en/stable\
       /upgrade_v2.html#training-mode-is-configured-by-a-thread-local-flag>`_.

    Args:
        images (iterable of PIL.Image or numpy.ndarray): Input images.
        layers (list of str): The list of layer names you want to extract.
            ``None`` selects ``['pool5']``.
        size (pair of ints): The resolution of resized images used as
            an input of CNN. All the given images are not resized
            if this argument is ``None``, but the resolutions of
            all the images should be the same.

    Returns:
        Dictionary of ~chainer.Variable: A dictionary in which
        the key contains the layer name and the value contains
        the corresponding feature map variable.

    """
    # Reject the removed v1 keyword arguments with an explanatory message,
    # then make sure nothing else unexpected was passed.
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs,
            test='test argument is not supported anymore. '
            'Use chainer.using_config',
            volatile='volatile argument is not supported anymore. '
            'Use chainer.using_config')
        argument.assert_kwargs_empty(kwargs)

    target_layers = ['pool5'] if layers is None else layers

    # Preprocess every image, stack them into one batch and move the batch
    # to the array module (CPU/GPU) this link currently uses.
    prepared = [prepare(img, size=size) for img in images]
    batch = concat_examples(prepared)
    return self(Variable(self.xp.asarray(batch)), layers=target_layers)
def main():
    """Train a VGG classifier on CIFAR-10/100 with a hand-written loop.

    Parses the command-line options, builds the dataset, model and
    MomentumSGD optimizer, optionally restores ``vgg.model`` / ``vgg.state``
    from ``--resume``, and trains until ``--epoch`` epochs have elapsed.
    After every epoch the mean training loss/accuracy and the mean test
    loss/accuracy are printed. Finally the model and the optimizer are
    saved as ``vgg.model`` and ``vgg.state`` inside ``--out``.

    Raises:
        RuntimeError: If ``--dataset`` is neither ``cifar10`` nor
            ``cifar100``.
        ValueError: If ``--resume`` is given but the path does not exist.
    """
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.add_argument('--resume', '-r', type=str,
                        help='Directory that has `vgg.model` and `vgg.state`')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier computes the softmax cross entropy loss and the accuracy at
    # every iteration; the loop below reads them back through `model.loss`
    # and `model.accuracy`.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    if args.test:
        train = train[:200]
        test = test[:200]

    # Denominators for the per-epoch mean loss/accuracy.
    train_count = len(train)
    test_count = len(test)

    model = L.Classifier(models.VGG.VGG(class_labels))
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    if args.resume is not None:
        resume = args.resume
        if os.path.exists(resume):
            serializers.load_npz(os.path.join(resume, 'vgg.model'), model)
            serializers.load_npz(os.path.join(resume, 'vgg.state'), optimizer)
        else:
            raise ValueError(
                '`args.resume` ("{}") is specified,'
                ' but it does not exist.'.format(resume)
            )

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    sum_acc = 0
    sum_loss = 0

    while train_iter.epoch < args.epoch:
        batch = train_iter.next()

        # Reduce learning rate by 0.5 every 25 epochs. `is_new_epoch` is
        # required so the decay fires once per boundary, not on every
        # iteration of an epoch whose number is a multiple of 25.
        if train_iter.epoch % 25 == 0 and train_iter.is_new_epoch:
            optimizer.lr *= 0.5
            print('Reducing learning rate to: {}'.format(optimizer.lr))

        x_array, t_array = convert.concat_examples(batch, args.gpu)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array)
        optimizer.update(model, x, t)

        # Weight by the batch length so the epoch totals can be divided by
        # the dataset size.
        sum_loss += float(model.loss.array) * len(t)
        sum_acc += float(model.accuracy.array) * len(t)

        if train_iter.is_new_epoch:
            print('epoch: {}'.format(train_iter.epoch))
            print('train mean loss: {}, accuracy: {}'.format(
                sum_loss / train_count, sum_acc / train_count))

            # Reuse the accumulators for the evaluation pass.
            sum_acc = 0
            sum_loss = 0
            # Enable evaluation mode.
            with configuration.using_config('train', False):
                # This is optional but can reduce computational overhead.
                with chainer.using_config('enable_backprop', False):
                    for batch in test_iter:
                        x, t = convert.concat_examples(batch, args.gpu)
                        x = chainer.Variable(x)
                        t = chainer.Variable(t)
                        loss = model(x, t)
                        sum_loss += float(loss.array) * len(t)
                        sum_acc += float(model.accuracy.array) * len(t)

            # The test iterator does not repeat; rewind it for the next
            # epoch's evaluation.
            test_iter.reset()
            print('test mean loss: {}, accuracy: {}'.format(
                sum_loss / test_count, sum_acc / test_count))
            sum_acc = 0
            sum_loss = 0

    # Save the model and the optimizer
    out = args.out
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(out, exist_ok=True)
    print('save the model')
    serializers.save_npz(os.path.join(out, 'vgg.model'), model)
    print('save the optimizer')
    serializers.save_npz(os.path.join(out, 'vgg.state'), optimizer)
def test_mnist_simple(self, display_log=True):
    """Train, checkpoint, resume in fresh objects, and validate accuracy.

    The trainer pieces returned by ``self.setup_mnist_trainer()`` are
    trained for 5 epochs while checkpoints are written to a temporary
    directory. A second, independent set of trainer pieces is then
    restored from that checkpoint, its epoch/iteration counters are
    compared with the first updater, and training continues up to epoch
    10. Finally the resumed model must reach at least 0.95 accuracy on the
    test iterator.

    Args:
        display_log (bool): When true, progress, checkpoint statistics and
            the final test metrics are printed.
    """
    updater, optimizer, train_iter, _, model = self.setup_mnist_trainer()

    path = tempfile.mkdtemp(dir='/tmp', prefix=__name__ + "-tmp-")
    if display_log:
        print("temporary file:", path)
    checkpointer = create_multi_node_checkpointer(name=__name__,
                                                  comm=self.communicator,
                                                  path=path)
    checkpointer.maybe_load(updater, optimizer)

    sum_accuracy = 0
    sum_loss = 0
    stop = 5
    train_count = len(train_iter.dataset)
    while train_iter.epoch < stop:
        batch = train_iter.next()
        x_array, t_array = convert.concat_examples(batch, -1)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array)
        optimizer.update(model, x, t)

        # Weight by batch length so the epoch totals can be averaged over
        # the dataset size.
        sum_loss += float(model.loss.data) * len(t.data)
        sum_accuracy += float(model.accuracy.data) * len(t.data)

        if train_iter.is_new_epoch:
            if display_log:
                print(updater.iteration, train_iter.epoch,
                      sum_loss / train_count, sum_accuracy / train_count)
            sum_loss = 0
            sum_accuracy = 0

            # NOTE(review): checkpoint placement (once per epoch) was
            # reconstructed from whitespace-collapsed source -- confirm.
            checkpointer.save(updater, updater.iteration)

    if display_log:
        print(self.communicator.rank, checkpointer.get_stats())

    # Allocate totally different set of training tools to avoid leakage
    data_2 = self.setup_mnist_trainer()
    updater2, optimizer2, train_iter2, test_iter2, model2 = data_2
    checkpointer2 = create_multi_node_checkpointer(
        name=__name__, comm=self.communicator, path=path)
    checkpointer2.maybe_load(updater2, optimizer2)

    # Check data properly resumed
    self.assertEqual(updater.epoch, updater2.epoch)
    self.assertEqual(updater.iteration, updater2.iteration)

    # TODO(kuenishi): find a simple way to assure model equality
    # in terms of float matrix
    # self.assertEqual(model, model2)

    # Restart training
    while train_iter2.epoch < stop * 2:
        batch = train_iter2.next()
        x_array, t_array = convert.concat_examples(batch, -1)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array)
        optimizer2.update(model2, x, t)

        sum_loss += float(model2.loss.data) * len(t.data)
        sum_accuracy += float(model2.accuracy.data) * len(t.data)

        if train_iter2.is_new_epoch:
            # Honor display_log here too, like every other print in this
            # test, so display_log=False runs silently.
            if display_log:
                print(updater2.iteration, train_iter2.epoch,
                      sum_loss / train_count, sum_accuracy / train_count)
            sum_loss = 0
            sum_accuracy = 0

            # NOTE(review): same reconstructed placement as in the first
            # training loop -- confirm.
            checkpointer2.save(updater2, updater2.iteration)

    if display_log:
        print(self.communicator.rank, checkpointer2.get_stats())
    checkpointer2.finalize()

    checkpointer.finalize()

    # Validate training
    sum_accuracy = 0
    sum_loss = 0
    test_count = len(test_iter2.dataset)
    for batch in test_iter2:
        x_array, t_array = convert.concat_examples(batch, -1)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array)
        loss = model2(x, t)
        sum_loss += float(loss.data) * len(t.data)
        sum_accuracy += float(model2.accuracy.data) * len(t.data)

    if display_log:
        print('test mean loss: {}, accuracy: {}'.format(
            sum_loss / test_count, sum_accuracy / test_count))

    self.assertGreaterEqual(sum_accuracy / test_count, 0.95)

    # os.removedirs only removes empty directories, so this relies on the
    # checkpoint files no longer being present at this point.
    os.removedirs(path)