def __call__(self, x, **kwargs):
    """__call__(self, x, finetune=False)

    Applies batch normalization to the input.

    When ``chainer.config.train`` is true the input is normalized with
    :func:`functions.batch_normalization`, to which the link's ``avg_mean``
    and ``avg_var`` are handed as running statistics. Otherwise the stored
    ``avg_mean`` and ``avg_var`` are used as fixed statistics.

    .. warning::

       The ``test`` argument is not supported anymore since v2.
       Use ``chainer.using_config('train', False)`` instead.
       See :func:`chainer.using_config`.

    Args:
        x (Variable): Input variable.
        finetune (bool): Only consulted in training mode. If ``True``, the
            counter ``self.N`` is incremented and ``1 - 1 / N`` is used as
            the decay in place of ``self.decay``.

    Returns:
        The output of the normalization function.

    """
    argument.check_unexpected_kwargs(
        kwargs,
        test='test argument is not supported anymore. '
             'Use chainer.using_config')
    (finetune,) = argument.parse_kwargs(kwargs, ('finetune', False))

    # A link without a ``gamma`` attribute gets a constant scale of ones.
    if not hasattr(self, 'gamma'):
        with cuda.get_device_from_id(self._device_id):
            scale = variable.Variable(
                self.xp.ones(self.avg_mean.shape, dtype=x.dtype))
    else:
        scale = self.gamma

    # A link without a ``beta`` attribute gets a constant shift of zeros.
    if not hasattr(self, 'beta'):
        with cuda.get_device_from_id(self._device_id):
            shift = variable.Variable(
                self.xp.zeros(self.avg_mean.shape, dtype=x.dtype))
    else:
        shift = self.beta

    if not configuration.config.train:
        # Use running average statistics or fine-tuned statistics.
        fixed_mean = variable.Variable(self.avg_mean)
        fixed_var = variable.Variable(self.avg_var)
        return functions.fixed_batch_normalization(
            x, scale, shift, fixed_mean, fixed_var, self.eps)

    if finetune:
        self.N += 1
        decay = 1. - 1. / self.N
    else:
        decay = self.decay

    return functions.batch_normalization(
        x, scale, shift,
        eps=self.eps,
        running_mean=self.avg_mean,
        running_var=self.avg_var,
        decay=decay)
def __call__(self, c, h, x):
    """Computes one LSTM step from explicitly supplied states.

    Args:
        c (~chainer.Variable): Cell states of LSTM units, or ``None`` to
            start from zeros.
        h (~chainer.Variable): Output at the previous time step, or
            ``None`` to skip the lateral connection.
        x (~chainer.Variable): A new batch from the input sequence.

    Returns:
        tuple of ~chainer.Variable: ``(c_new, h_new)`` as returned by
        ``lstm.lstm``.

    """
    device = self._device_id

    # Parameters are created lazily once the input size is known.
    if self.upward.W.data is None:
        per_sample_size = x.size // x.shape[0]
        with cuda.get_device_from_id(device):
            self.upward._initialize_params(per_sample_size)
            self._initialize_params()

    gates = self.upward(x)
    if h is not None:
        gates += self.lateral(h)

    if c is None:
        array_module = self.xp
        with cuda.get_device_from_id(device):
            zeros = array_module.zeros(
                (x.shape[0], self.state_size), dtype=x.dtype)
            c = variable.Variable(zeros)

    return lstm.lstm(c, gates)
def forward(self, x, finetune=False):
    """Applies batch renormalization to the input.

    Args:
        x: Input to normalize.
        finetune (bool): Only consulted in training mode. If ``True``, the
            counter ``self.N`` is incremented and ``1 - 1 / N`` is used as
            the decay in place of ``self.decay``.

    Returns:
        The output of ``batch_renormalization`` in training mode, or of
        ``fixed_batch_normalization`` with the stored statistics otherwise.

    """
    # Fall back to an array of ones when the link has no scale parameter.
    if self.gamma is None:
        with cuda.get_device_from_id(self._device_id):
            scale = self.xp.ones(self.avg_mean.shape, dtype=x.dtype)
    else:
        scale = self.gamma

    # Fall back to an array of zeros when the link has no shift parameter.
    if self.beta is None:
        with cuda.get_device_from_id(self._device_id):
            shift = self.xp.zeros(self.avg_mean.shape, dtype=x.dtype)
    else:
        shift = self.beta

    if not configuration.config.train:
        # Use running average statistics or fine-tuned statistics.
        return batch_normalization.fixed_batch_normalization(
            x, scale, shift, self.avg_mean, self.avg_var, self.eps)

    if finetune:
        self.N += 1
        decay = 1. - 1. / self.N
    else:
        decay = self.decay

    return batch_renormalization.batch_renormalization(
        x, scale, shift, self.rmax, self.dmax, self.eps,
        self.avg_mean, self.avg_var, decay,
        update_statistics=True)
def test_forward_gpu_multi(self):
    """Checks forward with data sent under device 0 and run under device 1."""
    host_arrays = (self.x1, self.x2, self.x3)
    with cuda.get_device_from_id(0):
        self.link.to_gpu()
        gpu_arrays = [cuda.to_gpu(arr) for arr in host_arrays]
    # A different device is made current while the forward pass is checked.
    with cuda.get_device_from_id(1):
        self.check_forward(*gpu_arrays)
def test_forward_gpu_multi(self):
    """Checks forward with data sent under device 0 and run under device 1."""
    host_arrays = (self.c, self.h, self.x)
    with cuda.get_device_from_id(0):
        self.link.to_gpu()
        c_gpu, h_gpu, x_gpu = [cuda.to_gpu(arr) for arr in host_arrays]
    # A different device is made current while the forward pass is checked.
    with cuda.get_device_from_id(1):
        self.check_forward(c_gpu, h_gpu, x_gpu)
def __call__(self, batch, device=None, padding=None):
    """Concatenate data and transfer them to GPU asynchronously.

    See also :func:`chainer.dataset.concat_examples`.

    The device given on the first call (while no conveyor exists yet) is
    remembered; every later call must pass an equal device.

    Args:
        batch (list): A list of examples.
        device (int): Device ID to which each array is sent.
        padding: Scalar value for extra elements. For tuple examples a
            tuple of per-field paddings may be given; for dict examples a
            dict of per-key paddings may be given.

    Returns:
        Array, a tuple of arrays, or a dictionary of arrays.
        The type depends on the type of each example in the batch.

    Raises:
        ValueError: If ``batch`` is empty, or if ``device`` differs from
            the device remembered on the first call.

    """
    if len(batch) == 0:
        raise ValueError('batch is empty')
    first_elem = batch[0]

    if len(self._conveyor) == 0:
        self._device = device  # device is set at first call
        if device is not None and device >= 0 and self._stream is None:
            with cuda.get_device_from_id(device):
                self._stream = cuda.Stream(non_blocking=True)

    # Compare by value: two equal device IDs are not guaranteed to be the
    # same object (e.g. large ints or NumPy integer scalars), so an
    # identity test could reject a perfectly valid call.
    if device != self._device:
        raise ValueError('device is different')

    with cuda.get_device_from_id(device):
        if isinstance(first_elem, tuple):
            result = []
            if not isinstance(padding, tuple):
                padding = [padding] * len(first_elem)

            # Queue every field first, then collect, so that all transfers
            # are initiated before any result is fetched.
            for i in six.moves.range(len(first_elem)):
                self._conveyor[i].put(_concat_arrays(
                    [example[i] for example in batch], padding[i]))

            for i in six.moves.range(len(first_elem)):
                result.append(self._conveyor[i].get())

            return tuple(result)

        elif isinstance(first_elem, dict):
            result = {}
            if not isinstance(padding, dict):
                padding = {key: padding for key in first_elem}

            for key in first_elem:
                self._conveyor[key].put(_concat_arrays(
                    [example[key] for example in batch], padding[key]))

            for key in first_elem:
                result[key] = self._conveyor[key].get()

            return result

        else:
            return to_device(device, _concat_arrays(batch, padding))
def __call__(self, x):
    """Updates the internal state and returns the LSTM outputs.

    The batch size of ``x`` may be smaller than that of the stored state
    ``h``. In that case only the leading ``batch`` rows of ``h`` are
    updated and the remaining rows are carried over unchanged.

    Args:
        x (~chainer.Variable): A new batch from the input sequence.

    Returns:
        ~chainer.Variable: Outputs of updated LSTM units.

    Raises:
        TypeError: If the batch size of ``x`` is larger than the batch size
            of the stored state ``h``.

    """
    # Parameters are created lazily once the input size is known.
    if self.upward.W.data is None:
        with cuda.get_device_from_id(self._device_id):
            in_size = functools.reduce(operator.mul, x.shape[1:], 1)
            self.upward._initialize_params(in_size)
            self._initialize_params()

    batch = x.shape[0]
    lstm_in = self.upward(x)
    h_rest = None
    if self.h is not None:
        h_size = self.h.shape[0]
        if batch == 0:
            h_rest = self.h
        elif h_size < batch:
            # The original message lacked the space between the two
            # literals and read "less thanthe size".
            msg = ('The batch size of x must be equal to or less than '
                   'the size of the previous state h.')
            raise TypeError(msg)
        elif h_size > batch:
            h_update, h_rest = split_axis.split_axis(
                self.h, [batch], axis=0)
            lstm_in += self.lateral(h_update)
        else:
            lstm_in += self.lateral(self.h)

    if self.c is None:
        xp = self.xp
        with cuda.get_device_from_id(self._device_id):
            self.c = variable.Variable(
                xp.zeros((batch, self.state_size), dtype=x.dtype))
    self.c, y = lstm.lstm(self.c, lstm_in)

    # Re-attach the rows of ``h`` that were not touched by this step.
    if h_rest is None:
        self.h = y
    elif len(y.data) == 0:
        self.h = h_rest
    else:
        self.h = concat.concat([y, h_rest], axis=0)

    return y
def initialize(self, shape):
    """Creates and fills the data array of an uninitialized variable.

    An uninitialized variable has its data array set to ``None``; the
    shape can stay unknown until this method is called.

    Args:
        shape (tuple of int): Shape of the data array.

    """
    if self._initial_backend != 'cuda':
        array_module = numpy
    else:
        array_module = cuda.cupy

    with cuda.get_device_from_id(self._initial_device):
        new_data = initializers.generate_array(
            self.initializer, shape, array_module)

        grad_initializer = self._grad_initializer
        if grad_initializer is None:
            new_grad = None
        else:
            new_grad = initializers.generate_array(
                grad_initializer, shape, array_module)

    self.data = new_data
    self.grad = new_grad

    # Convert the array for iDeep.
    if self._initial_backend == 'intel64':
        self.to_intel64()
def test_backward_non_default_gpu(self):
    """Runs backward under device 0 with all arrays placed on device 1."""
    data_device = 1
    lhs = chainer.Variable(cuda.to_gpu(self.x0, data_device))
    rhs = chainer.Variable(cuda.to_gpu(self.x1, data_device))
    upstream_grad = cuda.to_gpu(self.gy, data_device)

    with cuda.get_device_from_id(0):
        out = functions.absolute_error(lhs, rhs)
        out.grad = upstream_grad
        out.backward()
def update_core(self):
    """Runs one update step over all models held by this updater.

    The batch from the ``'main'`` iterator is split into interleaved
    sub-batches, one per model. Each model computes a loss, the losses are
    back-propagated, the gradients of the other models are added to the
    main model, the optimizer is stepped, and the updated parameters are
    copied back to the other models.
    """
    optimizer = self.get_optimizer('main')
    main_model = optimizer.target
    other_models = {
        name: mdl for name, mdl in self._models.items()
        if mdl is not main_model
    }

    iterator = self.get_iterator('main')
    batch = iterator.next()

    # Split the batch to sub-batches: model number ``offset`` receives
    # every ``n_models``-th example starting at ``offset``.
    n_models = len(self._models)
    sub_batches = {
        key: self.converter(batch[offset::n_models], self._devices[key])
        for offset, key in enumerate(six.iterkeys(self._models))
    }

    # For reducing memory
    for mdl in six.itervalues(self._models):
        mdl.cleargrads()

    losses = []
    for key, mdl in six.iteritems(self._models):
        in_arrays = sub_batches[key]
        loss_func = self.loss_func or mdl

        with function.force_backprop_mode():
            dev_id = self._devices[key]
            # Negative IDs are replaced by None before device lookup.
            if not 0 <= dev_id:
                dev_id = None
            with cuda.get_device_from_id(dev_id):
                if isinstance(in_arrays, tuple):
                    loss = loss_func(*in_arrays)
                elif isinstance(in_arrays, dict):
                    loss = loss_func(**in_arrays)
                else:
                    loss = loss_func(in_arrays)

        losses.append(loss)

    # For _uninitialized_params
    for mdl in six.itervalues(self._models):
        mdl.cleargrads()

    for loss in losses:
        loss.backward(loss_scale=self.loss_scale)

    for mdl in six.itervalues(other_models):
        main_model.addgrads(mdl)

    optimizer.update()

    for mdl in six.itervalues(other_models):
        mdl.copyparams(main_model)

    if self.auto_new_epoch and iterator.is_new_epoch:
        optimizer.new_epoch(auto=True)
def __call__(self, x, finetune=False):
    """Applies batch renormalization to the input.

    Args:
        x: Input to normalize.
        finetune (bool): Only consulted in training mode. If ``True``, the
            counter ``self.N`` is incremented and ``1 - 1 / N`` is used as
            the decay in place of ``self.decay``.

    Returns:
        The normalized output.

    """
    # Fall back to a constant scale of ones when there is no ``gamma``.
    if self.gamma is None:
        with cuda.get_device_from_id(self._device_id):
            scale = variable.Variable(
                self.xp.ones(self.avg_mean.shape, dtype=x.dtype))
    else:
        scale = self.gamma

    # Fall back to a constant shift of zeros when there is no ``beta``.
    if self.beta is None:
        with cuda.get_device_from_id(self._device_id):
            shift = variable.Variable(
                self.xp.zeros(self.avg_mean.shape, dtype=x.dtype))
    else:
        shift = self.beta

    if not configuration.config.train:
        # Use running average statistics or fine-tuned statistics.
        fixed_mean = variable.Variable(self.avg_mean)
        fixed_var = variable.Variable(self.avg_var)
        return batch_renormalization.fixed_batch_renormalization(
            x, scale, shift, fixed_mean, fixed_var, self.eps)

    if finetune:
        self.N += 1
        decay = 1. - 1. / self.N
    else:
        decay = self.decay

    renorm = batch_renormalization.BatchRenormalizationFunction(
        self.eps, self.avg_mean, self.avg_var, decay,
        self.rmax, self.dmax, self.freeze_running_statistics)

    # With frozen statistics the stored ``r`` and ``d`` are handed to the
    # function before it runs.
    if self.freeze_running_statistics:
        renorm.r = self.r
        renorm.d = self.d

    out = renorm(x, scale, shift)

    # Remember ``r`` and ``d`` from the function the first time round.
    if self.freeze_running_statistics and self.r is None:
        self.r = renorm.r
        self.d = renorm.d

    self.avg_mean[:] = renorm.running_mean
    self.avg_var[:] = renorm.running_var
    return out
def forward(self, x):
    """Updates the internal state and returns the LSTM outputs.

    Both the new cell state and the new output are passed through
    ``zoneout.zoneout`` together with their previous values, using
    ``self.c_ratio`` and ``self.h_ratio`` respectively.

    Args:
        x (~chainer.Variable): A new batch from the input sequence.

    Returns:
        ~chainer.Variable: Outputs of updated LSTM units.

    """
    gates = self.upward(x)

    if self.h is None:
        array_module = self.xp
        with cuda.get_device_from_id(self._device_id):
            self.h = variable.Variable(
                array_module.zeros((len(x.data), self.state_size),
                                   dtype=x.data.dtype))
    else:
        gates += self.lateral(self.h)

    if self.c is None:
        array_module = self.xp
        with cuda.get_device_from_id(self._device_id):
            self.c = variable.Variable(
                array_module.zeros((len(x.data), self.state_size),
                                   dtype=x.data.dtype))

    # Regroup the last axis into (units, 4) and split the four gates apart.
    n_rows = len(gates.data)
    gates = reshape.reshape(
        gates, (n_rows, gates.data.shape[1] // 4, 4))
    a, i, f, o = [
        reshape.reshape(part, (len(part.data), self.state_size))
        for part in split_axis.split_axis(gates, 4, 2)
    ]

    candidate_c = (tanh.tanh(a) * sigmoid.sigmoid(i)
                   + sigmoid.sigmoid(f) * self.c)
    self.c = zoneout.zoneout(self.c, candidate_c, self.c_ratio)

    candidate_h = sigmoid.sigmoid(o) * tanh.tanh(candidate_c)
    self.h = zoneout.zoneout(self.h, candidate_h, self.h_ratio)
    return self.h
def check_multi_gpu_forward(self, train=True):
    """Asserts the RNN runs while a different device is current.

    The inputs and a copy of the RNN are sent to GPU under device 1; the
    forward call is then made under device 0 with cuDNN forced on.

    Args:
        train (bool): Value of the ``train`` configuration during the call.

    """
    # See chainer/chainer#6262
    # NStepBiRNNTanh and NStepBiRNNReLU w/ cudnn & dropout should work on
    # not current device
    error = None
    net = self.rnn.copy('copy')
    net.dropout = .5

    with cuda.get_device_from_id(1):
        h = None if self.hidden_none else cuda.to_gpu(self.h)
        xs = [cuda.to_gpu(seq) for seq in self.xs]
        net = net.to_gpu()

    with cuda.get_device_from_id(0), \
            chainer.using_config('train', train), \
            chainer.using_config('use_cudnn', 'always'):
        try:
            net(h, xs)
        except Exception as exc:
            error = exc

    assert error is None
def put(self, array):
    """Initiates asynchronous transfer of an array to a target device.

    This method assumes that the input array is a numpy array and on
    host memory without page-locked. So, it first copies the data
    to page-locked host memory (so called pinned memory), then initiates
    asynchronous data transfer to a target device.

    The intermediate arrays on pinned memory and cupy arrays on the
    target device are retained at self._array_set in order to reduce number
    of memory allocation/release, and they are to be reused for subsequent
    data transfer as long as the shape and dtype are the same.

    Double buffering scheme is used here, so you can initiate next data
    transfer safely even when current data is still used on the target
    device.

    When no non-negative device or no stream is configured, the array is
    passed through ``to_device`` instead.

    Args:
        array: Host array to transfer.

    """
    if self._device is None or self._device < 0 or self._stream is None:
        self._ret_array.append(to_device(self._device, array))
        return

    pin_array, cp_array = self._array_set.pop(0)

    # Only reuse the cached buffers when they match the new array exactly.
    # Comparing byte sizes alone is not enough: two arrays with equal
    # ``nbytes`` but different shape or dtype would make the copy below
    # fail, or silently yield a device array of the stale shape/dtype.
    if pin_array is not None:
        if (pin_array.shape != array.shape
                or pin_array.dtype != array.dtype):
            pin_array = None

    with cuda.get_device_from_id(self._device):
        if pin_array is None:
            # The global synchronization below is necessary to ensure ALL
            # operations including compute and data transfer submitted
            # to GPU so far have been completed, in order to avoid possible
            # memory corruption due to race condition among operations that
            # use different CUDA streams.
            # You can also solve this sort of race condition by preparing a
            # memory pool for each CUDA stream and using it carefully.
            cuda.cupy.cuda.runtime.deviceSynchronize()
            pin_mem = cuda.cupy.cuda.alloc_pinned_memory(array.nbytes)
            pin_array = numpy.frombuffer(
                pin_mem, array.dtype, array.size).reshape(array.shape)
            cp_array = cuda.cupy.empty_like(array)

        pin_array[...] = array  # copy(CPU): paged -> pinned
        cp_array.set(pin_array, self._stream)  # copy: CPU to GPU

    self._array_set.append([pin_array, cp_array])
    self._ret_array.append(cp_array)
def forward(self, x):
    """Updates the internal state and returns the LSTM outputs.

    The input and forget gates receive an extra term computed from the
    previous cell state (``peep_i``, ``peep_f``); the output gate receives
    one computed from the new cell state (``peep_o``).

    Args:
        x (~chainer.Variable): A new batch from the input sequence.

    Returns:
        ~chainer.Variable: Outputs of updated LSTM units.

    """
    gates = self.upward(x)
    if self.h is not None:
        gates += self.lateral(self.h)

    if self.c is None:
        array_module = self.xp
        with cuda.get_device_from_id(self._device_id):
            self.c = variable.Variable(
                array_module.zeros((x.shape[0], self.state_size),
                                   dtype=x.dtype))

    # Regroup the last axis into (units, 4) and split the four gates apart.
    gates = reshape.reshape(
        gates, (len(gates.data), gates.shape[1] // 4, 4))
    a, i, f, o = [
        reshape.reshape(part, (len(part.data), part.shape[1]))
        for part in split_axis.split_axis(gates, 4, 2)
    ]

    # Peephole terms for input/forget gates come from the previous cell.
    peep_for_i = self.peep_i(self.c)
    peep_for_f = self.peep_f(self.c)

    a = tanh.tanh(a)
    i = sigmoid.sigmoid(i + peep_for_i)
    f = sigmoid.sigmoid(f + peep_for_f)
    self.c = a * i + f * self.c

    # The output gate peeks at the freshly updated cell.
    peep_for_o = self.peep_o(self.c)
    o = sigmoid.sigmoid(o + peep_for_o)
    self.h = o * tanh.tanh(self.c)
    return self.h
def test_get_device_from_id(self):
    """An integer ID yields a device equal to ``cuda.Device`` of that ID."""
    looked_up = cuda.get_device_from_id(0)
    expected = cuda.Device(0)
    assert looked_up == expected
def test_get_dummy_device(self):
    """``None`` as the ID yields the ``cuda.DummyDevice`` object itself."""
    looked_up = cuda.get_device_from_id(None)
    assert looked_up is cuda.DummyDevice
def test_get_device_for_device(self):
    """``cuda.get_device`` warns of deprecation and returns its argument."""
    device = cuda.get_device_from_id(0)
    with testing.assert_warns(DeprecationWarning):
        returned = cuda.get_device(device)
        assert returned is device
def test_forward_gpu_multi(self):
    """Checks forward with data sent under device 0 and run under device 1."""
    gpu_inputs = []
    with cuda.get_device_from_id(0):
        self.link.to_gpu()
        for host_array in self.inputs:
            gpu_inputs.append(cuda.to_gpu(host_array))
    # A different device is made current while the forward pass is checked.
    with cuda.get_device_from_id(1):
        self.check_forward(*gpu_inputs)
def accuracy_gpu(self, device):
    """Returns ``self.accuracy`` evaluated with a CUDA backend config.

    Args:
        device: Device ID made current during the call and forwarded to
            ``self.accuracy``.

    """
    with cuda.get_device_from_id(device):
        cuda_config = backend.BackendConfig({'use_cuda': True})
        result = self.accuracy(cuda_config, device)
    return result
def init_hx(self, xs):
    """Creates a zero-filled initial hidden state.

    Args:
        xs: Sequence of inputs; its length and the dtype of its first
            element determine the shape and dtype of the state.

    Returns:
        A variable of zeros with shape
        ``(n_layers * direction, len(xs), out_size)``.

    """
    n_stacks = self.n_layers * self.direction
    state_shape = (n_stacks, len(xs), self.out_size)
    with cuda.get_device_from_id(self._device_id):
        zeros = self.xp.zeros(state_shape, dtype=xs[0].dtype)
        initial_state = variable.Variable(zeros)
    return initial_state
def test_get_device_from_id(self):
    """An integer ID yields a device equal to ``cuda.Device`` of that ID."""
    looked_up = cuda.get_device_from_id(0)
    expected = cuda.Device(0)
    self.assertEqual(looked_up, expected)
def test_get_device_from_id_for_numpy_int(self):
    """A ``numpy.int64`` ID is accepted like a plain integer ID."""
    numpy_id = numpy.int64(0)
    looked_up = cuda.get_device_from_id(numpy_id)
    expected = cuda.Device(0)
    self.assertEqual(looked_up, expected)
def test_get_dummy_device(self):
    """``None`` as the ID yields the ``cuda.DummyDevice`` object itself."""
    looked_up = cuda.get_device_from_id(None)
    self.assertIs(looked_up, cuda.DummyDevice)
def main():
    """Trains the RL policy network by playing against saved opponents.

    Command-line arguments:
        CONFIG: path to a JSON config file providing
            ``arguments.n_input_channel``, ``arguments.n_output_channel``
            and ``arguments.learning_rate``.
        MODEL: path to the ``.npz`` file the RL policy network is loaded
            from.
        --gpu: GPU ID; a negative value (the default) runs on CPU.
        --debug: debug flag (parsed but not used in this function).

    Every iteration an opponent snapshot is picked at random from
    ``./result/sl_policy/slpn.epoch*.npz``, one batch of games is played,
    and the agent is updated from the outcome. The trained network is
    saved to ``result/rl_policy.npz`` after the last iteration.
    """
    parser = argparse.ArgumentParser(description='SLPolicyNetwork',
                                     formatter_class=RawTextHelpFormatter)
    parser.add_argument('CONFIG', default=None, type=str,
                        help='path to config file')
    parser.add_argument('MODEL', default=None, type=str,
                        help='path to model.npz')
    parser.add_argument('--gpu', type=int, default=-1,
                        help='gpu numbers\nto specify')
    parser.add_argument('--debug', default=False, action='store_true',
                        help='switch to debug mode')
    args = parser.parse_args()

    print('*** load config ***')
    with open(args.CONFIG, "r") as f:
        config = json.load(f)

    print('*** set up model ***')
    n_input_channel = config["arguments"]["n_input_channel"]
    n_output_channel = config["arguments"]["n_output_channel"]
    rl_policy_network = RLPolicyNetwork(n_input_channel=n_input_channel,
                                        n_output_channel=n_output_channel)
    serializers.load_npz(args.MODEL, rl_policy_network)
    optimizer = chainer.optimizers.Adam(
        alpha=config["arguments"]["learning_rate"])
    optimizer.setup(rl_policy_network)

    # A plain truthiness test on ``args.gpu`` is wrong in both directions:
    # the CPU default of -1 is truthy, and GPU 0 is falsy.
    use_gpu = args.gpu >= 0
    if use_gpu:
        cuda.get_device_from_id(args.gpu).use()
        rl_policy_network.to_gpu(args.gpu)
        xp = cuda.cupy
    else:
        xp = np
    rl_policy_network.set_cache()

    # define parameters
    N = 30000
    batch_size = 150
    first_choices = [0x0000100000000000, 0x0000002000000000,
                     0x0000000004000000, 0x0000000000080000]

    bar = ProgressBar(0, N)

    print('*** start iteration ***')
    for i in range(N):
        bar.update(i)
        start = time.time()

        opponent = SLPolicyNetwork(n_input_channel=n_input_channel,
                                   n_output_channel=n_output_channel)
        opponent_model_path = np.random.choice(
            glob.glob("./result/sl_policy/slpn.epoch*.npz"))
        print(f'\nmodel:{opponent_model_path} is chosen')
        serializers.load_npz(opponent_model_path, opponent)
        if use_gpu:
            opponent.to_gpu(args.gpu)
        opponent.set_cache()

        agent = Agent(batch_size, xp, rl_policy_network, optimizer)
        env = Env(batch_size, xp, rl_policy_network, opponent)
        env.reset()

        is_black = True
        # On odd iterations a random first move from ``first_choices`` is
        # applied to the board before the agent starts, and the agent
        # takes the other colour.
        if i % 2 == 1:
            first_actions = xp.random.choice(
                first_choices, batch_size).astype('uint64').reshape(-1, 1)
            reversible_mask = env.reversible(first_actions, is_black)
            env.black, env.white = \
                env.reverse(first_actions, is_black, reversible_mask)
            is_black = not is_black

        obs = env.create_current_states(is_black)
        done = False
        while not done:
            action_indices = agent.act(obs)
            obs, _, done, _ = env.step(action_indices, is_black)

        # (b, 8, 8) -> (b, )
        bs = xp.sum(obs[:, 0].reshape(batch_size, -1), axis=1)
        ws = xp.sum(obs[:, 1].reshape(batch_size, -1), axis=1)
        true_rewards = bs > ws if is_black else ws > bs
        agent.update(true_rewards)

        count = xp.sum(bs > ws) if is_black else xp.sum(ws > bs)
        print(f'{time.time() - start:.02f} sec elapsed')
        print(f'win rate:{int(count) * 100 / batch_size:.02f}')

    # The loop never breaks, so saving here is equivalent to the original
    # ``for``/``else`` clause.
    serializers.save_npz("result/rl_policy.npz", rl_policy_network)
def main():
    """Reconstructs a few training images with a saved VAE and plots them.

    The model weights are read from ``birds_img.npz`` and the images from
    ``birds_img.npy``. Five fixed training images are encoded and decoded;
    the inputs and their reconstructions are written as figures named
    ``train`` and ``train_reconstructed`` under the ``--out`` directory.

    Command-line arguments ``--initmodel``, ``--resume``, ``--epoch``,
    ``--dimz`` and ``--batchsize`` are parsed (and partly echoed) but do
    not influence the computation in this function.
    """
    parser = argparse.ArgumentParser(description='Chainer example: VAE')
    parser.add_argument('--initmodel', '-m', default='',
                        help='Initialize the model from given file')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the optimization from snapshot')
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--epoch', '-e', default=100, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--dimz', '-z', default=20, type=int,
                        help='dimension of encoded vector')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='learning minibatch size')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# dim z: {}'.format(args.dimz))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    use_gpu = 0 <= args.gpu
    if use_gpu:
        cuda.get_device_from_id(args.gpu).use()

    # Create the VAE object defined in the ``net_img`` module.
    model = net_img.VAE(3, 10, 64)
    # NOTE(review): the weights file is hard-coded; ``--initmodel`` is
    # currently ignored.
    chainer.serializers.load_npz("birds_img.npz", model)
    if use_gpu:
        model.to_gpu()  # move the model to the selected GPU

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    traini = np.load('birds_img.npy')
    traini = traini.reshape((len(traini), 3, 128, 128))

    # Figures are written straight into ``args.out``; nothing else creates
    # that directory, so make sure it exists first.
    if not os.path.isdir(args.out):
        os.makedirs(args.out)

    # Visualize the results
    def save_images(x, filename):
        """Draws up to nine CHW images of ``x`` on a 3x3 grid and saves it."""
        import matplotlib.pyplot as plt

        fig, ax = plt.subplots(3, 3, figsize=(9, 9), dpi=100)
        for ai, xi in zip(ax.flatten(), x):
            print(xi.shape)
            # Channel-first -> channel-last, as expected by ``imshow``.
            xi = xi.transpose(1, 2, 0)
            print(xi.shape)
            ai.imshow(xi)
        fig.savefig(filename)

    train_ind = [10, 20, 18, 19, 26]
    batch = np.asarray(traini[train_ind])
    if use_gpu:
        # The model lives on the GPU, so the input must be sent there too;
        # feeding it a NumPy array would fail.
        batch = cuda.to_gpu(batch)
    x = chainer.Variable(batch)
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        mu, _ = model.encode(x)
        x2 = model.decode(mu)

    # Bring the arrays back to host memory before plotting them.
    save_images(cuda.to_cpu(x.array), os.path.join(args.out, 'train'))
    save_images(cuda.to_cpu(x2.array),
                os.path.join(args.out, 'train_reconstructed'))
def main():
    """Trains a recursive network on tree data and evaluates it.

    Trees are read from ``trees/train.txt``, ``trees/dev.txt`` and
    ``trees/test.txt``. With ``--test`` at most 10 trees are read from
    each file. The model is trained with AdaGrad plus weight decay under
    a :class:`chainer.training.Trainer`, validated every ``--epocheval``
    epochs, and finally evaluated on the test trees.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result', type=str,
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', type=str,
                        help='Resume the training from snapshot')
    parser.add_argument('--epoch', '-e', default=400, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--unit', '-u', default=30, type=int,
                        help='number of units')
    parser.add_argument('--batchsize', '-b', type=int, default=25,
                        help='learning minibatch size')
    parser.add_argument('--label', '-l', type=int, default=5,
                        help='number of labels')
    parser.add_argument('--epocheval', '-p', type=int, default=5,
                        help='number of epochs per evaluation')
    parser.add_argument('--test', dest='test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    n_epoch = args.epoch  # number of epochs
    n_units = args.unit  # number of units per layer
    batchsize = args.batchsize  # minibatch size
    n_label = args.label  # number of labels
    epoch_per_eval = args.epocheval  # number of epochs per evaluation

    # ``--test`` caps the number of trees read from each corpus file.
    if args.test:
        max_size = 10
    else:
        max_size = None

    # ``vocab`` is filled in as a side effect of converting the trees.
    vocab = {}
    train_data = [
        convert_tree(vocab, tree)
        for tree in data.read_corpus('trees/train.txt', max_size)
    ]
    train_iter = chainer.iterators.SerialIterator(train_data, batchsize)
    validation_data = [
        convert_tree(vocab, tree)
        for tree in data.read_corpus('trees/dev.txt', max_size)
    ]
    validation_iter = chainer.iterators.SerialIterator(
        validation_data, batchsize, repeat=False, shuffle=False)
    test_data = [
        convert_tree(vocab, tree)
        for tree in data.read_corpus('trees/test.txt', max_size)
    ]

    model = RecursiveNet(len(vocab), n_units, n_label)

    if args.gpu >= 0:
        cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Setup optimizer
    optimizer = optimizers.AdaGrad(lr=0.1)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(0.0001))

    def _convert(batch, _):
        """Returns the batch unchanged; the device argument is ignored."""
        return batch

    # Setup updater
    updater = chainer.training.StandardUpdater(
        train_iter, optimizer, device=args.gpu, converter=_convert)

    # Setup trainer and run
    trainer = chainer.training.Trainer(updater, (n_epoch, 'epoch'), args.out)
    trainer.extend(
        extensions.Evaluator(validation_iter, model, device=args.gpu,
                             converter=_convert),
        trigger=(epoch_per_eval, 'epoch'))
    trainer.extend(extensions.LogReport())

    trainer.extend(extensions.MicroAverage(
        'main/correct', 'main/total', 'main/accuracy'))
    trainer.extend(extensions.MicroAverage(
        'validation/main/correct', 'validation/main/total',
        'validation/main/accuracy'))

    trainer.extend(extensions.PrintReport([
        'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy', 'elapsed_time'
    ]))

    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'),
        trigger=(epoch_per_eval, 'epoch'))

    if args.resume is not None:
        chainer.serializers.load_npz(args.resume, trainer)
    trainer.run()

    print('Test evaluation')
    evaluate(model, test_data)
def test_get_device_from_id_for_numpy_int(self):
    """A ``numpy.int64`` ID is accepted like a plain integer ID."""
    numpy_id = numpy.int64(0)
    looked_up = cuda.get_device_from_id(numpy_id)
    expected = cuda.Device(0)
    assert looked_up == expected
def main():
    """Trains a word2vec model on the PTB words dataset and saves it.

    The model type (``skipgram`` or ``cbow``) and the output layer
    (hierarchical softmax, negative sampling, or plain softmax cross
    entropy) are chosen on the command line. After training, the embedding
    matrix is written as text to ``word2vec.model`` in the current
    directory: a header line with the vocabulary size and number of units,
    then one line per word.

    Raises:
        Exception: If the output type or model type is not one of the
            known values.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--unit', '-u', default=100, type=int,
                        help='number of units')
    parser.add_argument('--window', '-w', default=5, type=int,
                        help='window size')
    parser.add_argument('--batchsize', '-b', type=int, default=1000,
                        help='learning minibatch size')
    parser.add_argument('--epoch', '-e', default=20, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--model', '-m', choices=['skipgram', 'cbow'],
                        default='skipgram',
                        help='model type ("skipgram", "cbow")')
    parser.add_argument('--negative-size', default=5, type=int,
                        help='number of negative samples')
    parser.add_argument('--out-type', '-o', choices=['hsm', 'ns', 'original'],
                        default='hsm',
                        help='output model type ("hsm": hierarchical softmax, '
                        '"ns": negative sampling, "original": '
                        'no approximation)')
    parser.add_argument('--out', default='result',
                        help='Directory to output the result')
    parser.add_argument('--test', dest='test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Verify CUDA is usable before touching the device, and activate the
    # device exactly once (it used to be activated twice, with the
    # availability check placed after the first activation).
    if args.gpu >= 0:
        cuda.check_cuda_available()
        chainer.backends.cuda.get_device_from_id(args.gpu).use()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('Window: {}'.format(args.window))
    print('Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('Training model: {}'.format(args.model))
    print('Output type: {}'.format(args.out_type))
    print('')

    # Load the dataset
    train, val, _ = chainer.datasets.get_ptb_words()
    counts = collections.Counter(train)
    counts.update(collections.Counter(val))
    n_vocab = max(train) + 1

    # ``--test`` shrinks both splits to their first 100 entries.
    if args.test:
        train = train[:100]
        val = val[:100]

    vocab = chainer.datasets.get_ptb_words_vocabulary()
    index2word = {wid: word for word, wid in six.iteritems(vocab)}

    print('n_vocab: %d' % n_vocab)
    print('data length: %d' % len(train))

    if args.out_type == 'hsm':
        HSM = L.BinaryHierarchicalSoftmax
        tree = HSM.create_huffman_tree(counts)
        loss_func = HSM(args.unit, tree)
        loss_func.W.data[...] = 0
    elif args.out_type == 'ns':
        cs = [counts[w] for w in range(len(counts))]
        loss_func = L.NegativeSampling(args.unit, cs, args.negative_size)
        loss_func.W.data[...] = 0
    elif args.out_type == 'original':
        loss_func = SoftmaxCrossEntropyLoss(args.unit, n_vocab)
    else:
        raise Exception('Unknown output type: {}'.format(args.out_type))

    # Choose the model
    if args.model == 'skipgram':
        model = SkipGram(n_vocab, args.unit, loss_func)
    elif args.model == 'cbow':
        model = ContinuousBoW(n_vocab, args.unit, loss_func)
    else:
        raise Exception('Unknown model type: {}'.format(args.model))

    if args.gpu >= 0:
        model.to_gpu()

    # Set up an optimizer
    optimizer = O.Adam()
    optimizer.setup(model)

    # Set up an iterator
    train_iter = WindowIterator(train, args.window, args.batchsize)
    val_iter = WindowIterator(val, args.window, args.batchsize, repeat=False)

    # Set up an updater
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=convert, device=args.gpu)

    # Set up a trainer
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(extensions.Evaluator(
        val_iter, model, converter=convert, device=args.gpu))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss']))
    trainer.extend(extensions.ProgressBar())
    trainer.run()

    # Save the word2vec model
    with open('word2vec.model', 'w') as f:
        f.write('%d %d\n' % (len(index2word), args.unit))
        w = cuda.to_cpu(model.embed.W.data)
        for i, wi in enumerate(w):
            v = ' '.join(map(str, wi))
            f.write('%s %s\n' % (index2word[i], v))
def main():
    """Runs Faster R-CNN on EPIC-Kitchens images and visualizes detections.

    For every image the predicted boxes are drawn with ``vis_bbox``. With
    ``--save-path`` each figure is saved as ``<sample id>.png`` (slashes in
    the ID replaced by underscores) under that path relative to
    ``thisdir``. Unless ``--no-display`` is given, each figure is also
    shown. With ``--skip`` images without any detection are skipped.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--pretrained-model', default='imagenet')
    parser.add_argument('--batch-size', type=int, default=1)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--no-display', action='store_true')
    parser.add_argument('--shuffle', action='store_true')
    parser.add_argument('--save-path', type=str, default=None)
    parser.add_argument('--split', choices=('train', 'val'), default='val')
    parser.add_argument('--skip', action='store_true')
    parser.add_argument('--score-thresh', type=float, default=0.6)
    args = parser.parse_args()

    model = FasterRCNNVGG16(
        n_fg_class=len(epic_kitchens_bbox_label_names),
        pretrained_model=args.pretrained_model)
    model.score_thresh = args.score_thresh
    if args.gpu >= 0:
        cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    dataset = EpicKitchensBboxDataset(split=args.split)
    dataset_iter = chainer.iterators.SerialIterator(
        dataset, args.batch_size, shuffle=args.shuffle, repeat=False)

    dataset_iter.reset()
    # ``_order`` is None when the iterator walks the dataset in order;
    # otherwise it maps the iteration position to a dataset index.
    order = dataset_iter._order

    # Position (in iteration order) of the first sample of the current
    # batch. It is tracked locally so that it stays right for a short final
    # batch and does not depend on how the iterator updates
    # ``current_position`` at the end of the epoch.
    base_index = 0
    for batch_data in dataset_iter:
        imgs = [img for img, _, _ in batch_data]
        bboxes, labels, scores = model.predict(imgs)

        # Loop over the samples actually present: the last batch may hold
        # fewer than ``args.batch_size`` images.
        for b_i, img in enumerate(imgs):
            bbox, label, score = bboxes[b_i], labels[b_i], scores[b_i]

            # Resolve the sample ID through the iteration order so that the
            # name matches the image even with ``--shuffle``.
            position = base_index + b_i
            if order is None:
                sample_id = dataset.ids[position]
            else:
                sample_id = dataset.ids[order[position]]

            if args.skip:
                if len(bbox) == 0:
                    print('skip {}.jpg'.format(sample_id))
                    continue

            vis_bbox(img, bbox, label, score,
                     label_names=epic_kitchens_bbox_label_names)

            if args.save_path is not None:
                save_path = os.path.join(thisdir, args.save_path)
                if not os.path.exists(save_path):
                    os.makedirs(save_path)
                plt.savefig(os.path.join(
                    save_path,
                    '{}.png'.format(sample_id.replace('/', '_'))))

            if not args.no_display:
                plt.show()

        base_index += len(imgs)
def main():
    """Trains a recursive network on tree data and evaluates it.

    Trees are read from ``trees/train.txt``, ``trees/dev.txt`` and
    ``trees/test.txt``. With ``--test`` at most 10 trees are read from
    each file. The model is trained with AdaGrad plus weight decay under
    a :class:`chainer.training.Trainer`, validated every ``--epocheval``
    epochs, and finally evaluated on the test trees.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result', type=str,
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', type=str,
                        help='Resume the training from snapshot')
    parser.add_argument('--epoch', '-e', default=400, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--unit', '-u', default=30, type=int,
                        help='number of units')
    parser.add_argument('--batchsize', '-b', type=int, default=25,
                        help='learning minibatch size')
    parser.add_argument('--label', '-l', type=int, default=5,
                        help='number of labels')
    parser.add_argument('--epocheval', '-p', type=int, default=5,
                        help='number of epochs per evaluation')
    parser.add_argument('--test', dest='test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    n_epoch = args.epoch  # number of epochs
    n_units = args.unit  # number of units per layer
    batchsize = args.batchsize  # minibatch size
    n_label = args.label  # number of labels
    epoch_per_eval = args.epocheval  # number of epochs per evaluation

    # ``--test`` caps the number of trees read from each corpus file.
    if args.test:
        max_size = 10
    else:
        max_size = None

    # ``vocab`` is filled in as a side effect of converting the trees.
    vocab = {}
    train_data = [convert_tree(vocab, tree)
                  for tree in data.read_corpus('trees/train.txt', max_size)]
    train_iter = chainer.iterators.SerialIterator(train_data, batchsize)
    validation_data = [convert_tree(vocab, tree)
                       for tree in data.read_corpus('trees/dev.txt', max_size)]
    validation_iter = chainer.iterators.SerialIterator(
        validation_data, batchsize, repeat=False, shuffle=False)
    test_data = [convert_tree(vocab, tree)
                 for tree in data.read_corpus('trees/test.txt', max_size)]

    model = RecursiveNet(len(vocab), n_units, n_label)

    if args.gpu >= 0:
        cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Setup optimizer
    optimizer = optimizers.AdaGrad(lr=0.1)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(0.0001))

    def _convert(batch, _):
        """Returns the batch unchanged; the device argument is ignored."""
        return batch

    # Setup updater
    updater = chainer.training.StandardUpdater(
        train_iter, optimizer, device=args.gpu, converter=_convert)

    # Setup trainer and run
    trainer = chainer.training.Trainer(updater, (n_epoch, 'epoch'), args.out)
    trainer.extend(
        extensions.Evaluator(validation_iter, model, device=args.gpu,
                             converter=_convert),
        trigger=(epoch_per_eval, 'epoch'))
    trainer.extend(extensions.LogReport())

    trainer.extend(extensions.MicroAverage(
        'main/correct', 'main/total', 'main/accuracy'))
    trainer.extend(extensions.MicroAverage(
        'validation/main/correct', 'validation/main/total',
        'validation/main/accuracy'))

    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'),
        trigger=(epoch_per_eval, 'epoch'))

    if args.resume is not None:
        chainer.serializers.load_npz(args.resume, trainer)
    trainer.run()

    print('Test evaluation')
    evaluate(model, test_data)
def test_init_gpu_with_current_device(self):
    """Check the device id of an array generated under device 1."""
    expected_id = 1
    target_device = cuda.get_device_from_id(expected_id)
    with target_device:
        generated = self._generate_array(cuda.cupy, 'float64')
    assert generated.device.id == expected_id
def main():
    """Train the Darknet53 classifier on the VOC dataset.

    Parses the command-line options, seeds the random generators, wraps
    ``Darknet53(20)`` in an ``L.Classifier``, and trains it with MomentumSGD
    using a ``StandardUpdater`` (zero or one GPU) or a ``ParallelUpdater``
    (several GPUs). Snapshots of the network are written to ``--out``.
    """
    parser = argparse.ArgumentParser(description='Chainer Darknet53 Train')
    parser.add_argument('--batchsize', '-b', type=int, default=8)
    parser.add_argument('--iteration', '-i', type=int, default=100000)
    parser.add_argument('--gpus', '-g', type=int, nargs='*', default=[])
    parser.add_argument('--out', '-o', default='darknet53-voc-result')
    # type=int is required: without it a seed given on the command line is a
    # string, which np.random.seed rejects.
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--display_interval', type=int, default=100)
    parser.add_argument('--snapshot_interval', type=int, default=100)
    parser.add_argument('--validation_size', type=int, default=2048)
    args = parser.parse_args()

    print('GPUs: {}'.format(args.gpus))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# iteration: {}'.format(args.iteration))
    print('')

    random.seed(args.seed)
    np.random.seed(args.seed)

    darknet53 = Darknet53(20)
    model = L.Classifier(darknet53)
    device = -1
    if len(args.gpus) > 0:
        device = args.gpus[0]
        cuda.cupy.random.seed(args.seed)
        cuda.get_device_from_id(args.gpus[0]).use()
    # With several GPUs the model is not moved here; the ParallelUpdater
    # below receives the device mapping instead.
    if len(args.gpus) == 1:
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=0.001)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(0.0005),
                       'hook_decay')

    train = VOCBboxDataset(split='train')
    test = VOCBboxDataset(split='val')
    train = YOLOVOCDataset(train, classifier=True, jitter=0.2,
                           hue=0.1, sat=.75, val=.75)
    test = YOLOVOCDataset(test, classifier=True, crop_size=(256, 256))
    # Validate on a random subset of at most --validation_size samples.
    test = test[np.random.permutation(np.arange(
        len(test)))[:min(args.validation_size, len(test))]]

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    if len(args.gpus) <= 1:
        updater = training.StandardUpdater(
            train_iter, optimizer, device=device)
    else:
        devices = {'main': args.gpus[0]}
        for gpu in args.gpus[1:]:
            devices['gpu{}'.format(gpu)] = gpu
        updater = training.ParallelUpdater(
            train_iter, optimizer, devices=devices)
    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)

    display_interval = (args.display_interval, 'iteration')
    snapshot_interval = (args.snapshot_interval, 'iteration')

    trainer.extend(extensions.Evaluator(test_iter, model, device=device),
                   trigger=display_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport(trigger=display_interval))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'iteration', display_interval,
                                  file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'iteration', display_interval, file_name='accuracy.png'))

    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
    ]), trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=5))

    # One snapshot whenever the validation loss reaches a new minimum, and
    # one unconditional snapshot at every snapshot interval.
    trainer.extend(
        extensions.snapshot_object(darknet53, 'darknet53_snapshot.npz'),
        trigger=training.triggers.MinValueTrigger(
            'validation/main/loss', snapshot_interval))
    trainer.extend(
        extensions.snapshot_object(darknet53, 'darknet53_final.npz'),
        trigger=snapshot_interval)

    trainer.extend(DarknetShift(optimizer, 'poly', args.iteration))
    trainer.extend(CropSizeUpdater(train,
                                   [(4 + i) * 32 for i in range(0, 11)]))

    trainer.run()
def main():
    """Train a word2vec model on PTB words and dump the embeddings.

    Parses the command-line options, builds the requested loss
    (hierarchical softmax, negative sampling or plain softmax) and model
    (skip-gram or CBoW), trains it with Adam through a Chainer ``Trainer``
    and writes the embedding matrix to ``word2vec.model`` as text.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--unit', '-u', default=300, type=int,
                        help='number of units')
    parser.add_argument('--window', '-w', default=3, type=int,
                        help='window size')
    parser.add_argument('--batchsize', '-b', type=int, default=10000,
                        help='learning minibatch size')
    parser.add_argument('--epoch', '-e', default=5, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--model', '-m', choices=['skipgram', 'cbow'],
                        default='skipgram',
                        help='model type ("skipgram", "cbow")')
    parser.add_argument('--negative-size', default=2, type=int,
                        help='number of negative samples')
    parser.add_argument('--out-type', '-o', choices=['hsm', 'ns', 'original'],
                        default='ns',
                        help='output model type ("hsm": hierarchical softmax, '
                        '"ns": negative sampling, "original": '
                        'no approximation)')
    parser.add_argument('--out', default='result',
                        help='Directory to output the result')
    parser.add_argument('--test', dest='test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    use_gpu = args.gpu >= 0

    if use_gpu:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        cuda.check_cuda_available()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('Window: {}'.format(args.window))
    print('Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('Training model: {}'.format(args.model))
    print('Output type: {}'.format(args.out_type))
    print('')

    if use_gpu:
        cuda.get_device_from_id(args.gpu).use()

    # Load the dataset; the PTB test split is only passed through get_pair.
    train, val, ptb_test = chainer.datasets.get_ptb_words()
    vocab = chainer.datasets.get_ptb_words_vocabulary()
    (train, val, ptb_test, vocab,
     original_index, ori_con_data) = w2v_mi.get_pair(
         train, val, ptb_test, vocab)

    # Word frequencies over the training and validation words.
    counts = collections.Counter(train)
    counts.update(collections.Counter(val))
    n_vocab = max(train) + 1

    if args.test:
        train, val = train[:100], val[:100]

    index2word = {}
    for word, wid in six.iteritems(vocab):
        index2word[wid] = word

    print('n_vocab: %d' % n_vocab)
    print('data length: %d' % len(train))

    # Build the loss function selected by --out-type.
    out_type = args.out_type
    if out_type == 'hsm':
        HSM = L.BinaryHierarchicalSoftmax
        tree = HSM.create_huffman_tree(counts)
        loss_func = HSM(args.unit, tree)
        loss_func.W.data[...] = 0
    elif out_type == 'ns':
        cs = [counts[w] for w in range(len(counts))]
        loss_func = L.NegativeSampling(args.unit, cs, args.negative_size)
        loss_func.W.data[...] = 0
    elif out_type == 'original':
        loss_func = SoftmaxCrossEntropyLoss(args.unit, n_vocab)
    else:
        raise Exception('Unknown output type: {}'.format(args.out_type))

    # Choose the model
    if args.model == 'skipgram':
        model = SkipGram(n_vocab, args.unit, loss_func, ori_con_data)
    elif args.model == 'cbow':
        model = ContinuousBoW(n_vocab, args.unit, loss_func, ori_con_data)
    else:
        raise Exception('Unknown model type: {}'.format(args.model))

    if use_gpu:
        model.to_gpu()

    # Set up an optimizer
    optimizer = O.Adam()
    optimizer.setup(model)

    # Set up an iterator
    train_iter = WindowIterator(
        train, args.window, args.batchsize, original_index)
    val_iter = WindowIterator(
        val, args.window, args.batchsize, original_index, repeat=False)

    # Set up an updater
    updater = training.StandardUpdater(
        train_iter, optimizer, converter=convert, device=args.gpu)

    # Set up a trainer
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    evaluator = extensions.Evaluator(
        val_iter, model, converter=convert, device=args.gpu)
    trainer.extend(evaluator)
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport(['epoch', 'main/loss', 'validation/main/loss']))
    trainer.extend(extensions.ProgressBar())
    trainer.run()

    # Save the word2vec model: a "<rows> <dim>" header followed by one
    # "<word> <vector>" line per row. The row whose index equals
    # len(index2word) - 1 is printed and left out of the file.
    skipped_row = len(index2word) - 1
    with open('word2vec.model', 'w') as f:
        f.write('%d %d\n' % (skipped_row, args.unit))
        embeddings = cuda.to_cpu(model.embed.W.data)
        for row, vector in enumerate(embeddings):
            if row == skipped_row:
                print(row)
                continue
            rendered = ' '.join(map(str, vector))
            f.write('%s %s\n' % (index2word[row], rendered))
def test_get_device_from_id_for_builtin_int(self):
    """``get_device_from_id`` accepts a ``builtins.int`` device id."""
    # builtins.int comes from the ``future`` package; on Python 2 it is a
    # different type from the builtin int/long.
    device_id = builtins.int(0)
    resolved = cuda.get_device_from_id(device_id)
    assert resolved == cuda.Device(0)
def main(commands=None):
    """Run segmentation inference, post-process it and dump predictions.

    Every process extracts network outputs for its shard of the dataset on
    its own GPU. The outputs are gathered on rank 0 and the other ranks
    exit. Rank 0 post-processes the outputs in a process pool, writes the
    predictions to ``--out`` and optionally saves demo images.

    Args:
        commands (list of str or None): Argument list passed to
            ``ArgumentParser.parse_args``. ``None`` means ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description='Segmentation Predict')
    parser.add_argument('--model', '-m', nargs='+', help='Path to model')
    parser.add_argument('--config', '-c', nargs='*',
                        default=['examples/configs/seg_resnet.yaml'])
    parser.add_argument('--val-set', type=int)
    parser.add_argument('--x-flip', type=int,
                        help='0: no, 1: yes, 2: both (average)', default=0)
    parser.add_argument('--multiscale', action='store_true')

    # Args for ensembling
    parser.add_argument('--ensemble-seg', action='store_true')
    parser.add_argument('--seg-weight', type=float, nargs='*', default=None)
    parser.add_argument('--edge-weight', type=float, nargs='*', default=None)

    parser.add_argument('--gpu', '-g', type=int, default=0)
    parser.add_argument('--n-process', '-p', type=int, default=30)
    parser.add_argument('--out', '-o', default='out.csv')

    parser.add_argument('--test', action='store_true')
    parser.add_argument('--limit', '-n', type=int, default=0)
    parser.add_argument('--thresh', '-t', type=float, default=0.1,
                        help='Threshold for edge confidence')

    parser.add_argument('--save-demo-to', metavar='/path/to/out_demo/dir')
    parser.add_argument('--overlay-seg', action='store_true')

    parser.add_argument('--cprofile', action='store_true',
                        help='To profile with cprofile')
    args = parser.parse_args(commands)

    configs = []
    for config_path in args.config:
        # Close each config file as soon as it has been parsed.
        with open(config_path) as config_file:
            # NOTE(review): yaml.load can construct arbitrary Python objects
            # and newer PyYAML releases require an explicit Loader. Consider
            # yaml.safe_load if the config files only hold plain data.
            configs.append(load_config(yaml.load(config_file)))
    master_config = configs[0]

    comm = chainermn.create_communicator(communicator_name='pure_nccl')
    device = comm.intra_rank + args.gpu
    print('Device = {}'.format(device))

    if len(configs) == 1 and len(args.model) >= 2:
        # Duplicate same config
        configs = configs * len(args.model)
    else:
        assert len(configs) == len(args.model), \
            "# of configs and models don't match."

    # Setup models
    models = []
    for i, model_path in enumerate(args.model):
        model = setup_model(configs[i], args.x_flip)
        chainer.serializers.load_npz(model_path, model)
        models.append(model)

    if len(models) == 1:
        model = models[0]
    else:
        ensembler_cls = (MultiScaleModelEnsembler if args.multiscale
                         else ModelEnsembler)
        model = ensembler_cls(models, ensemble_seg=args.ensemble_seg,
                              seg_weight=args.seg_weight,
                              edge_weight=args.edge_weight)

    with cuda.get_device_from_id(device):
        model.to_gpu()

    # Setup dataset: only rank 0 builds it; scatter_dataset then hands each
    # rank its own shard.
    if comm.rank == 0:
        if args.test:
            dataset = RSNASubmissionDataset()
        else:
            if args.val_set is not None:
                master_config['val_set'] = args.val_set
            dataset = RSNATrainDataset()

            if master_config['val_set'] == -1:
                val_mask = dataset.patient_df['withinTestRange'].values == 1
                val_indices = val_mask.nonzero()[0]
            else:
                _, val_indices = create_train_val_indices(
                    np.ones(len(dataset), dtype=bool),
                    master_config['val_set'])

            dataset = dataset.slice[
                val_indices, ('dicom_data', 'img', 'bbox')]

        if args.limit and args.limit < len(dataset):
            dataset, _ = split_dataset(dataset, args.limit)
    else:
        dataset = None

    dataset = chainermn.scatter_dataset(dataset, comm)

    if args.cprofile:
        import cProfile
        import pstats
        import io
        pr = cProfile.Profile()
        pr.enable()

    if comm.rank == 0:
        print('Extracting network outputs...')
    outputs = []
    gt_bboxes = []
    for i in range(len(dataset)):
        if comm.rank == 0 and i % 100 == 0:
            print('Processing {}-th sample...'.format(i))
        if args.test:
            dicom_data, image = dataset[i]
            patient_id = dicom_data.PatientID
            # No ground truth in test mode: use an empty (0, 4) box array.
            gt_bbox = np.empty((0, 4), dtype=np.float32)
        else:
            dicom_data, image, gt_bbox = dataset[i]
            patient_id = dicom_data.PatientID

        window_width = master_config['data_augmentation']['window_width']
        if window_width > 1.0:
            # Scale intensities around 128, then clip to [0, 255].
            image = (image - 128) * window_width + 128
            image = np.clip(image, 0, 255)

        with cuda.get_device_from_id(device):
            h_seg, h_hor, h_ver = [x[0] for x in model.extract([image])]

        outputs.append((patient_id, image, h_seg, h_hor, h_ver))
        gt_bboxes.append((patient_id, gt_bbox))

    # Gather everything on rank 0; all other ranks are done afterwards.
    if comm.rank == 0:
        for i in range(1, comm.size):
            other_outputs = comm.recv_obj(i)
            outputs.extend(other_outputs)
            other_gt_bboxes = comm.recv_obj(i)
            gt_bboxes.extend(other_gt_bboxes)
    else:
        comm.send_obj(outputs, 0)
        comm.send_obj(gt_bboxes, 0)
        print('Bye {}.'.format(comm.rank))
        exit(0)

    outputs = sorted(outputs, key=lambda x: x[0])
    gt_bboxes = sorted(gt_bboxes, key=lambda x: x[0])

    print('Done.')
    print('Postprocessing...')
    postprocessor = Postprocessor(master_config['downscale'], args.thresh,
                                  master_config['size_thresh'],
                                  master_config['edge_conf_operation'])
    with multiprocessing.Pool(args.n_process) as p:
        results = p.map(postprocessor.postprocess, outputs)
    results = sorted(results, key=lambda x: x[0])
    print('Done.')

    # Sanity check: results and outputs are in the same patient order.
    outputs_ids = [x[0] for x in outputs]
    results_ids = [x[0] for x in results]
    assert outputs_ids == results_ids

    print('Dumping final results...')
    pred_manager = PredictionsManager()
    n_positive = 0
    for result in results:
        patient_id, bbox, label, score = result
        pred_manager.add_prediction(patient_id, bbox, score)
        if len(bbox) > 0:
            n_positive += 1
    print('Complete!')
    # ``dataset`` is only rank 0's shard after scattering, so the total is
    # taken from the gathered results instead.
    print('{} / {} are predicted as positive.'.format(
        n_positive, len(results)))
    with open(args.out, 'w') as f:
        pred_manager.dump(f)

    if args.save_demo_to:
        print('Start saving demos...')
        os.makedirs(args.save_demo_to, exist_ok=True)
        demo_saver = DemoSaver(args.save_demo_to, master_config['downscale'],
                               args.overlay_seg)
        with multiprocessing.Pool(args.n_process) as p:
            p.map(demo_saver.save, list(zip(results, outputs, gt_bboxes)))

    if args.cprofile:
        pr.disable()
        s = io.StringIO()
        sortby = 'time'
        ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
        ps.print_stats()
        print(s.getvalue())
        pr.dump_stats('prof.cprofile')
def test_gpu_to_another_gpu(self):
    """``cuda.copyto`` copies into an array allocated under device 1."""
    source = cuda.cupy.arange(1, 5, dtype=numpy.float32)
    other_device = cuda.get_device_from_id(1)
    with other_device:
        target = cuda.cupy.zeros_like(source)
    cuda.copyto(target, source)
    cuda.cupy.testing.assert_array_equal(target, source)
def forward(self, x, **kwargs):
    """forward(self, x, finetune=False)

    Applies batch normalization to ``x``.

    When ``chainer.config.train`` is true the input is normalized through
    ``functions.batch_normalization`` with ``avg_mean`` and ``avg_var``
    passed as the running statistics. Otherwise the stored ``avg_mean`` and
    ``avg_var`` are used through ``functions.fixed_batch_normalization``.

    .. warning::

       ``test`` argument is not supported anymore since v2.
       Instead, use ``chainer.using_config('train', False)``.
       See :func:`chainer.using_config`.

    Args:
        x (Variable): Input variable.
        finetune (bool): In training mode, ``True`` increments ``self.N``
            and uses ``1 - 1 / N`` as the decay instead of ``self.decay``.

    Returns:
        The output of the normalization function that was called.

    """
    (finetune,) = argument.parse_kwargs(
        kwargs, ('finetune', False),
        test='test argument is not supported anymore. '
             'Use chainer.using_config')

    # Lazy initialization: keep the dimensions of ``x`` whose index is not
    # listed in ``self.axis``.
    if self.avg_mean is None:
        kept_dims = tuple(
            [size for index, size in enumerate(x.shape)
             if index not in self.axis])
        self._initialize_params(kept_dims)

    # Missing scale/shift parameters are replaced by constant ones/zeros
    # allocated on this link's device.
    scale = self.gamma
    if scale is None:
        with cuda.get_device_from_id(self._device_id):
            scale = self.xp.ones(self.avg_mean.shape, dtype=x.dtype)

    shift = self.beta
    if shift is None:
        with cuda.get_device_from_id(self._device_id):
            shift = self.xp.zeros(self.avg_mean.shape, dtype=x.dtype)

    if not configuration.config.train:
        # Use running average statistics or fine-tuned statistics.
        running_mean = self.avg_mean
        running_var = self.avg_var
        return functions.fixed_batch_normalization(
            x, scale, shift, running_mean, running_var, self.eps,
            axis=self.axis)

    if finetune:
        self.N += 1
        decay = 1. - 1. / self.N
    else:
        decay = self.decay

    return functions.batch_normalization(
        x, scale, shift, eps=self.eps, running_mean=self.avg_mean,
        running_var=self.avg_var, decay=decay, axis=self.axis)
chainer.config.train = False # All the codes will run in test mode dataset = [] with open(args.dataset) as list_file: for line in list_file: pair = line.strip().split() path = os.path.join(args.basepath, pair[0]) dataset.append((path, np.int32(pair[1]))) assert len(dataset) % args.batchsize == 0 print('Loading Caffe model file %s...' % args.model) func = caffe.CaffeFunction(args.model) print('Loaded') if args.gpu >= 0: cuda.get_device_from_id(args.gpu).use() func.to_gpu() if args.model_type == 'alexnet' or args.model_type == 'caffenet': in_size = 227 mean_image = np.load(args.mean) def forward(x, t): y, = func(inputs={'data': x}, outputs=['fc8']) return F.softmax_cross_entropy(y, t), F.accuracy(y, t) elif args.model_type == 'googlenet': in_size = 224 # Constant mean over spatial pixels mean_image = np.ndarray((3, 256, 256), dtype=np.float32) mean_image[0] = 104 mean_image[1] = 117
def test_forward_multi_gpu(self):
    """Move the link and input under device 1, check forward under device 0."""
    setup_device = cuda.get_device_from_id(1)
    with setup_device:
        self.link.to_gpu()
        gpu_input = cuda.to_gpu(self.x)
    run_device = cuda.get_device_from_id(0)
    with run_device:
        self.check_forward(gpu_input)
def trainOneMiniBatch(self, train_mode, decSent, encInfo, args, dropout_rate):
    """Run the decoder over one minibatch and accumulate loss and accuracy.

    Args:
        train_mode: ``> 0`` accumulates the loss and calls ``backward``;
            ``== 0`` counts correct and incorrect predictions instead.
        decSent: Decoder-side label sequence; it is converted with
            ``xp.array`` and indexed by time step.
        encInfo: Encoder result; ``cMBSize``, ``lstmVars``, ``attnList`` and
            ``encLen`` are read from it.
        args: Options; ``gpu`` and ``truncate_length`` are read here.
        dropout_rate: Passed through to the LSTM and output-layer helpers.

    Returns:
        tuple: ``(total_loss_val, (correct, incorrect, decoder_proc, proc))``.
    """
    # NOTE(review): the extent of the ``for`` loop below was reconstructed
    # from whitespace-collapsed source; step 3 is assumed to run inside the
    # loop because it uses ``index`` - confirm against the original file.
    if args.gpu >= 0:  # in case enc and dec are on different GPUs
        cuda.get_device_from_id(args.gpu).use()
    cMBSize = encInfo.cMBSize
    aList, finalHS = self.prepareDecoder(encInfo)

    xp = cuda.get_array_module(encInfo.lstmVars[0].data)
    total_loss = chainer.Variable(xp.zeros((), dtype=xp.float32))  # init
    total_loss_val = 0  # float
    correct = 0
    incorrect = 0
    proc = 0
    decoder_proc = len(decSent) - 1  # number of decoder-side words handled

    #######################################################################
    # 1, fetch the decoder-side input word embeddings all at once
    decEmbListCopy = self.getDecoderInputEmbeddings(
        decSent[:decoder_proc], args)
    decSent = xp.array(decSent)  # move onto the GPU
    #######################################################################
    # 2, compute the decoder-side RNN part
    # h4_list_copy = [0] * decoder_proc
    # lstm_states_list_copy = [0] * decoder_proc
    prev_h4 = None
    prev_lstm_states = None
    trunc_loss = chainer.Variable(xp.zeros((), dtype=xp.float32))
    for index in range(decoder_proc):  # decoder_len -1
        if index == 0:
            t_lstm_states = encInfo.lstmVars
            t_finalHS = finalHS
        else:
            # t_lstm_states = lstm_states_list_copy[index - 1]
            # t_finalHS = h4_list_copy[index - 1]
            t_lstm_states = prev_lstm_states
            t_finalHS = prev_h4
        # compute one step of the decoder LSTM
        hOut, lstm_states = self.processDecLSTMOneStep(
            decEmbListCopy[index], t_lstm_states, t_finalHS, args,
            dropout_rate)
        # cache lstm_states
        # lstm_states_list_copy[index] = lstm_states
        prev_lstm_states = lstm_states
        # when attention is used, compute the context vector
        finalHS = self.calcAttention(hOut, encInfo.attnList, aList,
                                     encInfo.encLen, cMBSize, args)
        # cache finalHS
        # h4_list_copy[index] = finalHS
        prev_h4 = finalHS
        ###################################################################
        # 3, compute the output (softmax) layer
        # for index in reversed(range(decoder_proc)):
        # compute the final output layer using the copies prepared in step 2
        # oVector = self.generateWord(h4_list_copy[index], encInfo.encLen, cMBSize, args, dropout_rate)
        oVector = self.generateWord(prev_h4, encInfo.encLen, cMBSize, args,
                                    dropout_rate)
        # ground-truth labels
        correctLabel = decSent[index + 1]  # xp
        # counts the labels for which ``label + 1`` is nonzero
        proc += (xp.count_nonzero(correctLabel + 1))
        # always divided by the minibatch size
        closs = chainF.softmax_cross_entropy(oVector, correctLabel,
                                             normalize=False)
        # this gives the un-normalized loss; cf. seq2seq-attn code
        total_loss_val += closs.data * cMBSize
        if train_mode > 0:  # backward only for training data
            # total_loss += closs
            trunc_loss += closs
        # we want the actual number of correct predictions
        t_correct = 0
        t_incorrect = 0
        # always evaluate on dev; on training data follow the option
        if train_mode == 0:  # or args.doEvalAcc > 0:
            # array of predicted word IDs (CuPy)
            pred_arr = oVector.data.argmax(axis=1)
            # where the prediction equals the label the difference is 0
            # => correct positions are 0, so subtract the nonzero count
            # from the total size
            t_correct = (correctLabel.size -
                         xp.count_nonzero(correctLabel - pred_arr))
            # subtract the number correct from the number of labels for
            # which ``label + 1`` is nonzero
            # the +1 is broadcast
            t_incorrect = xp.count_nonzero(correctLabel + 1) - t_correct
        correct += t_correct
        incorrect += t_incorrect
        # NOTE(review): trunc_loss is not reset after backward in the
        # visible code - confirm that this is intended.
        if train_mode > 0 and (index + 1) % args.truncate_length == 0:
            trunc_loss.backward()
    ####
    # NOTE(review): nothing visible adds to total_loss (the accumulation
    # above is commented out), so this backward starts from the initial
    # zero-valued variable - confirm.
    if train_mode > 0:  # backward only during training
        total_loss.backward()

    return total_loss_val, (correct, incorrect, decoder_proc, proc)
def __call__(self, batch, device=None, padding=None):
    """Concatenate data and transfer them to GPU asynchronously.

    See also :func:`chainer.dataset.concat_examples`.

    Args:
        batch (list): A list of examples.
        device (int): Device ID to which each array is sent.
        padding: Scalar value for extra elements.

    Returns:
        Array, a tuple of arrays, or a dictionary of arrays.
        The type depends on the type of each example in the batch.

    Raises:
        ValueError: If ``batch`` is empty, or if ``device`` differs from
            the device recorded while the conveyor was still empty.

    """
    if len(batch) == 0:
        raise ValueError('batch is empty')
    first_elem = batch[0]

    if len(self._conveyor) == 0:
        self._device = device  # device is set at first call
        if device is not None and device >= 0 and self._stream is None:
            with cuda.get_device_from_id(device):
                self._stream = cuda.Stream(non_blocking=True)
    # Compare by value: identity comparison of integers only works by
    # accident of interning and fails for equal but distinct int objects.
    if device != self._device:
        raise ValueError('device is different')

    if self.compute_stream is not None:
        self._event1.synchronize()
        self._event1.record(stream=self.compute_stream)

    with cuda.get_device_from_id(device):
        if isinstance(first_elem, tuple):
            result = []
            if not isinstance(padding, tuple):
                padding = [padding] * len(first_elem)

            # Put every field first, then collect them.
            for i in six.moves.range(len(first_elem)):
                self._conveyor[i].put(
                    _concat_arrays([example[i] for example in batch],
                                   padding[i]))

            for i in six.moves.range(len(first_elem)):
                result.append(self._conveyor[i].get(sync=self._sync_get))

            if self.compute_stream is not None:
                # Make the compute stream wait for an event recorded on
                # the transfer stream.
                self._event2.record(stream=self._stream)
                self.compute_stream.wait_event(self._event2)

            return tuple(result)

        elif isinstance(first_elem, dict):
            result = {}
            if not isinstance(padding, dict):
                padding = {key: padding for key in first_elem}

            for key in first_elem:
                self._conveyor[key].put(
                    _concat_arrays([example[key] for example in batch],
                                   padding[key]))

            for key in first_elem:
                result[key] = self._conveyor[key].get(sync=self._sync_get)

            if self.compute_stream is not None:
                self._event2.record(stream=self._stream)
                self.compute_stream.wait_event(self._event2)

            return result

        else:
            # Plain examples go through to_device, not the conveyor.
            return to_device(device, _concat_arrays(batch, padding))
def main():
    """Generate text from a trained PTB language model.

    Loads the ``RNNForLM`` weights given by ``--model``, feeds the word
    given by ``--primetext`` and writes ``--length`` generated words to
    standard output. Words are sampled from the predicted distribution when
    ``--sample`` is positive and chosen by ``argmax`` otherwise.

    Exits with status 1 when the prime word is not in the vocabulary.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', '-m', type=str, required=True,
                        help='model data, saved by train_ptb.py')
    parser.add_argument('--primetext', '-p', type=str, required=True,
                        default='',
                        help='base text data, used for text generation')
    parser.add_argument('--seed', '-s', type=int, default=123,
                        help='random seeds for text generation')
    parser.add_argument('--unit', '-u', type=int, default=650,
                        help='number of units')
    parser.add_argument('--sample', type=int, default=1,
                        help='negative value indicates NOT use random choice')
    parser.add_argument('--length', type=int, default=20,
                        help='length of the generated text')
    parser.add_argument('--gpu', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    np.random.seed(args.seed)
    chainer.config.train = False

    xp = cuda.cupy if args.gpu >= 0 else np

    # load vocabulary
    vocab = chainer.datasets.get_ptb_words_vocabulary()
    ivocab = {}
    for c, i in vocab.items():
        ivocab[i] = c

    # should be same as n_units , described in train_ptb.py
    n_units = args.unit

    lm = train_ptb.RNNForLM(len(vocab), n_units)
    model = L.Classifier(lm)

    serializers.load_npz(args.model, model)

    if args.gpu >= 0:
        cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    model.predictor.reset_state()

    primetext = args.primetext
    if isinstance(primetext, six.binary_type):
        primetext = primetext.decode('utf-8')

    if primetext in vocab:
        prev_word = chainer.Variable(xp.array([vocab[primetext]], xp.int32))
    else:
        print('ERROR: Unfortunately ' + primetext + ' is unknown.')
        # Report the failure through the exit status; the bare exit() used
        # before terminated with status 0.
        sys.exit(1)

    # NOTE(review): the prime word is fed to the predictor here and again in
    # the first loop iteration. Kept as-is because removing either call
    # would change the generated text.
    prob = F.softmax(model.predictor(prev_word))
    sys.stdout.write(primetext + ' ')

    for _ in six.moves.range(args.length):
        prob = F.softmax(model.predictor(prev_word))
        if args.sample > 0:
            # Renormalize in float64 before handing the probabilities to
            # np.random.choice.
            probability = cuda.to_cpu(prob.data)[0].astype(np.float64)
            probability /= np.sum(probability)
            index = np.random.choice(range(len(probability)), p=probability)
        else:
            index = np.argmax(cuda.to_cpu(prob.data))

        if ivocab[index] == '<eos>':
            sys.stdout.write('.')
        else:
            sys.stdout.write(ivocab[index] + ' ')

        prev_word = chainer.Variable(xp.array([index], dtype=xp.int32))

    sys.stdout.write('\n')
# Read the evaluation list: each line holds a path (joined onto
# args.basepath) followed by an integer label.
dataset = []
with open(args.dataset) as list_file:
    for line in list_file:
        pair = line.strip().split()
        path = os.path.join(args.basepath, pair[0])
        dataset.append((path, np.int32(pair[1])))

# The number of samples must be a multiple of the batch size.
assert len(dataset) % args.batchsize == 0

# Progress messages go to stderr.
print('Loading Caffe model file %s...' % args.model, file=sys.stderr)
func = caffe.CaffeFunction(args.model)
print('Loaded', file=sys.stderr)
if args.gpu >= 0:
    cuda.get_device_from_id(args.gpu).use()
    func.to_gpu()

# Per-architecture input size, mean image and forward function.
if args.model_type == 'alexnet' or args.model_type == 'caffenet':
    in_size = 227
    mean_image = np.load(args.mean)

    def forward(x, t):
        # Returns (softmax cross-entropy loss, accuracy) computed from the
        # 'fc8' output of the Caffe function.
        y, = func(inputs={'data': x}, outputs=['fc8'])
        return F.softmax_cross_entropy(y, t), F.accuracy(y, t)
elif args.model_type == 'googlenet':
    in_size = 224
    # Constant mean over spatial pixels
    mean_image = np.ndarray((3, 256, 256), dtype=np.float32)
    mean_image[0] = 104
    mean_image[1] = 117
def forward(self, x, **kwargs):
    """forward(self, x, finetune=False)

    Runs the forward pass of BatchNormalization on ``x``.

    With ``chainer.config.train`` enabled, ``x`` goes through
    ``functions.batch_normalization`` and ``avg_mean`` / ``avg_var`` are
    supplied as the running statistics. With it disabled, the stored
    statistics are applied through ``functions.fixed_batch_normalization``.

    .. warning::

       ``test`` argument is not supported anymore since v2.
       Instead, use ``chainer.using_config('train', False)``.
       See :func:`chainer.using_config`.

    Args:
        x (Variable): Input variable.
        finetune (bool): When training and ``True``, ``self.N`` is
            incremented and the decay becomes ``1 - 1 / N``; otherwise
            ``self.decay`` is used.

    Returns:
        The result of the normalization function that was invoked.

    """
    parsed = argument.parse_kwargs(
        kwargs, ('finetune', False),
        test='test argument is not supported anymore. '
             'Use chainer.using_config')
    finetune, = parsed

    if self.avg_mean is None:
        # Parameters are created on first use from the dimensions of ``x``
        # whose index is not in ``self.axis``.
        param_shape = tuple(
            [dim for pos, dim in enumerate(x.shape) if pos not in self.axis])
        self._initialize_params(param_shape)

    gamma, beta = self.gamma, self.beta
    if gamma is None:
        # No learnable scale: use ones on this link's device.
        with cuda.get_device_from_id(self._device_id):
            gamma = self.xp.ones(self.avg_mean.shape, dtype=x.dtype)
    if beta is None:
        # No learnable shift: use zeros on this link's device.
        with cuda.get_device_from_id(self._device_id):
            beta = self.xp.zeros(self.avg_mean.shape, dtype=x.dtype)

    if configuration.config.train:
        if finetune:
            self.N += 1
        decay = (1. - 1. / self.N) if finetune else self.decay
        normalized = functions.batch_normalization(
            x, gamma, beta,
            eps=self.eps,
            running_mean=self.avg_mean,
            running_var=self.avg_var,
            decay=decay,
            axis=self.axis)
    else:
        # Use running average statistics or fine-tuned statistics.
        normalized = functions.fixed_batch_normalization(
            x, gamma, beta, self.avg_mean, self.avg_var, self.eps,
            axis=self.axis)
    return normalized
def initialize_params(self, in_size):
    """Initialize the ``upward`` link and this link's own parameters.

    Both initializations run with the device identified by
    ``self._device_id`` made current.

    Args:
        in_size: Input size forwarded to ``self.upward._initialize_params``.
    """
    own_device = cuda.get_device_from_id(self._device_id)
    with own_device:
        self.upward._initialize_params(in_size)
        self._initialize_params()
def main():
    """Train a VAE on the images stored in ``birds_img.npy``.

    Parses the command-line options, builds ``net_img.VAE(3, 10, 64)``,
    splits the images 80/20 into train and test sets, trains with Adam
    through a Chainer ``Trainer`` and saves the model to ``birds_img.npz``.
    """
    parser = argparse.ArgumentParser(description='Chainer example: VAE')
    parser.add_argument('--initmodel', '-m', default='',
                        help='Initialize the model from given file')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the optimization from snapshot')
    parser.add_argument('--gpu', '-g', default=0, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--epoch', '-e', default=100, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--dimz', '-z', default=10, type=int,
                        help='dimention of encoded vector')
    parser.add_argument('--batchsize', '-b', type=int, default=10,
                        help='learning minibatch size')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# dim z: {}'.format(args.dimz))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    on_gpu = args.gpu >= 0
    if on_gpu:
        cuda.get_device_from_id(args.gpu).use()

    # Create the VAE object defined in ``net_img``.
    model = net_img.VAE(3, 10, 64)
    if on_gpu:
        model.to_gpu()  # move the model to the GPU

    # Optimizer (for parameter updates).
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Loading an initial model (npz is the NumPy format) is disabled: the
    # code below sits inside a bare string and is never executed.
    """ if args.initmodel: chainer.serializers.load_npz(args.initmodel, model) """

    images = np.load('birds_img.npy')
    images = images.reshape((len(images), 3, 128, 128))
    print(images.shape)
    train, test = train_test_split(images, test_size=0.2, random_state=50)
    print("======")
    print(train.shape)

    # ---- Dataset iterators ----
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    # Set up an updater. StandardUpdater can explicitly specify a loss
    # function used in the training with 'loss_func' option.
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer,
        device=args.gpu, loss_func=model.get_loss_func())

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    evaluator = extensions.Evaluator(
        test_iter, model, device=args.gpu,
        eval_func=model.get_loss_func(k=10))
    trainer.extend(evaluator)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    report_keys = ['epoch', 'main/loss', 'validation/main/loss',
                   'main/rec_loss', 'validation/main/rec_loss',
                   'elapsed_time']
    trainer.extend(extensions.PrintReport(report_keys))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the trainer.
    trainer.run()

    # Visualize the results
    def save_images(x, filename):
        # Draws up to nine images on a 3x3 grid and saves the figure.
        # Nothing in this function calls it.
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(3, 3, figsize=(9, 9), dpi=100)
        for axis, image in zip(ax.flatten(), x):
            axis.imshow(image.reshape(128, 128))
        fig.savefig(filename)

    serializers.save_npz("birds_img.npz", model)
def test_gpu_to_another_gpu(self):
    """``backend.copyto`` copies into a destination created under device 1."""
    source = cuda.cupy.array(self.src_data)
    other_device = cuda.get_device_from_id(1)
    with other_device:
        target = self._get_dst()
    backend.copyto(target, source)
    cuda.cupy.testing.assert_array_equal(target, source)
def test_forward_gpu_multi(self):
    """Move the link and input under device 0, check forward under device 1."""
    setup_device = cuda.get_device_from_id(0)
    with setup_device:
        self.link.to_gpu()
        gpu_input = cuda.to_gpu(self.x)
    run_device = cuda.get_device_from_id(1)
    with run_device:
        self.check_forward(gpu_input)
def main():
    """Train YOLOv3 on the VOC bounding-box dataset.

    Parses the command-line options, optionally initializes the base
    network from Darknet53 weights given by ``--darknet``, and trains a
    ``YOLOv3Loss`` model with MomentumSGD using a ``StandardUpdater`` (zero
    or one GPU) or a ``ParallelUpdater`` (several GPUs).
    """
    parser = argparse.ArgumentParser(description='Chainer YOLOv3 VOC Train')
    parser.add_argument('--batchsize', '-b', type=int, default=8)
    parser.add_argument('--iteration', '-i', type=int, default=50200)
    parser.add_argument('--gpus', '-g', type=int, nargs='*', default=[])
    parser.add_argument('--out', '-o', default='yolov3-voc-result')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--display_interval', type=int, default=100)
    parser.add_argument('--snapshot_interval', type=int, default=100)
    parser.add_argument('--ignore_thresh', type=float, default=0.5)
    parser.add_argument('--thresh', type=float, default=0.4)
    parser.add_argument('--darknet', default='')
    parser.add_argument('--validation_size', type=int, default=32)
    args = parser.parse_args()

    print('GPUs: {}'.format(args.gpus))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# iteration: {}'.format(args.iteration))
    print('')

    random.seed(args.seed)
    np.random.seed(args.seed)

    # Optional pretrained feature extractor.
    base = None
    if len(args.darknet) > 0:
        darknet53 = Darknet53(20)
        serializers.load_npz(args.darknet, darknet53)
        base = darknet53.base
    yolov3 = YOLOv3(20, base, ignore_thresh=args.ignore_thresh)
    model = YOLOv3Loss(yolov3)

    n_gpus = len(args.gpus)
    device = -1
    if n_gpus > 0:
        device = args.gpus[0]
        cuda.cupy.random.seed(args.seed)
        cuda.get_device_from_id(args.gpus[0]).use()
    # With several GPUs the model is not moved here; the ParallelUpdater
    # below receives the device mapping instead.
    if n_gpus == 1:
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=0.001)
    optimizer.setup(model)
    optimizer.add_hook(optimizer_hooks.WeightDecay(0.0005), 'hook_decay')
    optimizer.add_hook(optimizer_hooks.GradientClipping(10.0),
                       'hook_grad_clip')

    train = VOCBboxDataset(split='train')
    test = VOCBboxDataset(split='val')
    train = YOLOVOCDataset(train, classifier=False, jitter=0.3,
                           hue=0.1, sat=1.5, val=1.5)
    test = YOLOVOCDataset(test, classifier=False)
    # Validate on a random subset of at most --validation_size samples.
    shuffled = np.random.permutation(np.arange(len(test)))
    n_validation = min(args.validation_size, len(test))
    test = test[shuffled[:n_validation]]

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    if n_gpus <= 1:
        updater = training.StandardUpdater(
            train_iter, optimizer, converter=concat_yolo, device=device)
    else:
        devices = {'main': args.gpus[0]}
        devices.update(
            ('gpu{}'.format(gpu), gpu) for gpu in args.gpus[1:])
        updater = training.ParallelUpdater(
            train_iter, optimizer, converter=concat_yolo, devices=devices)
    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)

    display_interval = (args.display_interval, 'iteration')
    snapshot_interval = (args.snapshot_interval, 'iteration')

    evaluator = extensions.Evaluator(
        test_iter, model, converter=concat_yolo, device=device)
    trainer.extend(evaluator, trigger=display_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport(trigger=display_interval))
    if extensions.PlotReport.available():
        loss_plot = extensions.PlotReport(
            ['main/loss', 'validation/main/loss'],
            'iteration', display_interval, file_name='loss.png')
        trainer.extend(loss_plot)

    report = extensions.PrintReport(
        ['epoch', 'iteration', 'main/loss', 'validation/main/loss',
         'elapsed_time'])
    trainer.extend(report, trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=1))

    # One snapshot whenever the validation loss reaches a new minimum, and
    # one unconditional snapshot at every snapshot interval.
    best_trigger = training.triggers.MinValueTrigger(
        'validation/main/loss', snapshot_interval)
    trainer.extend(
        extensions.snapshot_object(yolov3, 'yolov3_snapshot.npz'),
        trigger=best_trigger)
    trainer.extend(
        extensions.snapshot_object(yolov3, 'yolov3_final.npz'),
        trigger=snapshot_interval)

    shift_steps = [args.iteration - 10200, args.iteration - 5200]
    trainer.extend(DarknetShift(
        optimizer, 'steps', args.iteration, burn_in=1000,
        steps=shift_steps, scales=[0.1, 0.1]))
    crop_sizes = [(10 + i) * 32 for i in range(0, 5)]
    trainer.extend(CropSizeUpdater(train, crop_sizes, args.iteration - 200))

    # Run the detector on a sample image at every display interval.
    detector = YOLOv3Predictor(yolov3, thresh=args.thresh)
    class_names = load_list('./data/voc.names')
    trainer.extend(YOLODetection(
        detector, ['./data/image/dog.jpg'], class_names,
        size=(416, 416), thresh=args.thresh,
        trigger=display_interval, device=device))

    trainer.run()
def main():
    """Train the conditional GAN (generator + discriminator) on CIFAR-10.

    Parses the command-line options, builds the models and their Adam
    optimizers, wires up a ``CGANUpdater``-driven trainer with snapshot,
    logging, plotting and preview-image extensions, optionally restores a
    trainer snapshot given with ``--resume``, and runs the training loop.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=50)
    parser.add_argument('--epoch', '-e', type=int, default=1000)
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--out', '-o', default='')
    parser.add_argument('--resume', '-r', default='')
    parser.add_argument('--n_noise', '-n', type=int, default=100)
    # NOTE(review): --seed is parsed but not consumed anywhere in this
    # function; the preview extension below uses a fixed seed of 800.
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--snapshot_interval', type=int, default=1000)
    parser.add_argument('--display_interval', type=int, default=100)
    args = parser.parse_args()

    # Everything is written below 'result', optionally in a sub-directory.
    out_dir = 'result'
    if args.out != '':
        out_dir = '{}/{}'.format(out_dir, args.out)

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# n_hidden: {}'.format(args.n_noise))
    print('# epoch: {}'.format(args.epoch))
    print('# out: {}'.format(out_dir))
    print('')

    gen = Generator(n_noise=args.n_noise, n_class=10)
    dis = Discriminator(n_class=10)

    if args.gpu >= 0:
        # The device must be made current with ``.use()``; merely fetching
        # the device object leaves the default device selected, so the
        # models would not be placed on the GPU requested with --gpu.
        cuda.get_device_from_id(args.gpu).use()
        gen.to_gpu()
        dis.to_gpu()

    def make_optimizer(model, alpha=0.0002, beta1=0.5):
        """Return an Adam optimizer with weight decay set up for ``model``."""
        optimizer = chainer.optimizers.Adam(alpha=alpha, beta1=beta1)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001), 'hook_dec')
        return optimizer

    gen_optimizer = make_optimizer(gen)
    dis_optimizer = make_optimizer(dis)

    def transformer(data):
        """Prepend a freshly drawn noise sample to a dataset example."""
        return (gen.make_noise(), ) + data

    train, _ = chainer.datasets.get_cifar10(withlabel=True)
    train = chainer.datasets.TransformDataset(train, transformer)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

    updater = CGANUpdater(
        models=(gen, dis),
        iterator=train_iter,
        optimizer={
            'gen': gen_optimizer,
            'dis': dis_optimizer,
        },
        device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=out_dir)

    snapshot_interval = (args.snapshot_interval, 'iteration')
    display_interval = (args.display_interval, 'iteration')

    # Periodic snapshots: the whole trainer plus each model on its own.
    trainer.extend(
        extensions.snapshot(filename='snapshot_iter_{.updater.iteration}.npz'),
        trigger=snapshot_interval)
    trainer.extend(
        extensions.snapshot_object(
            gen, 'gen_iter_{.updater.iteration}.npz'),
        trigger=snapshot_interval)
    trainer.extend(
        extensions.snapshot_object(
            dis, 'dis_iter_{.updater.iteration}.npz'),
        trigger=snapshot_interval)

    # Reporting.
    trainer.extend(extensions.LogReport(trigger=display_interval))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'iteration', 'gen/loss', 'dis/loss',
        ]),
        trigger=display_interval)
    trainer.extend(
        extensions.PlotReport(
            ['gen/loss', 'dis/loss'],
            x_key='iteration',
            trigger=display_interval))
    trainer.extend(extensions.ProgressBar(update_interval=10))

    def gen_func(data):
        """Run the generator on a ``(noise, label)`` pair."""
        return gen(data[0], data[1])

    def data_func(gen):
        """Build the per-cell input factory for the preview grid."""
        def _data_func(index):
            # With a 10-column grid, ``index // 10`` is the row number, so
            # each row of the preview shares one label value.
            return (gen.make_noise(), index // 10)
        return _data_func

    trainer.extend(
        GenerateImage(
            gen_func,
            data_func(gen),
            file_name='{}/{}'.format(
                out_dir, 'preview/{.updater.iteration:0>8}.png'),
            rows=10,
            cols=10,
            seed=800,
            device=args.gpu,
            trigger=snapshot_interval))

    if args.resume:
        # Restore a trainer snapshot (as written by ``extensions.snapshot``
        # above) so that --resume is honoured instead of silently ignored.
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()