def forward(self):
    x = chainer.Variable(self.x)
    return functions.max_pooling_2d(x, 3, stride=2, pad=1, cover_all=False)
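# A note on cover_all: with cover_all=False the spatial output size is
# floor((in + 2*pad - k) / s) + 1; with cover_all=True it is
# ceil((in + 2*pad - k) / s) + 1, so every input pixel is covered by some
# pooling window. A quick illustrative check (input shape chosen arbitrarily):
import numpy as np
import chainer
from chainer import functions

x8 = chainer.Variable(np.zeros((1, 1, 8, 8), dtype=np.float32))
print(functions.max_pooling_2d(x8, 3, stride=2, pad=1, cover_all=False).shape)
# -> (1, 1, 4, 4)
print(functions.max_pooling_2d(x8, 3, stride=2, pad=1, cover_all=True).shape)
# -> (1, 1, 5, 5)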
def test_forward_gpu(self):
    self.l.to_gpu()
    with self.h:
        self.l(chainer.Variable(cuda.to_gpu(self.x)))
def test_forward_gpu(self):
    self.f(chainer.Variable(cuda.to_gpu(self.x)))
        y_batch = xp.asarray(dataset[i + 1:i + 2])
        state, loss = forward_one_step(x_batch, y_batch, state, train=False)
        sum_log_perp += loss.data.reshape(())
    return math.exp(cuda.to_cpu(sum_log_perp) / (dataset.size - 1))

# Learning loop
whole_len = train_data.shape[0]
jump = whole_len // batchsize
cur_log_perp = xp.zeros(())
epoch = 0
start_at = time.time()
cur_at = start_at
state = make_initial_state()
accum_loss = chainer.Variable(xp.zeros((), dtype=np.float32))
print('going to train {} iterations'.format(jump * n_epoch))
for i in six.moves.range(jump * n_epoch):
    x_batch = xp.array([
        train_data[(jump * j + i) % whole_len]
        for j in six.moves.range(batchsize)
    ])
    y_batch = xp.array([
        train_data[(jump * j + i + 1) % whole_len]
        for j in six.moves.range(batchsize)
    ])
    state, loss_i = forward_one_step(x_batch, y_batch, state)
    accum_loss += loss_i
    cur_log_perp += loss_i.data.reshape(())

    if (i + 1) % bprop_len == 0:  # Run truncated BPTT
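# The excerpt above is cut off at the BPTT boundary. For reference, in the
# classic Chainer ptb example the body of that branch looks roughly like the
# sketch below (`optimizer` and `grad_clip` come from the surrounding script
# and are assumptions here, not part of the excerpt):
#
#     optimizer.zero_grads()
#     accum_loss.backward()
#     accum_loss.unchain_backward()  # truncate the computation history
#     accum_loss = chainer.Variable(xp.zeros((), dtype=np.float32))
#     optimizer.clip_grads(grad_clip)
#     optimizer.update()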
def forward(self):
    x = chainer.Variable(self.x)
    return self.mlp(x)
def check_unstride_forward(self, xp):
    x = xp.arange(12, dtype=self.dtype).reshape((3, 4))[::-1]
    v = chainer.Variable(x)
    y = F.as_strided(v, (12,), (1,), 0)
    y_expected = xp.arange(12, dtype=self.dtype)
    testing.assert_allclose(y.array, y_expected)
def extract(self, images, layers=['fc5']):
    self._layer_names = layers
    x = chainer.Variable(self.xp.asarray(images))
    return chainer.cuda.to_cpu(self(x).data)
def check_forward(self, x_data):
    x = chainer.Variable(x_data)
    y = self.link(x)
    self.assertEqual(y.data.dtype, numpy.float32)
    testing.assert_allclose(self.y, y.data)
def backward(self, indexes, grad_outputs):
    x, W, gy = self.get_retained_inputs()
    device = backend.get_device_from_array(x.data)
    xp = device.xp

    if 0 in indexes:
        gx = chainer.Variable(xp.zeros_like(x.data))
    if 1 in indexes:
        gW = chainer.Variable(xp.zeros_like(W.data))
    if 2 in indexes:
        ggy = chainer.Variable(xp.zeros_like(gy.data))
    ggx, _, ggW = grad_outputs

    pos_neg_mask = xp.ones(self.sample_size + 1)
    pos_neg_mask[0] *= -1

    with chainer.using_device(device):
        arange = xp.arange(len(self.ignore_mask))
    for i in arange[self.ignore_mask]:
        # Partial forward pass to obtain intermediate `Variable`s
        ix = x[i]
        k = self.samples[i]

        if self.reduce == 'sum':
            igy = gy
        else:
            igy = gy[i]

        w = W[k]
        f = chainer.functions.flatten(
            chainer.functions.matmul(w, ix[:, None])) * pos_neg_mask
        sigf = chainer.functions.sigmoid(f)
        g = chainer.functions.broadcast_to(igy, f.shape) * sigf \
            * pos_neg_mask

        dgW_dg = chainer.functions.flatten(
            chainer.functions.matmul(ggW[k], ix[:, None])) * pos_neg_mask
        dgW_df = chainer.functions.broadcast_to(igy, f.shape) \
            * _sigmoid_grad(f, sigf, dgW_dg) * pos_neg_mask
        dgx_dg = chainer.functions.flatten(
            chainer.functions.matmul(ggx[i][None, :], w, transb=True))
        dgx_df = chainer.functions.broadcast_to(igy, f.shape) \
            * _sigmoid_grad(f, sigf, dgx_dg)

        if 0 in indexes:
            # derivative of gx
            dgx = chainer.functions.matmul(w, dgx_df[:, None], transa=True)
            # derivative of gW
            dgx += chainer.functions.matmul(g[None, :], ggW[k]).T
            dgx += chainer.functions.matmul(
                w, dgW_df[:, None], transa=True)
            gx = chainer.functions.scatter_add(
                gx, i, chainer.functions.flatten(dgx))

        if 1 in indexes:
            # derivative of gx
            shape = ggx[i].shape
            for ik, ig, idgx_df in six.moves.zip(k, g, dgx_df):
                ig = chainer.functions.broadcast_to(ig, shape)
                idgx_df = chainer.functions.broadcast_to(idgx_df, shape)
                gW = chainer.functions.scatter_add(
                    gW, ik, ig * ggx[i] + idgx_df * ix)
            # derivative of gW
            gW = chainer.functions.scatter_add(
                gW, k,
                chainer.functions.matmul(dgW_df[:, None], ix[None, :]))

        if 2 in indexes:
            dgx_dg *= pos_neg_mask
            dggy = chainer.functions.sum((dgx_dg + dgW_dg) * sigf)
            if self.reduce == 'sum':
                ggy += dggy
            else:
                ggy = chainer.functions.scatter_add(ggy, i, dggy)

    ret = []
    if 0 in indexes:
        ret.append(gx)
    if 1 in indexes:
        ret.append(gW)
    if 2 in indexes:
        ret.append(ggy)
    return ret
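# `_sigmoid_grad(f, sigf, g)` above is a module-level helper. Mathematically
# it is the sigmoid backward pass: for s = sigmoid(f) it returns
# g * s * (1 - s). A minimal sketch consistent with that contract (the real
# helper may instead reuse Chainer's fused SigmoidGrad kernel):
def _sigmoid_grad_sketch(f, sigf, g):
    # d(sigmoid)/df = sigmoid(f) * (1 - sigmoid(f))
    return g * sigf * (1 - sigf)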
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', '-m', type=str, required=True,
                        help='model data, saved by train.py')
    parser.add_argument('--file', '-f', type=str, required=True,
                        help='input text file, used for reaction analysis')
    parser.add_argument('--label', type=str, default=None,
                        help='label file for calculating accuracy')
    parser.add_argument('--input_vocab', '-i', type=str,
                        default='data/input_vocab.bin',
                        help='input text vocabulary dictionary')
    parser.add_argument('--label_vocab', '-l', type=str,
                        default='data/label_vocab.bin',
                        help='input label vocabulary dictionary')
    parser.add_argument('--seqlen', type=int, required=True,
                        help='sequence length')
    parser.add_argument('-n', type=int, default=3,
                        help='number of candidates')
    parser.add_argument('--fraction', type=float, default=0.0,
                        help='split ratio of dataset '
                             '(0 means all data goes to test)')
    parser.add_argument('--unit', '-u', type=int, default=650,
                        help='number of units')
    parser.add_argument('--gpu', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    xp = cuda.cupy if args.gpu >= 0 else np

    # load dataset and vocabulary
    seq_len = args.seqlen
    # For testing with labels
    # ds = dataset.Dataset(args.file, label=args.label,
    #                      input_vocab=args.input_vocab,
    #                      label_vocab=args.label_vocab, seq_len=seq_len)
    ds = dataset.Dataset(args.file, label=args.label,
                         input_vocab=args.input_vocab,
                         label_vocab=args.label_vocab, seq_len=seq_len,
                         fraction=args.fraction)
    _, test = ds.get_inputs_and_labels()
    input_vocab, label_vocab = ds.get_vocab()
    input_ivocab = {i: c for c, i in input_vocab.items()}
    label_ivocab = {i: c for c, i in label_vocab.items()}

    # should be same as n_units, described in train.py
    n_units = args.unit

    lm = net.RNNLM(len(input_vocab), len(label_vocab), n_units, train=False)
    model = L.Classifier(lm)
    serializers.load_npz(args.model, model)

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    n_top = args.n
    n_match = 0
    n_total = 0
    has_label = (len(test[0]) == len(test[1]))

    sys.stdout.write('\n')
    for i, data in enumerate(test[0]):
        print('[input {0}/{1}]'.format(i + 1, len(test[0])),
              ' '.join(iconv(data, input_ivocab)))
        model.predictor.reset_state()
        for j in six.moves.range(seq_len):
            word = chainer.Variable(xp.array([data[j]]), volatile='on')
            pred = F.softmax(model.predictor(word))
            if j == seq_len - 1:
                if args.gpu >= 0:
                    pred_data = cuda.to_cpu(pred.data)
                else:
                    pred_data = pred.data
                indice = pred_data[0].argsort()[-n_top:][::-1]
                probs = pred_data[0][indice]
                result = [(label_ivocab[idx], prob)
                          for (idx, prob) in zip(indice, probs)]
                if has_label:
                    y = test[1][i]
                    print('[suggested reactions] %s' % result)
                    n_total += 1
                    if indice[0] == y:
                        print(label_ivocab[indice[0]], '(prediction)', '==',
                              label_ivocab[y], '(actual)', '? => MATCH')
                        n_match += 1
                    else:
                        print(label_ivocab[indice[0]], '(prediction)', '==',
                              label_ivocab[y], '(actual)', '? => NOT MATCH')
                else:
                    print('[suggested reactions] %s' % result)
        if has_label:
            print('cumulative accuracy=%f' % (n_match / n_total))
        sys.stdout.write('\n')
def test_invalid_size(self):
    with self.assertRaises(type_check.InvalidType):
        self.link(chainer.Variable(self.x))
model = L.Classifier(MyChain(), lossfun=F.softmax_cross_entropy)
chainer.serializers.load_npz("result/CNN.model", model)

cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    xp = int(frame.shape[1] / 2)
    yp = int(frame.shape[0] / 2)
    d = 28 * 2
    cv2.rectangle(gray, (xp - d, yp - d), (xp + d, yp + d),
                  color=0, thickness=2)
    cv2.imshow('gray', gray)
    if cv2.waitKey(10) == 113:  # quit on 'q'
        break
    gray = cv2.resize(gray[yp - d:yp + d, xp - d:xp + d], (28, 28))
    img = np.zeros((28, 28), dtype=np.float32)
    img[np.where(gray > 64)] = 1
    img = 1 - np.asarray(img, dtype=np.float32)  # normalize to the 0-1 range
    # convert to a 4-D array (1x1x28x28: batch x channels x height x width)
    img = img[np.newaxis, np.newaxis, :, :]
    x = chainer.Variable(img)
    y = model.predictor(x)
    c = F.softmax(y).data.argmax()
    print(c)
cap.release()
def check_forward(self, h_data, c_data, xs_data):
    if self.hidden_none:
        h = c = None
    else:
        h = chainer.Variable(h_data)
        c = chainer.Variable(c_data)
    xs = [chainer.Variable(x) for x in xs_data]
    hy, cy, ys = self.rnn(h, c, xs)

    assert hy.shape == h_data.shape
    assert cy.shape == c_data.shape
    assert len(xs) == len(ys)
    for x, y in zip(xs, ys):
        assert len(x) == len(y)
        assert y.shape[1] == self.out_size * 2

    self.rnn.to_cpu()
    for batch, seq in enumerate(self.xs):
        for layer in range(self.n_layers):
            # forward
            di = 0
            layer_idx = layer * 2 + di
            p = self.rnn[layer_idx]
            h_prev = self.h[layer_idx, batch]
            c_prev = self.c[layer_idx, batch]
            hs_f = []
            for x in seq:
                i = sigmoid(x.dot(p.w0.array.T) + h_prev.dot(p.w4.array.T) +
                            p.b0.array + p.b4.array)
                f = sigmoid(x.dot(p.w1.array.T) + h_prev.dot(p.w5.array.T) +
                            p.b1.array + p.b5.array)
                c_bar = numpy.tanh(x.dot(p.w2.array.T) +
                                   h_prev.dot(p.w6.array.T) +
                                   p.b2.array + p.b6.array)
                o = sigmoid(x.dot(p.w3.array.T) + h_prev.dot(p.w7.array.T) +
                            p.b3.array + p.b7.array)
                e_c = (f * c_prev + i * c_bar)
                e_h = o * numpy.tanh(e_c)
                h_prev = e_h
                c_prev = e_c
                hs_f.append(e_h)
            testing.assert_allclose(hy.array[layer_idx, batch], h_prev)
            testing.assert_allclose(cy.array[layer_idx, batch], c_prev)

            # backward
            di = 1
            layer_idx = layer * 2 + di
            p = self.rnn[layer_idx]
            h_prev = self.h[layer_idx, batch]
            c_prev = self.c[layer_idx, batch]
            hs_b = []
            for x in reversed(seq):
                i = sigmoid(x.dot(p.w0.array.T) + h_prev.dot(p.w4.array.T) +
                            p.b0.array + p.b4.array)
                f = sigmoid(x.dot(p.w1.array.T) + h_prev.dot(p.w5.array.T) +
                            p.b1.array + p.b5.array)
                c_bar = numpy.tanh(x.dot(p.w2.array.T) +
                                   h_prev.dot(p.w6.array.T) +
                                   p.b2.array + p.b6.array)
                o = sigmoid(x.dot(p.w3.array.T) + h_prev.dot(p.w7.array.T) +
                            p.b3.array + p.b7.array)
                e_c = (f * c_prev + i * c_bar)
                e_h = o * numpy.tanh(e_c)
                h_prev = e_h
                c_prev = e_c
                hs_b.append(e_h)
            testing.assert_allclose(hy.array[layer_idx, batch], h_prev)
            testing.assert_allclose(cy.array[layer_idx, batch], c_prev)

            hs_b.reverse()
            seq = [numpy.concatenate([hfi, hbi], axis=0)
                   for (hfi, hbi) in zip(hs_f, hs_b)]

        for y, ey in zip(ys[batch].array, seq):
            testing.assert_allclose(y, ey)
def decode_cupy(self, z):
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        z = chainer.Variable(z.T)
        x = chf.exp(self.decode(z)).data.T  # exp(log(power)) = power
    return x
def get_onehot(num):
    return chainer.Variable(np.array([num], dtype=np.int32))
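# Despite its name, get_onehot returns a length-1 int32 ID Variable rather
# than a dense one-hot vector; that ID format is what chainer.links.EmbedID
# expects, since it performs the embedding lookup internally. A usage sketch
# (the vocabulary and embedding sizes here are illustrative only):
import numpy as np
import chainer
import chainer.links as L

embed = L.EmbedID(10, 4)      # vocabulary of 10 ids, 4-dim embeddings
e = embed(get_onehot(7))      # e.shape == (1, 4)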
def check_forward(self, x1_data, x2_data, axis, y_expected):
    x1 = chainer.Variable(x1_data)
    x2 = chainer.Variable(x2_data)
    y = functions.bias(x1, x2, axis)
    testing.assert_allclose(y_expected, y.data)
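# functions.bias(x1, x2, axis) broadcasts x2 against x1.shape[axis:axis +
# x2.ndim] and adds it elementwise. A quick illustrative check (the shapes
# below are chosen arbitrarily):
import numpy as np
from chainer import functions

x1 = np.zeros((3, 2, 4), dtype=np.float32)
x2 = np.array([1., 2.], dtype=np.float32)
y = functions.bias(x1, x2, axis=1)   # y[i, j, k] == x1[i, j, k] + x2[j]
print(y.shape)  # -> (3, 2, 4)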
def check_broadcast_forward(self, xp):
    x = xp.arange(12, dtype=self.dtype).reshape((3, 4)).copy()
    v = chainer.Variable(x)
    y = F.as_strided(v, (2, 3, 4), (0, 4, 1), 0)
    y_expected = _broadcast_to(xp, x, (2, 3, 4))
    testing.assert_allclose(y.array, y_expected)
def _export(model, args, filename, export_params, graph_name, save_text,
            opset_version, input_names, output_names, return_named_inout,
            external_converters, external_opset_imports, input_shapes):
    if opset_version is None:
        opset_version = min(
            int(onnx.defs.onnx_opset_version()), MAXIMUM_OPSET_VERSION)
    elif opset_version < MINIMUM_OPSET_VERSION or \
            opset_version > MAXIMUM_OPSET_VERSION:
        warnings.warn(
            'ONNX-Chainer has been tested only with opset_version {} ~ {}. '
            'The ONNX file exported with your requested opset_version ({}) '
            'may cause some problems because the converters used for the '
            'opset_version have not been tested.'.format(
                MINIMUM_OPSET_VERSION, MAXIMUM_OPSET_VERSION, opset_version))

    if input_shapes is not None:
        # if input shapes are invalid, raise exception before forwarding.
        input_shapes = format_customized_shapes(args, input_shapes)

    with RetainInputHook():
        # Forward computation
        context = Context(model)
        network_inputs = OrderedDict()
        if isinstance(args, tuple):
            args = list(args)
        if isinstance(args, list):
            for i, arg in enumerate(args):
                if isinstance(arg, chainer.get_array_types()):
                    args[i] = chainer.Variable(arg)
                network_inputs[context.get_name(args[i])] = args[i]
            outputs = model(*args)
        elif isinstance(args, dict):
            for key, arg in args.items():
                if isinstance(arg, chainer.get_array_types()):
                    args[key] = chainer.Variable(arg)
                network_inputs[context.get_name(args[key])] = args[key]
            outputs = model(**args)
        elif isinstance(args, chainer.get_array_types()):
            args = chainer.Variable(args)
            network_inputs[context.get_name(args)] = args
            outputs = model(args)
        elif isinstance(args, chainer.Variable):
            network_inputs[context.get_name(args)] = args
            outputs = model(args)
        else:
            raise ValueError(
                'The \'args\' argument should be a list, tuple, dict, '
                'numpy array, or Chainer Variable. But a {} object was '
                'given.'.format(type(args)))
    rename_variable_name(context, args, network_inputs, input_names)

    initializers = []
    input_tensors = []
    param_names = set()
    for org_name, param in model.namedparams():
        # `model.namedparams()` has an `include_uninit` flag, but it is not
        # used here so that a user warning can be emitted for uninitialized
        # parameters.
        if param.array is None:
            warnings.warn(
                'The parameter \'{}\' is not initialized, skip setting to '
                'ONNX graph'.format(org_name))
            continue
        name = context.get_name(param)
        param_names.add(name)
        tensor = convert_parameter(param, context)
        initializers.append(tensor)
        input_tensors.append(helper.make_tensor_value_info(
            name, tensor.data_type, tensor.dims))

    for i, (name, var) in enumerate(network_inputs.items()):
        shape = var.shape if input_shapes is None else input_shapes[i]
        input_tensors.append(helper.make_tensor_value_info(
            name, NP_TYPE_TO_TENSOR_TYPE[var.dtype], shape))

    if external_converters:
        chainer.utils.experimental('external_converters')
        converters = dict(mapping.converters, **external_converters)
    else:
        converters = mapping.converters

    if isinstance(outputs, (list, tuple)):
        flat_outputs = outputs
    elif isinstance(outputs, dict):
        flat_outputs = list(outputs.values())
    elif isinstance(outputs, chainer.Variable):
        flat_outputs = [outputs]
    else:
        raise RuntimeError(
            'Unexpected output type from the model: {}'.format(type(outputs)))
    if not all([isinstance(o, chainer.Variable) for o in flat_outputs]):
        raise ValueError('All \'outputs\' must be Chainer Variables')
    network_outputs = OrderedDict(
        [(context.get_name(var), var) for var in flat_outputs])
    if output_names:
        rename_variable_name(context, outputs, network_outputs, output_names)

    o = Graph(context, converters, opset_version,
              param_names | set(network_inputs.keys()), network_outputs)
    o.to_onnx_graph()

    implicit_input_names = set(context.implicit_inputs.keys())
    for name in implicit_input_names:
        tensor = convert_parameter(context.implicit_inputs[name], context)
        initializers.append(tensor)
        input_tensors.append(helper.make_tensor_value_info(
            name, tensor.data_type, tensor.dims))

    # If additional parameters are created during conversion
    for param in context.parameters:
        tensor = convert_parameter(param, context)
        initializers.append(tensor)
        input_tensors.append(helper.make_tensor_value_info(
            context.get_name(param), tensor.data_type, tensor.dims))

    # Convert output tensors
    output_tensors = []
    for name, var in network_outputs.items():
        output_tensors.append(helper.make_tensor_value_info(
            name, NP_TYPE_TO_TENSOR_TYPE[var.dtype], var.shape))

    if not export_params:
        initializers = []

    onnx_graph = helper.make_graph(
        o.graph, graph_name, input_tensors, output_tensors,
        initializer=initializers)

    opset_imports = [helper.make_operatorsetid('', opset_version)]
    if external_opset_imports:
        chainer.utils.experimental('external_opset_imports')
        for domain, version in external_opset_imports.items():
            opset_imports.append(helper.make_operatorsetid(domain, version))
    model = helper.make_model(
        onnx_graph,
        producer_name='Chainer',
        producer_version=chainer.__version__,
        opset_imports=opset_imports)

    model.ir_version = onnx.IR_VERSION
    check_onnx_model(model, external_converters, external_opset_imports)

    if input_shapes is not None:
        for output in model.graph.output:
            for d in output.type.tensor_type.shape.dim:
                d.Clear()
        model = shape_inference.infer_shapes(model)
        check_onnx_model(model, external_converters, external_opset_imports)

    if filename is not None and isinstance(filename, str):
        with open(filename, 'wb') as fp:
            fp.write(model.SerializeToString())
        if save_text:
            with open(filename + '.txt', 'w') as fp:
                print(model, file=fp)
    elif hasattr(filename, 'write'):
        filename.write(model.SerializeToString())

    if return_named_inout:
        chainer.utils.experimental('return_named_inout')
        return model, network_inputs, network_outputs
    return model
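# This private helper is normally reached through the public
# onnx_chainer.export wrapper. A minimal usage sketch (the link and input
# shape below are illustrative only, not taken from this code):
import numpy as np
import chainer.links as L
import onnx_chainer

model = L.Convolution2D(None, 8, ksize=3)
x = np.zeros((1, 3, 28, 28), dtype=np.float32)
onnx_model = onnx_chainer.export(model, x, filename='conv.onnx')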
def check_flip_forward(self, xp):
    x = xp.arange(4, dtype=self.dtype)
    v = chainer.Variable(x)
    y = F.as_strided(v, (4,), (-1,), 3)
    y_expected = x[::-1]
    testing.assert_allclose(y.array, y_expected)
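# The flip, unstride, and broadcast checks above all exercise
# chainer.functions.as_strided(x, shape, strides, storage_offset). A minimal
# standalone sketch of the flip case with NumPy inputs (values illustrative):
import numpy as np
import chainer
import chainer.functions as F

v = chainer.Variable(np.arange(4, dtype=np.float32))
y = F.as_strided(v, (4,), (-1,), 3)  # stride -1 from offset 3 reverses the view
print(y.array)  # -> [3. 2. 1. 0.]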
n_epoch = args.epoch
data = Data()
N = data.N
TEST_N = data.TEST_N

# Learning loop
for epoch in six.moves.range(1, n_epoch + 1):
    print('epoch', epoch)

    # training
    perm = np.random.permutation(N)
    sum_loss = 0  # total loss
    for i in six.moves.range(0, N, batchsize):
        x_batch, t_batch = data.get(perm[i:i + batchsize])
        x = chainer.Variable(xp.asarray(x_batch))
        t = chainer.Variable(xp.asarray(t_batch))
        # model.cleargrads()
        model.zerograds()
        loss = model(x, t)
        loss.backward()
        optimizer.update()
        sum_loss += float(loss.data) * len(x.data)
    print('train mean loss: {}'.format(sum_loss / N))

    # test
    x_batch, t_batch = data.get(range(TEST_N), test=True)
    x = chainer.Variable(xp.asarray(x_batch))
def test_backward_custom_cpu(self):
    # Verify that both the dynamic and static networks produce the same
    # results on forward and backward passes.
    print('debug: Original input variable array: ', self.x)
    x_var_dyn = chainer.Variable(self.x)
    y_dyn = self.dynamic_chain(x_var_dyn)
    y_dyn.grad = self.gy
    y_dyn.backward()
    self.dynamic_chain.cleargrads()
    x_var_dyn.grad_var = None

    # Do forward and backward pass on the static chain and then
    # set its parameters to the same values as the dynamic chain.
    x_var_static = chainer.Variable(self.x.copy())
    y_static = self.static_chain(x_var_static)
    y_static.grad = self.gy
    y_static.backward()
    self.static_chain.cleargrads()
    x_var_static.grad_var = None
    self.static_chain.l1.W.data = self.dynamic_chain.l1.W.data.copy()
    self.static_chain.l1.b.data = self.dynamic_chain.l1.b.data.copy()
    self.static_chain.l2.W.data[...] = self.dynamic_chain.l2.W.data
    self.static_chain.l2.b.data[...] = self.dynamic_chain.l2.b.data
    self.static_chain.l3.W.data[...] = self.dynamic_chain.l3.W.data
    self.static_chain.l3.b.data[...] = self.dynamic_chain.l3.b.data

    # Do forward pass and verify that the outputs match the dynamic chain.
    # Use a different input variable for this pass.
    x_size = (self.batch_size, self.in_units)
    new_x_data = numpy.random.uniform(size=x_size).astype(self.x_dtype)
    print('debug: 2nd iteration input variable array: ', new_x_data)
    x_var_dyn = chainer.Variable(new_x_data)
    x_var_static = chainer.Variable(new_x_data.copy())
    y_static = self.static_chain(x_var_static)
    assert y_static.data is not None
    y_dyn = self.dynamic_chain(x_var_dyn)
    assert y_dyn.data is not None
    chainer.testing.assert_allclose(y_dyn.data, y_static.data)

    # Use a different gy for the backward pass:
    y_size = (self.batch_size, self.out_units)
    new_y_data = numpy.random.uniform(size=y_size).astype(self.x_dtype)
    print('debug: 2nd iteration gy variable array: ', new_y_data)
    x_var_static.grad = None
    self.static_chain.cleargrads()
    y_static.grad = new_y_data
    y_static.backward()
    x_var_dyn.grad = None
    self.dynamic_chain.cleargrads()
    y_dyn.grad = new_y_data.copy()
    y_dyn.backward()
    assert x_var_dyn.grad is not None
    assert x_var_static.grad is not None
    chainer.testing.assert_allclose(x_var_dyn.grad, x_var_static.grad)
    self.check_network_params_are_equal()

    n_size = (self.batch_size, self.in_units)
    noise1 = 0.1 * numpy.random.uniform(size=n_size).astype(self.x_dtype)
    x_pass1 = new_x_data + noise1

    # Modify l2.W's data:
    l2s = self.static_chain.l2.W.data.shape
    new_l2_W_data = 0.1 * numpy.random.uniform(size=l2s).astype(
        self.x_dtype)
    self.static_chain.l2.W.data = new_l2_W_data
    self.dynamic_chain.l2.W.data = new_l2_W_data

    ns = (self.batch_size, self.out_units)
    new_y_data = numpy.random.uniform(size=ns).astype(self.x_dtype)
    x_var_static.data = x_pass1
    y_static = self.static_chain(x_var_static)
    assert y_static.data is not None
    y_static.grad = new_y_data
    self.static_chain.cleargrads()
    y_static.backward()

    x_var_dyn.data = x_pass1
    y_dyn = self.dynamic_chain(x_var_dyn)
    assert y_dyn.data is not None
    y_dyn.grad = new_y_data.copy()
    self.dynamic_chain.cleargrads()
    y_dyn.backward()
    chainer.testing.assert_allclose(y_dyn.data, y_static.data)
    self.check_network_params_are_equal()
    assert x_var_dyn.grad is not None
    assert x_var_static.grad is not None
    chainer.testing.assert_allclose(x_var_dyn.grad, x_var_static.grad)
def test_assign_var_in_init_scope(self):
    p = chainer.Variable()
    with self.link.init_scope():
        self.link.p = p
    self.assertTrue(all(p is not param for param in self.link.params()))
def _wrap_variable(x):
    if isinstance(x, list):
        return [_wrap_variable(xi) for xi in x]
    else:
        return chainer.Variable(x)
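# _wrap_variable recurses into (possibly nested) lists, wrapping each array
# in a chainer.Variable while preserving the list structure. For example:
import numpy as np

vars_ = _wrap_variable([np.zeros(2), [np.ones(3)]])
# -> [Variable(shape (2,)), [Variable(shape (3,))]]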
def check_forward(self, x):
    self.f.apply((chainer.Variable(x),))
    assert len(self.h.call_history) == 1
    check_history(self, self.h.call_history[0],
                  functions.math.exponential.Exp, float)
def test_forward_cpu(self):
    with self.h:
        self.l(chainer.Variable(self.x))
def test_summary(self):
    x = self.x
    self.f.apply((chainer.Variable(x),))
    self.f.apply((chainer.Variable(x),))
    assert len(self.h.call_history) == 2
    assert len(self.h.summary()) == 1
def test_forward_cpu(self):
    self.f(chainer.Variable(self.x))
def check_forward(self, x):
    with self.h:
        self.layer(chainer.Variable(x))
    assert len(self.h.call_history) == 1
    check_history(self, self.h.call_history[0], basic_math.Mul, float)
def forward(self):
    x = chainer.Variable(self.x)
    W = chainer.Variable(self.W)
    return F.deconvolution_2d(
        x, W, None, stride=1, pad=1, use_cudnn=self.use_cudnn)
def test_forward_cpu_wide(self):  # see #120
    x_data = numpy.random.rand(2, 3, 15, 15).astype(self.dtype)
    x = chainer.Variable(x_data)
    functions.max_pooling_2d(x, 6, stride=6, pad=0)