def test_forward_consistency(self):
    x_cpu = chainer.Variable(self.x)
    W_cpu = chainer.Variable(self.W)
    b_cpu = None if self.nobias else chainer.Variable(self.b)
    with chainer.using_config('cudnn_deterministic',
                              self.cudnn_deterministic):
        y_cpu = F.deconvolution_2d(
            x_cpu, W_cpu, b_cpu, stride=self.stride, pad=self.pad,
            outsize=self.outsize)

    x_gpu = chainer.Variable(cuda.to_gpu(self.x))
    W_gpu = chainer.Variable(cuda.to_gpu(self.W))
    b_gpu = None if self.nobias else chainer.Variable(
        cuda.to_gpu(self.b))
    with chainer.using_config('use_cudnn', self.use_cudnn):
        with chainer.using_config('cudnn_deterministic',
                                  self.cudnn_deterministic):
            y_gpu = F.deconvolution_2d(
                x_gpu, W_gpu, b_gpu, stride=self.stride, pad=self.pad,
                outsize=self.outsize)

    self.assertEqual(y_cpu.data.dtype, self.x_dtype)
    self.assertEqual(y_gpu.data.dtype, self.x_dtype)
    testing.assert_allclose(
        y_cpu.data, y_gpu.data.get(), **self.test_forward_options)
def test_call_cudnn_backward(self):
    with chainer.using_config('use_cudnn', self.use_cudnn):
        # This test includes the case in which cudnn < v3
        # and cudnn_deterministic is True, in which
        # the backward method of chainer.functions.Deconvolution2D
        # must raise an error.
        # But in this case, its forward method should raise
        # an error as well.
        # Therefore, we intentionally set cudnn_deterministic
        # to False so that the forward method does not
        # raise an error.
        with chainer.using_config('cudnn_deterministic', False):
            y = self.forward()
    y.grad = self.gy

    data_func_name = 'cupy.cuda.cudnn.convolutionForward'
    filter_func_name = 'cupy.cuda.cudnn.convolutionBackwardFilter_v3'

    with chainer.using_config('use_cudnn', self.use_cudnn):
        with chainer.using_config('cudnn_deterministic',
                                  self.cudnn_deterministic):
            with mock.patch(data_func_name) as data_func, \
                    mock.patch(filter_func_name) as filter_func:
                y.backward()
                self.assertEqual(
                    data_func.called, self.should_call_cudnn)
                self.assertEqual(
                    filter_func.called, self.should_call_cudnn)
def test_call_cudnn_forward(self):
    with chainer.using_config('use_cudnn', self.use_cudnn):
        with chainer.using_config('cudnn_deterministic',
                                  self.cudnn_deterministic):
            with mock.patch('cupy.cudnn.cudnn.convolutionForward') as func:
                self.forward()
                self.assertEqual(func.called, self.should_call_cudnn)
def check_backward(self, x_data, W_data, b_data, y_grad,
                   use_cudnn='never'):
    if not self.c_contiguous:
        xp = cuda.get_array_module(x_data)
        x_data = xp.asfortranarray(x_data)
        W_data = xp.asfortranarray(W_data)
        y_grad = xp.asfortranarray(y_grad)
        self.assertFalse(x_data.flags.c_contiguous)
        self.assertFalse(W_data.flags.c_contiguous)
        self.assertFalse(y_grad.flags.c_contiguous)
        if b_data is not None:
            b = xp.empty((len(b_data) * 2,), dtype=self.b.dtype)
            b[::2] = b_data
            b_data = b[::2]
            self.assertFalse(b_data.flags.c_contiguous)

    inputs = (x_data, W_data)
    if b_data is not None:
        inputs = inputs + (b_data,)

    ndim = len(self.dims)
    with chainer.using_config('use_cudnn', use_cudnn):
        with chainer.using_config('autotune', self.autotune):
            gradient_check.check_backward(
                deconvolution_nd.DeconvolutionND(
                    ndim, self.stride, self.pad, self.outsize),
                inputs, y_grad, **self.check_backward_options)
def test_forward_gpu_train(self):
    self.rnn.to_gpu()
    with chainer.using_config('use_cudnn', 'always'), \
            chainer.using_config('train', True):
        self.check_forward(
            cuda.to_gpu(self.h),
            [cuda.to_gpu(x) for x in self.xs])
def check_double_backward(self, x_data, W_data, b_data, y_grad,
                          x_grad_grad, W_grad_grad, b_grad_grad,
                          use_cudnn='always'):
    if not self.c_contiguous:
        (x_data, W_data, b_data, y_grad,
         x_grad_grad, W_grad_grad, b_grad_grad) = (
            testing.array._as_noncontiguous_array(
                (x_data, W_data, b_data, y_grad,
                 x_grad_grad, W_grad_grad, b_grad_grad)))

    args = (x_data, W_data)
    grad_grads = (x_grad_grad, W_grad_grad)
    if b_data is not None:
        args += (b_data,)
        grad_grads += (b_grad_grad,)

    def f(*args):
        return F.convolution_nd(
            *args, stride=self.stride, pad=self.pad,
            cover_all=self.cover_all, dilate=self.dilate,
            groups=self.groups)

    with chainer.using_config('use_cudnn', use_cudnn):
        with chainer.using_config('autotune', self.autotune):
            gradient_check.check_double_backward(
                f, args, y_grad, grad_grads,
                dtype='d', atol=5e-3, rtol=5e-2)
def check_backward_consistency_regression(self, x_data, gy_data,
                                          use_cudnn='always'):
    # Regression test against the two-dimensional average pooling layer.
    if len(self.dims) != 2:
        return

    ksize = self.ksize
    stride = self.stride
    pad = self.pad
    xp = cuda.get_array_module(x_data)

    # Backward computation for N-dimensional average pooling layer.
    x_nd = chainer.Variable(xp.array(x_data))
    with chainer.using_config('use_cudnn', use_cudnn):
        func_nd = functions.AveragePoolingND(self.ndim, ksize,
                                             stride=stride, pad=pad)
        y_nd = func_nd(x_nd)
        y_nd.grad = gy_data
        y_nd.backward()

    # Backward computation for two-dimensional average pooling layer.
    x_2d = chainer.Variable(xp.array(x_data))
    with chainer.using_config('use_cudnn', use_cudnn):
        func_2d = functions.AveragePooling2D(ksize, stride=stride,
                                             pad=pad, cover_all=False)
        y_2d = func_2d(x_2d)
        y_2d.grad = gy_data
        y_2d.backward()

    # Test that the two resulting gradients are close enough.
    testing.assert_allclose(x_nd.grad, x_2d.grad)
def check_double_backward(
        self, inputs, grad_outputs, grad_grad_inputs, use_cudnn='always'):
    if not self.c_contiguous:
        inputs = array._as_noncontiguous_array(inputs)
        grad_outputs = array._as_noncontiguous_array(grad_outputs)
        grad_grad_inputs = array._as_noncontiguous_array(grad_grad_inputs)

    x_data, W_data, b_data = inputs
    y_grad, = grad_outputs
    x_grad_grad, W_grad_grad, b_grad_grad = grad_grad_inputs

    args = (x_data, W_data)
    grad_grads = (x_grad_grad, W_grad_grad)
    if b_data is not None:
        args += (b_data,)
        grad_grads += (b_grad_grad,)

    def f(*args):
        return F.deconvolution_nd(
            *args, stride=self.stride, pad=self.pad,
            outsize=self.outsize, dilate=self.dilate, groups=self.groups)

    with chainer.using_config('use_cudnn', use_cudnn):
        with chainer.using_config('autotune', self.autotune):
            gradient_check.check_double_backward(
                f, args, y_grad, grad_grads,
                **self.check_double_backward_options)
def check_backward(self, x_data, W_data, b_data, y_grad,
                   use_cudnn='never'):
    if not self.c_contiguous:
        xp = backend.get_array_module(x_data)
        x_data = xp.asfortranarray(x_data)
        W_data = xp.asfortranarray(W_data)
        y_grad = xp.asfortranarray(y_grad)
        self.assertFalse(x_data.flags.c_contiguous)
        self.assertFalse(W_data.flags.c_contiguous)
        self.assertFalse(y_grad.flags.c_contiguous)
        if b_data is not None:
            b = xp.empty((len(b_data) * 2,), dtype=self.b.dtype)
            b[::2] = b_data
            b_data = b[::2]
            self.assertFalse(b_data.flags.c_contiguous)

    args = (x_data, W_data)
    if b_data is not None:
        args += (b_data,)

    def f(*args):
        return F.deconvolution_nd(*args, stride=self.stride,
                                  pad=self.pad, outsize=self.outsize,
                                  dilate=self.dilate, groups=self.groups)

    with chainer.using_config('use_cudnn', use_cudnn):
        with chainer.using_config('autotune', self.autotune):
            gradient_check.check_backward(
                f, args, y_grad, **self.check_backward_options)
def check_backward(self, x_data, W_data, b_data, y_grad):
    xp = cuda.get_array_module(x_data)
    if not self.c_contiguous:
        x_data = xp.asfortranarray(x_data)
        W_data = xp.asfortranarray(W_data)
        y_grad = xp.asfortranarray(y_grad)
        self.assertFalse(x_data.flags.c_contiguous)
        self.assertFalse(W_data.flags.c_contiguous)
        self.assertFalse(y_grad.flags.c_contiguous)
        if b_data is not None:
            b = xp.empty((len(b_data) * 2,), dtype=self.b.dtype)
            b[::2] = b_data
            b_data = b[::2]
            self.assertFalse(b_data.flags.c_contiguous)

    args = (x_data, W_data)
    if b_data is not None:
        args = args + (b_data,)

    def f(*args):
        return F.convolution_2d(*args, stride=self.stride, pad=self.pad,
                                cover_all=self.cover_all,
                                dilate=self.dilate)

    with chainer.using_config('use_cudnn', self.use_cudnn):
        with chainer.using_config('cudnn_deterministic',
                                  self.cudnn_deterministic):
            with chainer.using_config('autotune', self.autotune):
                gradient_check.check_backward(
                    f, args, y_grad, dtype='d', atol=5e-4, rtol=5e-3)
def check_backward(self, x_data, W_data, b_data, y_grad):
    xp = cuda.get_array_module(x_data)
    if not self.c_contiguous:
        x_data = xp.asfortranarray(x_data)
        W_data = xp.asfortranarray(W_data)
        y_grad = xp.asfortranarray(y_grad)
        self.assertFalse(x_data.flags.c_contiguous)
        self.assertFalse(W_data.flags.c_contiguous)
        self.assertFalse(y_grad.flags.c_contiguous)
        if b_data is not None:
            b = xp.empty((len(b_data) * 2,), dtype=self.b.dtype)
            b[::2] = b_data
            b_data = b[::2]
            self.assertFalse(b_data.flags.c_contiguous)

    args = (x_data, W_data)
    if b_data is not None:
        args = args + (b_data,)

    with chainer.using_config('use_cudnn', self.use_cudnn):
        with chainer.using_config('cudnn_deterministic',
                                  self.cudnn_deterministic):
            gradient_check.check_backward(
                convolution_2d.Convolution2DFunction(
                    self.stride, self.pad, self.cover_all),
                args, y_grad, **self.check_backward_options)
def check_backward(self, args, y_grad, use_cudnn='always'):
    with chainer.using_config('use_cudnn', use_cudnn), \
            chainer.using_config('train', self.train):
        gradient_check.check_backward(
            batch_normalization.BatchNormalizationFunction(
                mean=None, var=None, decay=self.decay, eps=self.eps),
            args, y_grad, **self.check_backward_options)
def test_forward_gpu_volatile(self):
    with chainer.using_config('use_cudnn', self.use_cudnn), \
            chainer.using_config('enable_backprop', False):
        self.check_forward(
            cuda.to_gpu(self.hx),
            [cuda.to_gpu(x) for x in self.xs],
            [[cuda.to_gpu(w) for w in ws] for ws in self.ws],
            [[cuda.to_gpu(b) for b in bs] for bs in self.bs])
def test_call_cudnn_forward(self):
    name = 'cupy.cudnn.convolution_backward_data'
    with chainer.using_config('use_cudnn', self.use_cudnn):
        with chainer.using_config('cudnn_deterministic',
                                  self.cudnn_deterministic):
            with testing.patch(name) as func:
                self.forward()
            self.assertEqual(func.called, self.should_call_cudnn)
def test_invalid_config(self):
    with chainer.using_config('use_cudnn', True):
        self.assertRaises(ValueError, chainer.should_use_cudnn, '>=auto')
    with chainer.using_config('use_cudnn', False):
        self.assertRaises(ValueError, chainer.should_use_cudnn, '>=auto')
    with chainer.using_config('use_cudnn', 'on'):
        self.assertRaises(ValueError, chainer.should_use_cudnn, '>=auto')
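# For contrast with the invalid configs exercised above, a minimal sketch
# using one of the values Chainer documents for ``use_cudnn`` ('always',
# 'auto' or 'never'), under which should_use_cudnn does not raise:
with chainer.using_config('use_cudnn', 'auto'):
    chainer.should_use_cudnn('>=auto')  # returns a bool, no ValueError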
def call_forward(self, train):
    hx = _wrap_variable(_to_gpu(self.hx))
    xs = _wrap_variable(_to_gpu(self.xs))
    ws = _wrap_variable(_to_gpu(self.ws))
    bs = _wrap_variable(_to_gpu(self.bs))
    with chainer.using_config('enable_backprop', train), \
            chainer.using_config('train', train):
        return functions.n_step_bigru(
            self.n_layers, self.dropout, hx, ws, bs, xs)
def check_double_backward(self, args, y_grad, x_grad_grad,
                          use_cudnn='always'):
    def f(*args):
        y = self.batch_normalization(*args)
        return y * y  # make nonlinear against beta

    with chainer.using_config('use_cudnn', use_cudnn), \
            chainer.using_config('train', self.train):
        gradient_check.check_double_backward(
            f, args, y_grad, x_grad_grad,
            **self.check_double_backward_options)
def test_call_cudnn_backward(self):
    with chainer.using_config('use_cudnn', self.use_cudnn):
        with chainer.using_config('cudnn_deterministic',
                                  self.cudnn_deterministic):
            y = self.forward()
            y.grad = self.gy
            name = 'cupy.cudnn.cudnn.convolutionBackwardData_v3'
            with mock.patch(name) as func:
                y.backward()
                self.assertEqual(func.called, self.should_call_cudnn)
def test_forward_nonzero_gpu_test(self):
    # Issue #5347
    # to_gpu should work without setting the current device
    self.rnn.to_gpu(1)
    with chainer.using_config('use_cudnn', 'always'), \
            chainer.using_config('train', False):
        self.check_forward(
            cuda.to_gpu(self.h, 1),
            cuda.to_gpu(self.c, 1),
            [cuda.to_gpu(x, 1) for x in self.xs])
def test_singular_gpu(self):
    x = chainer.Variable(
        cuda.to_gpu(numpy.zeros((1, 2, 2), dtype=numpy.float32)))

    # Should raise an exception only in debug mode.
    with chainer.using_config('debug', False):
        functions.batch_inv(x)
    with chainer.using_config('debug', True):
        with self.assertRaises(ValueError):
            functions.batch_inv(x)
def __call__(self, cur_word):
    # Given the current word ID, predict the next word.
    x = self.embed(cur_word)
    # dropout. ref:
    # https://docs.chainer.org/en/stable/reference/generated/chainer.functions.dropout.html?highlight=dropout
    with chainer.using_config('train', True):
        x = F.dropout(x, args.dropout)
    h = self.mid(x)
    with chainer.using_config('train', True):
        h = F.dropout(h, args.dropout)
    y = self.out(h)
    return y
def test_valid_case_combination(self):
    with chainer.using_config('use_cudnn', 'always'):
        self.assertTrue(chainer.should_use_cudnn('==always'))
        self.assertTrue(chainer.should_use_cudnn('>=auto'))
    with chainer.using_config('use_cudnn', 'auto'):
        self.assertFalse(chainer.should_use_cudnn('==always'))
        self.assertTrue(chainer.should_use_cudnn('>=auto'))
    with chainer.using_config('use_cudnn', 'never'):
        self.assertFalse(chainer.should_use_cudnn('==always'))
        self.assertFalse(chainer.should_use_cudnn('>=auto'))
def test_consistency_with_cudnn_cpu(self):
    with chainer.using_config('use_cudnn', 'never'):
        x_cpu, grid_cpu, y_cpu = self._apply_backward(
            self.x, self.grid, self.grads)
    with chainer.using_config('use_cudnn', 'always'):
        x_cudnn, grid_cudnn, y_cudnn = self._apply_backward(
            cuda.to_gpu(self.x), cuda.to_gpu(self.grid),
            cuda.to_gpu(self.grads))

    testing.assert_allclose(y_cpu.data, y_cudnn.data)
    testing.assert_allclose(x_cpu.grad, x_cudnn.grad)
    testing.assert_allclose(grid_cpu.grad, grid_cudnn.grad)
def forward(self, train):
    with chainer.using_config('use_cudnn', self.use_cudnn), \
            chainer.using_config('enable_backprop', train), \
            chainer.using_config('train', train):
        h = chainer.Variable(self.hx)
        xs = [chainer.Variable(x) for x in self.xs]
        ws = [[chainer.Variable(w) for w in ws] for ws in self.ws]
        bs = [[chainer.Variable(b) for b in bs] for bs in self.bs]
        return functions.n_step_bigru(
            self.n_layers, self.dropout, h, ws, bs, xs)
def _run(self):
    with chainer.using_config('use_cudnn', 'always'):
        with chainer.using_config('cudnn_deterministic', True):
            # verify data continuity and move to gpu
            x_data, W_data, b_data, gy_data = tuple(
                cuda.to_gpu(data) for data in self._contiguous(
                    self.x, self.W, self.b, self.gy))
            x, W, b, y = self._run_forward(x_data, W_data, b_data)

            y.grad = gy_data
            y.backward()
            return x, W, b, y
def __enter__(self):
    self._contexts = [
        chainer.using_config(
            'use_cudnn', self.use_cudnn),
        chainer.using_config(
            'cudnn_deterministic', self.cudnn_deterministic),
        chainer.using_config(
            'autotune', self.autotune),
    ]
    for c in self._contexts:
        c.__enter__()
    return self
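# The snippet above only shows __enter__; a matching __exit__ is needed for
# the class to work as a context manager. A minimal sketch, assuming the
# stacked configs should be unwound in reverse order of entry:
def __exit__(self, exc_type, exc_value, traceback):
    for c in reversed(self._contexts):
        c.__exit__(exc_type, exc_value, traceback)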
def predict(model, test_iter):
    probs = []
    test_iter.reset()
    for batch in test_iter:
        in_arrays = convert.concat_examples(batch, args.gpu)
        with chainer.using_config('train', False), \
                chainer.using_config('enable_backprop', False):
            y = model(in_arrays[0])
            prob = chainer.functions.softmax(y)
            probs.append(prob.data)
    return concat_arrays(probs)
def __init__(self, model, ngpu, options,
             data_options=None, time_options=None):
    self.ngpu = ngpu
    self.gpu_mode = True if ngpu >= 1 else False
    self.time_options = time_options

    if self.gpu_mode:
        self.model = [copy.deepcopy(model).to_gpu(i)
                      for i in range(ngpu)]
    else:
        self.model = model

    if options['benchmark_mode']:
        # using_config() is a context manager; calling it bare has no
        # effect, so set the global config directly here.
        chainer.global_config.autotune = True
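# A hedged alternative to the global flag set above: if autotune should
# only apply to a bounded region, enter the context manager explicitly.
with chainer.using_config('autotune', True):
    pass  # run the benchmarked forward passes here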
def iter_predict(Xs, Ms):
    logits = []
    with chainer.using_config('train', False), \
            chainer.using_config('enable_backprop', False):
        for xmb, mmb in iter_data(Xs, Ms, n_batch=n_batch_train,
                                  truncate=False, verbose=True):
            n = len(xmb)
            XMB = model.xp.asarray(xmb)
            MMB = model.xp.asarray(mmb)
            h = model(XMB)
            clf_logits = clf_head(h, XMB)
            logits.append(cuda.to_cpu(clf_logits.array))
    logits = np.concatenate(logits, 0)
    return logits
def test_forward_cpu_graph_invariant(model, data):
    # This RSGCN uses dropout, so we need to forward in test mode
    # to remove the stochastic calculation.
    atom_data, adj_data = data[0], data[1]
    with chainer.using_config('train', False):
        y_actual = cuda.to_cpu(model(atom_data, adj_data).data)

    permutation_index = numpy.random.permutation(atom_size)
    permute_atom_data = permute_node(atom_data, permutation_index)
    permute_adj_data = permute_adj(adj_data, permutation_index)
    with chainer.using_config('train', False):
        permute_y_actual = cuda.to_cpu(model(
            permute_atom_data, permute_adj_data).data)
    assert numpy.allclose(y_actual, permute_y_actual,
                          rtol=1.e-4, atol=1.e-5)
def test_call_cudnn_forward(self):
    with chainer.using_config('use_cudnn', self.use_cudnn):
        with testing.patch('cupy.cuda.cudnn.softmaxForward') as func:
            self.forward()
            self.assertEqual(func.called, self.expect)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    # The open_crf layer only works in CPU mode.
    parser.add_argument('--model', '-m',
                        help='pretrained model file path')  # contains the pretrained target
    parser.add_argument('--pretrained_model', '-pre', default='resnet101')
    parser.add_argument('--memcached_host', default='127.0.0.1')
    parser.add_argument('--mean_rgb',
                        default=config.ROOT_PATH + 'BP4D/idx/mean_rgb.npy',
                        help='image mean .npy file')
    parser.add_argument('--mean_flow',
                        default=config.ROOT_PATH + 'BP4D/idx/mean_flow.npy',
                        help='image mean .npy file')
    parser.add_argument('--proc_num', type=int, default=10,
                        help='number of processes that fetch data')
    parser.add_argument('--batch', '-b', type=int, default=10,
                        help='mini-batch size')
    args = parser.parse_args()

    if not args.model.endswith('model.npz'):
        return
    model_info = extract_mode(args.model)
    database = model_info['database']
    fold = model_info['fold']
    split_idx = model_info['split_idx']
    backbone = model_info['backbone']
    use_paper_num_label = model_info['use_paper_num_label']
    use_roi_align = model_info['use_roi_align']
    two_stream_mode = model_info['two_stream_mode']
    T = model_info['T']

    adaptive_AU_database(database)
    paper_report_label, class_num = squeeze_label_num_report(
        database, use_paper_num_label)
    paper_report_label_idx = list(paper_report_label.keys())
    if not paper_report_label_idx:
        paper_report_label_idx = None
        class_num = len(config.AU_SQUEEZE)
    else:
        class_num = len(paper_report_label_idx)

    model_print_dict = OrderedDict()
    for key, value in model_info.items():
        model_print_dict[key] = str(value)
    print("""
    {0}
    ======================================
    INFO:
    {1}
    ======================================
    """.format(args.model,
               json.dumps(model_print_dict, sort_keys=True, indent=8)))

    au_rcnn_train_chain_list = []
    if backbone == 'resnet101':
        if two_stream_mode != TwoStreamMode.rgb_flow:
            pretrained_model = backbone
            au_rcnn = AU_RCNN_Resnet101(
                pretrained_model=pretrained_model,
                min_size=config.IMG_SIZE[0], max_size=config.IMG_SIZE[1],
                use_roi_align=use_roi_align,
                use_optical_flow_input=(
                    two_stream_mode == TwoStreamMode.optical_flow),
                temporal_length=T)
            au_rcnn_train_chain = AU_RCNN_ROI_Extractor(au_rcnn)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain)
        else:  # rgb_flow mode
            au_rcnn_rgb = AU_RCNN_Resnet101(
                pretrained_model=backbone,
                min_size=config.IMG_SIZE[0], max_size=config.IMG_SIZE[1],
                use_roi_align=use_roi_align,
                use_optical_flow_input=False, temporal_length=T)
            au_rcnn_optical_flow = AU_RCNN_Resnet101(
                pretrained_model=backbone,
                min_size=config.IMG_SIZE[0], max_size=config.IMG_SIZE[1],
                use_roi_align=use_roi_align,
                use_optical_flow_input=True, temporal_length=T)
            au_rcnn_train_chain_rgb = AU_RCNN_ROI_Extractor(au_rcnn_rgb)
            au_rcnn_train_chain_optical_flow = AU_RCNN_ROI_Extractor(
                au_rcnn_optical_flow)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain_rgb)
            au_rcnn_train_chain_list.append(
                au_rcnn_train_chain_optical_flow)
            au_rcnn = au_rcnn_rgb

    model = Wrapper(au_rcnn_train_chain_list, class_num, database, T,
                    two_stream_mode=two_stream_mode,
                    gpus=[args.gpu, args.gpu])
    chainer.serializers.load_npz(args.model, model)
    print('loading {}'.format(args.model))
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()

    mc_manager = PyLibmcManager(args.memcached_host)
    img_dataset = AUDataset(
        database=database, L=T, fold=fold, split_name='test',
        split_index=split_idx, mc_manager=mc_manager,
        train_all_data=False,
        two_stream_mode=two_stream_mode,
        paper_report_label_idx=paper_report_label_idx)
    video_dataset = TransformDataset(
        img_dataset,
        Transform(L=T, mean_rgb_path=args.mean_rgb,
                  mean_flow_path=args.mean_flow))

    if args.proc_num == 1:
        test_iter = SerialIterator(video_dataset, batch_size=args.batch,
                                   repeat=False, shuffle=False)
    else:
        test_iter = MultiprocessIterator(
            video_dataset, batch_size=args.batch,
            n_processes=args.proc_num, repeat=False, shuffle=False,
            n_prefetch=10, shared_mem=10000000)

    with chainer.no_backprop_mode(), \
            chainer.using_config('cudnn_deterministic', True), \
            chainer.using_config('train', False):
        predict_data_path = (
            os.path.dirname(args.model) + os.path.sep + 'pred_'
            + os.path.basename(args.model)[
                :os.path.basename(args.model).rindex('_')] + '.npz')
        print('npz_path: {}'.format(predict_data_path))
        au_evaluator = ActionUnitEvaluator(
            test_iter, model, args.gpu, database=database,
            paper_report_label=paper_report_label,
            converter=lambda batch, device: concat_examples_not_labels(
                batch, device, padding=0),
            T=T, output_path=predict_data_path)
        observation = au_evaluator.evaluate()
        with open(os.path.dirname(args.model) + os.path.sep
                  + 'evaluation_result_{0}.json'.format(
                      os.path.basename(args.model)[
                          :os.path.basename(args.model).rindex('_')]),
                  'w') as file_obj:
            file_obj.write(
                json.dumps(observation, indent=4, separators=(',', ': ')))
            file_obj.flush()
def predict(self, x):
    with chainer.using_config('train', False):
        h = self._logits(x)
        # with chainer.using_config('use_cudnn', 'never'):
        #     return F.softmax(h)
        return F.softmax(h)
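# Hedged usage sketch of the predict method above (``model`` and ``x`` are
# illustrative placeholders, not names from the original code):
with chainer.no_backprop_mode():
    probs = model.predict(x)  # softmax probabilities; no graph is built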
def sample_once(encdec, src_batch, tgt_batch, src_mask, src_indexer,
                tgt_indexer, eos_idx, max_nb=None,
                s_unk_tag="#S_UNK#", t_unk_tag="#T_UNK#"):
    with chainer.using_config("train", False), chainer.no_backprop_mode():
        print "sample"
        sample_greedy, score, attn_list = encdec(
            src_batch, 50, src_mask, use_best_for_sample=True,
            need_score=True)
        # sample, score = encdec(src_batch, 50, src_mask,
        #                        use_best_for_sample=False)

        assert len(src_batch[0].data) == len(tgt_batch[0].data)
        assert len(sample_greedy[0]) == len(src_batch[0].data)

        debatched_src = de_batch(src_batch, mask=src_mask, eos_idx=None,
                                 is_variable=True)
        debatched_tgt = de_batch(tgt_batch, eos_idx=eos_idx,
                                 is_variable=True)
        debatched_sample = de_batch(sample_greedy, eos_idx=eos_idx)

        sample_random, score_random, attn_list_random = encdec(
            src_batch, 50, src_mask, use_best_for_sample=False,
            need_score=True)
        debatched_sample_random = de_batch(sample_random, eos_idx=eos_idx)

        for sent_num in xrange(len(debatched_src)):
            if max_nb is not None and sent_num > max_nb:
                break
            src_idx_seq = debatched_src[sent_num]
            tgt_idx_seq = debatched_tgt[sent_num]
            sample_idx_seq = debatched_sample[sent_num]
            sample_random_idx_seq = debatched_sample_random[sent_num]
            print "sent num", sent_num
            for name, seq, unk_tag, indexer, this_eos_idx in zip(
                    "src tgt sample sample_random".split(" "),
                    [src_idx_seq, tgt_idx_seq, sample_idx_seq,
                     sample_random_idx_seq],
                    [s_unk_tag, t_unk_tag, t_unk_tag, t_unk_tag],
                    [src_indexer, tgt_indexer, tgt_indexer, tgt_indexer],
                    [None, eos_idx, eos_idx, eos_idx]):
                print name, "idx:", seq
                print name, "raw:", " ".join(
                    indexer.deconvert_swallow(
                        seq, unk_tag=unk_tag,
                        eos_idx=this_eos_idx)).encode('utf-8')
                print name, "postp:", indexer.deconvert(
                    seq, unk_tag=unk_tag,
                    eos_idx=this_eos_idx).encode('utf-8')
def main(params):
    print('')
    print('# gpu: {}'.format(params["gpu"]))
    print('# unit: {}'.format(params["unit"]))
    print('# batch-size: {}'.format(params["batchsize"]))
    print('# epoch: {}'.format(params["epoch"]))
    print('# number of category: {}'.format(params["output_dimensions"]))
    print('# embedding dimension: {}'.format(
        params["embedding_dimensions"]))
    print('# current layer: {}'.format(params["current_depth"]))
    print('# model-type: {}'.format(params["model_type"]))
    print('')

    f = open('./CNN/LOG/configuration_' + params["current_depth"]
             + '.txt', 'w')
    f.write('# gpu: {}'.format(params["gpu"]) + "\n")
    f.write('# unit: {}'.format(params["unit"]) + "\n")
    f.write('# batch-size: {}'.format(params["batchsize"]) + "\n")
    f.write('# epoch: {}'.format(params["epoch"]) + "\n")
    f.write('# number of category: {}'.format(
        params["output_dimensions"]) + "\n")
    f.write('# embedding dimension: {}'.format(
        params["embedding_dimensions"]) + "\n")
    f.write('# current layer: {}'.format(params["current_depth"]) + "\n")
    f.write('# model-type: {}'.format(params["model_type"]) + "\n")
    f.write("\n")
    f.close()

    embedding_weight = params["embedding_weight"]
    embedding_dimensions = params["embedding_dimensions"]
    input_data = params["input_data"]
    x_train = input_data['x_trn']
    x_val = input_data['x_val']
    y_train = input_data['y_trn']
    y_val = input_data['y_val']

    cnn_params = {
        "cudnn": USE_CUDNN,
        "out_channels": params["out_channels"],
        "row_dim": embedding_dimensions,
        "batch_size": params["batchsize"],
        "hidden_dim": params["unit"],
        "n_classes": params["output_dimensions"],
        "embedding_weight": embedding_weight,
    }
    if params["fine_tuning"] == 0:
        cnn_params['mode'] = 'scratch'
    elif params["fine_tuning"] == 1:
        cnn_params['mode'] = 'fine-tuning'
        cnn_params['load_param_node_name'] = params['upper_depth']

    if params["model_type"] == "XML-CNN":
        model = xml_cnn_model.CNN(**cnn_params)
    else:
        model = cnn_model.CNN(**cnn_params)

    if params["gpu"] >= 0:
        chainer.cuda.get_device_from_id(params["gpu"]).use()
        model.to_gpu()

    # Learn the CNN from the training and validation data.
    # =========================================================
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train = tuple_dataset.TupleDataset(x_train, y_train)
    val = tuple_dataset.TupleDataset(x_val, y_val)
    train_iter = chainer.iterators.SerialIterator(
        train, params["batchsize"], repeat=True, shuffle=False)
    val_iter = chainer.iterators.SerialIterator(
        val, params["batchsize"], repeat=False, shuffle=False)

    # Early stopping monitors the loss value (validation/main/loss)
    # obtained on the validation data.
    # =========================================================
    stop_trigger = training.triggers.EarlyStoppingTrigger(
        monitor='validation/main/loss',
        max_trigger=(params["epoch"], 'epoch'))

    updater = MyUpdater(train_iter, optimizer,
                        params["output_dimensions"], device=params["gpu"])
    trainer = training.Trainer(updater, stop_trigger, out='./CNN/')
    trainer.extend(
        MyEvaluator(val_iter, model,
                    class_dim=params["output_dimensions"],
                    device=params["gpu"]))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(
        extensions.snapshot_object(
            model, 'parameters_for_multi_label_model_'
            + params["current_depth"] + '.npz'),
        trigger=training.triggers.MinValueTrigger(
            'validation/main/loss', trigger=(1, 'epoch')))
    trainer.extend(
        extensions.LogReport(
            log_name='LOG/log_' + params["current_depth"] + ".txt",
            trigger=(1, 'epoch')))
    trainer.extend(
        extensions.PrintReport(
            ['epoch', 'main/loss', 'validation/main/loss',
             'elapsed_time']))
    trainer.extend(extensions.ProgressBar())
    trainer.extend(
        extensions.PlotReport(
            ['main/loss', 'validation/main/loss'], 'epoch',
            file_name='LOG/loss_' + params["current_depth"] + '.png'))
    trainer.run()

    filename = ('parameters_for_multi_label_model_'
                + params["current_depth"] + '.npz')
    src = './CNN/'
    dst = './CNN/PARAMS'
    shutil.move(os.path.join(src, filename), os.path.join(dst, filename))

    # Prediction on the test data.
    # =========================================================
    print("-" * 50)
    print("Testing...")

    x_tst = input_data['x_tst']
    y_tst = input_data['y_tst']
    n_eval = len(x_tst)

    cnn_params['mode'] = 'test-predict'
    cnn_params['load_param_node_name'] = params["current_depth"]
    if params["model_type"] == "XML-CNN":
        model = xml_cnn_model.CNN(**cnn_params)
    else:
        model = cnn_model.CNN(**cnn_params)
    model.to_gpu()

    output = np.zeros([n_eval, params["output_dimensions"]],
                      dtype=np.int8)
    output_probability_file_name = ("CNN/RESULT/probability_"
                                    + params["current_depth"] + ".csv")
    with open(output_probability_file_name, 'w') as f:
        f.write(','.join(params["learning_categories"]) + "\n")

    test_batch_size = params["batchsize"]
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        for i in tqdm(six.moves.range(0, n_eval, test_batch_size),
                      desc="Predict Test loop"):
            x = chainer.Variable(
                chainer.cuda.to_gpu(x_tst[i:i + test_batch_size]))
            t = y_tst[i:i + test_batch_size]
            net_output = F.sigmoid(model(x))
            output[i:i + test_batch_size] = select_function(
                net_output.data)
            with open(output_probability_file_name, 'a') as f:
                tmp = chainer.cuda.to_cpu(net_output.data)
                low_values_flags = tmp < 0.001
                tmp[low_values_flags] = 0
                np.savetxt(f, tmp, fmt='%.4g', delimiter=",")
    return output
def predict(filter_num=5, inpaint=1, save_file=""):
    svm = pickle.load(open('./np_data/svm_over_rap.sav', 'rb'))
    filter_str = str(filter_num)
    seg = 0

    # Define the model.
    model = L.Classifier(CNN())
    # Load the trained model.
    serializers.load_npz("./_snapshot_epoch-50", model,
                         path='updater/model:main/')

    TP = 0.0
    FP = 0.0
    FN = 0.0
    TN = 0.0
    data_channels = 13
    data_dir_path1 = u"./data/2.5m_median41"
    data_dir_path2 = u"./data/2.5m_half"
    file_list = os.listdir(r'./data/2.5m_half/')
    nnum = 0
    for file_name in file_list:
        root, ext = os.path.splitext(file_name)
        if ext == u'.bmp':
            nnum = nnum + 1
            print(file_name, nnum)
            abs_name1 = data_dir_path1 + '/' + file_name
            abs_name2 = data_dir_path2 + '/' + file_name
            file_name = file_name[:-4]
            if data_channels == 3 or data_channels == 33:
                src_img = cv2.imread(abs_name1)
                height, width, channela = src_img.shape
            if data_channels == 1 or data_channels == 13:
                src_img = cv2.imread(abs_name1, 0)
                height, width = src_img.shape
            dst_img = cv2.imread(abs_name2)
            f1_img = cv2.imread(abs_name2)
            mask = np.zeros((height, width), np.uint8)

            # Decide the number of overlapping pixels.
            over_rap = 25
            new_img_height = 50
            new_img_width = 50
            width_split = int(width / (new_img_width - over_rap)) - 1
            height_split = int(height / (new_img_height - over_rap)) - 1
            a1, b1, c1 = 0, 0, 0
            num = 0
            for h in range(height_split):
                height_start = h * over_rap
                height_end = height_start + new_img_height
                for w in range(width_split):
                    width_start = w * over_rap
                    width_end = width_start + new_img_width
                    num = num + 1
                    clp1 = src_img[height_start:height_end,
                                   width_start:width_end]
                    PIL_data = Image.fromarray(clp1)

                    # RGB color image.
                    if data_channels == 3:
                        r, g, b = PIL_data.split()
                        rImgData = np.asarray(np.float32(r) / 255.0)
                        gImgData = np.asarray(np.float32(g) / 255.0)
                        bImgData = np.asarray(np.float32(b) / 255.0)
                        imgData = np.asarray(
                            [rImgData, gImgData, bImgData])
                        # grayImgData = np.asarray(
                        #     np.float32(PIL_data) / 255.0)
                        x = imgData

                    # RGB color image plus segmentation.
                    if data_channels == 33:
                        r, g, b = PIL_data.split()
                        rImgData = np.asarray(np.float32(r) / 255.0)
                        gImgData = np.asarray(np.float32(g) / 255.0)
                        bImgData = np.asarray(np.float32(b) / 255.0)
                        seg_n = "seg"
                        if os.path.isfile("./data/" + seg_n + "_hall_batch/" + file_name + "_" + str(num) + ".bmp"):
                            seg_img1 = np.array(Image.open("./data/" + seg_n + "_hall_batch/" + file_name + "_" + str(num) + ".bmp").convert('L'))
                            a1 = a1 + 1
                        else:
                            seg_img1 = np.array(np.full((50, 50), 255, dtype=np.uint8))
                        if os.path.isfile("./data/" + seg_n + "_shadow_batch/" + file_name + "_" + str(num) + ".bmp"):
                            seg_img2 = np.array(Image.open("./data/" + seg_n + "_shadow_batch/" + file_name + "_" + str(num) + ".bmp").convert('L'))
                            b1 = b1 + 1
                        else:
                            seg_img2 = np.array(np.full((50, 50), 255, dtype=np.uint8))
                        if os.path.isfile("./data/" + seg_n + "_hyouzi_batch/" + file_name + "_" + str(num) + ".bmp"):
                            seg_img3 = np.array(Image.open("./data/" + seg_n + "_hyouzi_batch/" + file_name + "_" + str(num) + ".bmp").convert('L'))
                            c1 = c1 + 1
                        else:
                            seg_img3 = np.array(np.full((50, 50), 255, dtype=np.uint8))
                        seg1 = np.asarray(np.float32(seg_img1) / 255.0)
                        seg2 = np.asarray(np.float32(seg_img2) / 255.0)
                        seg3 = np.asarray(np.float32(seg_img3) / 255.0)
                        imgData = np.asarray([bImgData, gImgData,
                                              rImgData, seg1, seg2, seg3])
                        x = imgData

                    # Correction using a median filter.
                    if data_channels == 1:
                        grayImgData = np.asarray(
                            np.float32(PIL_data) / 255.0)
                        x = grayImgData[None, ...]

                    # Correction using a median filter plus segmentation.
                    if data_channels == 13:
                        grayImgData = np.asarray(
                            np.float32(PIL_data) / 255.0)
                        seg_n = "2.5m"
                        if os.path.isfile("./data/" + seg_n + "_hall_over/" + file_name + "_" + str(num) + ".bmp"):
                            seg_img1 = np.array(Image.open("./data/" + seg_n + "_hall_over/" + file_name + "_" + str(num) + ".bmp").convert('L'))
                            a1 = a1 + 1
                        else:
                            seg_img1 = np.array(np.full((50, 50), 0, dtype=np.uint8))
                        if os.path.isfile("./data/" + seg_n + "_shadow_over/" + file_name + "_" + str(num) + ".bmp"):
                            seg_img2 = np.array(Image.open("./data/" + seg_n + "_shadow_over/" + file_name + "_" + str(num) + ".bmp").convert('L'))
                            b1 = b1 + 1
                        else:
                            seg_img2 = np.array(np.full((50, 50), 0, dtype=np.uint8))
                        if os.path.isfile("./data/" + seg_n + "_hyouzi_over/" + file_name + "_" + str(num) + ".bmp"):
                            seg_img3 = np.array(Image.open("./data/" + seg_n + "_hyouzi_over/" + file_name + "_" + str(num) + ".bmp").convert('L'))
                            c1 = c1 + 1
                        else:
                            seg_img3 = np.array(np.full((50, 50), 0, dtype=np.uint8))
                        seg1 = np.asarray(np.float32(seg_img1) / 255.0)
                        seg2 = np.asarray(np.float32(seg_img2) / 255.0)
                        seg3 = np.asarray(np.float32(seg_img3) / 255.0)
                        imgData = np.asarray([grayImgData, seg1, seg2,
                                              seg3])
                        x = imgData

                    with chainer.using_config('train', False), \
                            chainer.using_config('enable_backprop',
                                                 False):
                        y = model.predictor(
                            x[None, ...]).data.argmax(axis=1)[0]
                        yy = model.predictor(x[None, ...])
                        rate = F.softmax(yy.data)[0][1]
                    if y == 1:
                        for y in range(height_start, height_end):
                            for x in range(width_start, width_end):
                                # Brighten the mask image by 63 each
                                # time the patch is classified positive.
                                mask[y][x] = mask[y][x] + 63
                                if mask[y][x] > 250:
                                    mask[y][x] = 255
                                dst_img[y][x][2] = dst_img[y][x][2] + 20
                                if dst_img[y][x][2] >= 255:
                                    dst_img[y][x][2] = 254
            # print(a1, b1, c1)
            crack_mask = detection_crack(mask, file_name, svm)
            # a, b, c, d = F1_measure(f1_img, crack_mask, file_name, seg,
            #                         "./data/t_gt_gray_own/")
            a, b, c, d = F1_measure(f1_img, crack_mask, file_name, seg,
                                    "./data/2.5m_gt_gray_own3/")
            TP = TP + a
            FP = FP + b
            FN = FN + c
            TN = TN + d
            # cv2.imwrite('CNN_output/' + file_name + '.bmp', mask)

    Precision = (TP + 0.001) / (TP + FP + 0.001)
    Recall = (TP + 0.001) / (TP + FN + 0.001)
    F1 = 2 * Recall * Precision / (Recall + Precision)
    Specificity = (TN + 0.001) / (TN + FP + 0.001)
    print("\n\nTOTAL F1-measure")
    print("Precision={:.4}".format(Precision))
    print("Recall={:.4}".format(Recall))
    print("Specificity={:.4}".format(Specificity))
    print("F1={:.4}\n\n".format(F1))
    f = open("./F1/F1.txt", 'w')
    f.write("Precision={:.4}".format(Precision) + '\n')
    f.write("Recall={:.4}".format(Recall) + "\n")
    f.write("F1={:.4}".format(F1) + '\n')
    f.write("Specificity={:.4}".format(Specificity) + '\n')
    f.close()  # Close the file.
    # filter_num = filter_num + 1
    return 0
def setUp(self):
    self.x = cuda.cupy.random.uniform(-1, 1, (2, 3)).astype(self.dtype)
    self.gy = cuda.cupy.random.uniform(-1, 1, (2, 3)).astype(self.dtype)
    with chainer.using_config('use_cudnn', self.use_cudnn):
        self.expect = chainer.should_use_cudnn('>=auto')
def train(main_script_path, func_train_one_batch, param_dict,
          save_distance_matrix=False):
    script_filename = os.path.splitext(
        os.path.basename(main_script_path))[0]
    chainer.config.train = False
    device = 0
    xp = chainer.cuda.cupy
    config_parser = six.moves.configparser.ConfigParser()
    config_parser.read('config')
    log_dir_path = os.path.expanduser(config_parser.get('logs',
                                                        'dir_path'))

    p = utils.Logger(log_dir_path, **param_dict)  # hyperparameters

    ##########################################################
    # load database
    ##########################################################
    if p.method == 'proxy_nca':
        iteration_scheme = 'clustering'
    else:
        iteration_scheme = p.method
    streams = data_provider.get_streams(p.batch_size, dataset=p.dataset,
                                        method=iteration_scheme,
                                        crop_size=p.crop_size)
    stream_train, stream_train_eval, stream_test = streams
    iter_train = stream_train.get_epoch_iterator()

    ##########################################################
    # construct the model
    ##########################################################
    if p.method == 'proxy_nca':
        dataset_class = data_provider.get_dataset_class(p.dataset)
        labels = dataset_class(['train'], sources=['targets'],
                               load_in_memory=True).data_sources
        num_classes = len(np.unique(labels))
        model = ModifiedGoogLeNet(p.out_dim, p.normalize_output,
                                  num_classes)
    else:
        model = ModifiedGoogLeNet(p.out_dim, p.normalize_output)
    if device >= 0:
        model.to_gpu()
    model.cleargrads()
    xp = model.xp
    optimizer_class = getattr(chainer.optimizers, p.optimizer)
    optimizer = optimizer_class(p.learning_rate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(p.l2_weight_decay))
    print(p)

    stop = False
    logger = utils.Logger(log_dir_path)
    logger.soft_test_best = [0]
    time_origin = time.time()
    try:
        for epoch in range(p.num_epochs):
            time_begin = time.time()
            epoch_losses = []

            for i in tqdm(range(p.num_batches_per_epoch),
                          desc='# {}'.format(epoch)):
                with chainer.using_config('train', True):
                    loss = func_train_one_batch(model, p,
                                                next(iter_train))
                    loss.backward()
                optimizer.update()
                model.cleargrads()
                epoch_losses.append(loss.data)
                del loss

            loss_average = cuda.to_cpu(xp.array(
                xp.hstack(epoch_losses).mean()))

            # average accuracy and distance matrix for training data
            D, soft, hard, retrieval = evaluate(
                model, stream_train_eval.get_epoch_iterator(),
                p.distance_type,
                return_distance_matrix=save_distance_matrix)

            # average accuracy and distance matrix for testing data
            D_test, soft_test, hard_test, retrieval_test = evaluate(
                model, stream_test.get_epoch_iterator(),
                p.distance_type,
                return_distance_matrix=save_distance_matrix)

            time_end = time.time()
            epoch_time = time_end - time_begin
            total_time = time_end - time_origin

            logger.epoch = epoch
            logger.total_time = total_time
            logger.loss_log.append(loss_average)
            logger.train_log.append([soft[0], hard[0], retrieval[0]])
            logger.test_log.append(
                [soft_test[0], hard_test[0], retrieval_test[0]])

            # retain the model if it scored the best test acc. ever
            if soft_test[0] > logger.soft_test_best[0]:
                logger.model_best = copy.deepcopy(model)
                logger.optimizer_best = copy.deepcopy(optimizer)
                logger.epoch_best = epoch
                logger.D_best = D
                logger.D_test_best = D_test
                logger.soft_best = soft
                logger.soft_test_best = soft_test
                logger.hard_best = hard
                logger.hard_test_best = hard_test
                logger.retrieval_best = retrieval
                logger.retrieval_test_best = retrieval_test

            print("#", epoch)
            print("time: {} ({})".format(epoch_time, total_time))
            print("[train] loss:", loss_average)
            print("[train] soft:", soft)
            print("[train] hard:", hard)
            print("[train] retr:", retrieval)
            print("[test] soft:", soft_test)
            print("[test] hard:", hard_test)
            print("[test] retr:", retrieval_test)
            print("[best] soft: {} (at # {})".format(
                logger.soft_test_best, logger.epoch_best))
            print(p)

            # print norms of the weights
            params = xp.hstack([xp.linalg.norm(param.data)
                                for param in model.params()]).tolist()
            print("|W|", map(lambda param: float('%0.2f' % param),
                             params))
            print()

            # Draw plots
            if save_distance_matrix:
                plt.figure(figsize=(8, 4))
                plt.subplot(1, 2, 1)
                mat = plt.matshow(D, fignum=0, cmap=plt.cm.gray)
                plt.colorbar(mat, fraction=0.045)
                plt.subplot(1, 2, 2)
                mat = plt.matshow(D_test, fignum=0, cmap=plt.cm.gray)
                plt.colorbar(mat, fraction=0.045)
                plt.tight_layout()

            plt.figure(figsize=(8, 4))
            plt.subplot(1, 2, 1)
            plt.plot(logger.loss_log, label="tr-loss")
            plt.grid()
            plt.legend(loc='best')
            plt.subplot(1, 2, 2)
            plt.plot(logger.train_log)
            plt.plot(logger.test_log)
            plt.grid()
            plt.legend(["tr-soft", "tr-hard", "tr-retr",
                        "te-soft", "te-hard", "te-retr"],
                       bbox_to_anchor=(1.4, 1))
            plt.ylim([0.0, 1.0])
            plt.xlim([0, p.num_epochs])
            plt.tight_layout()
            plt.show()
            plt.draw()

            del D
            del D_test

    except KeyboardInterrupt:
        stop = True

    dir_name = "-".join([p.dataset, script_filename,
                         time.strftime("%Y%m%d%H%M%S"),
                         str(logger.soft_test_best[0])])
    logger.save(dir_name)
    p.save(dir_name)

    print("total epochs: {} ({} [s])".format(logger.epoch,
                                             logger.total_time))
    print("best test score (at # {})".format(logger.epoch_best))
    print("[test] soft:", logger.soft_test_best)
    print("[test] hard:", logger.hard_test_best)
    print("[test] retr:", logger.retrieval_test_best)
    print(str(p).replace(', ', '\n'))
    print()

    return stop
def call_test(self, x):
    with chainer.using_config('train', False):
        return call_orig(self, x)
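# A minimal sketch of how a wrapper like call_test above is typically used
# to monkey-patch a link so it always runs in test mode. ``MyLink`` is a
# hypothetical link class; ``call_orig`` is assumed to be the original
# __call__ saved before patching:
call_orig = MyLink.__call__
MyLink.__call__ = call_test  # every call now runs with train=False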
chainer.cuda.get_device_from_id(0).use()
chainer.cuda.check_cuda_available()
# Convert the model to the GPU data format.
model.to_gpu()

# Load the training result.
chainer.serializers.load_hdf5('genmodel.hdf5', model)

# Generate images.
num_generate = 5  # number of images to generate

# Create the source vectors.
rnd = random.uniform(-1, 1, (num_generate, 100, 1, 1))
rnd = cp.array(rnd, dtype=cp.float32)

# Generate all images at once using batch processing.
with chainer.using_config('train', False):
    result = model(rnd)

# Save the generated images and their source vectors.
f = codecs.open('vectors.txt', 'w', 'utf8')
for i in range(num_generate):
    # Save the image.
    data = np.zeros((128, 128, 3), dtype=np.uint8)
    dst = result.data[i] * 255.0
    if uses_device >= 0:
        dst = chainer.cuda.to_cpu(dst)
    data[:, :, 0] = dst[0]
    data[:, :, 1] = dst[1]
    data[:, :, 2] = dst[2]
    himg = Image.fromarray(data, 'RGB')
    himg.save('gens/gen-' + str(i) + '.png')
y_batch = spec_utils.crop_and_concat(mask, y_batch, False)
abs_diff = F.absolute_error(X_batch * mask, y_batch)
loss = F.mean(abs_diff)
loss.backward()
optimizer.update()

il = abs_diff.data.mean(axis=(1, 2, 3))
instance_loss[local_perm] += chainer.backends.cuda.to_cpu(il)
sum_loss += float(loss.data) * len(X_batch)

train_loss = sum_loss / len(X_train)

sum_loss = 0
perm = np.random.permutation(len(X_valid))
with chainer.no_backprop_mode(), chainer.using_config('train', False):
    for i in range(0, len(X_valid), args.val_batchsize):
        local_perm = perm[i: i + args.val_batchsize]
        X_batch = xp.asarray(X_valid[local_perm])
        y_batch = xp.asarray(y_valid[local_perm])
        mask = model(X_batch)
        X_batch = spec_utils.crop_and_concat(mask, X_batch, False)
        y_batch = spec_utils.crop_and_concat(mask, y_batch, False)
        loss = F.mean_absolute_error(X_batch * mask, y_batch)
        sum_loss += float(loss.data) * len(X_batch)

valid_loss = sum_loss / len(X_valid)
print(' * training loss = {:.6f}, validation loss = {:.6f}'
      .format(train_loss * 1000, valid_loss * 1000))
def test_cpu(self):
    with chainer.using_config('use_ideep', 'never'):
        self._check()
def predict(self, imgs):
    """Segment object instances from images.

    This method predicts instance-aware object regions for each image.

    Args:
        imgs (iterable of numpy.ndarray): Arrays holding images of shape
            :math:`(B, C, H, W)`. All images are in CHW and RGB format
            and the range of their value is :math:`[0, 255]`.

    Returns:
        tuple of lists:
        This method returns a tuple of three lists,
        :obj:`(masks, labels, scores)`.

        * **masks**: A list of boolean arrays of shape :math:`(R, H, W)`, \
            where :math:`R` is the number of masks in an image. \
            Each pixel holds a value indicating whether it is inside \
            the object or not.
        * **labels**: A list of integer arrays of shape :math:`(R,)`. \
            Each value indicates the class of the masks. \
            Values are in range :math:`[0, L - 1]`, where :math:`L` is \
            the number of the foreground classes.
        * **scores**: A list of float arrays of shape :math:`(R,)`. \
            Each value indicates how confident the prediction is.

    """
    prepared_imgs = []
    sizes = []
    for img in imgs:
        size = img.shape[1:]
        img = self.prepare(img.astype(np.float32))
        prepared_imgs.append(img)
        sizes.append(size)

    masks = []
    labels = []
    scores = []
    for img, size in zip(prepared_imgs, sizes):
        with chainer.using_config('train', False), \
                chainer.function.no_backprop_mode():
            # inference
            img_var = chainer.Variable(self.xp.array(img[None]))
            scale = img_var.shape[3] / size[1]
            roi_ag_seg_scores, _, roi_cls_scores, bboxes, _ = \
                self.__call__(img_var, scale)

        # We are assuming that batch size is 1.
        roi_ag_seg_score = roi_ag_seg_scores.array
        roi_cls_score = roi_cls_scores.array
        bbox = bboxes / scale

        # shape: (n_rois, 4)
        bbox[:, 0::2] = self.xp.clip(bbox[:, 0::2], 0, size[0])
        bbox[:, 1::2] = self.xp.clip(bbox[:, 1::2], 0, size[1])

        # shape: (n_roi, roi_size, roi_size)
        roi_seg_prob = F.softmax(roi_ag_seg_score).array[:, 1]
        roi_cls_prob = F.softmax(roi_cls_score).array

        roi_seg_prob = chainer.cuda.to_cpu(roi_seg_prob)
        roi_cls_prob = chainer.cuda.to_cpu(roi_cls_prob)
        bbox = chainer.cuda.to_cpu(bbox)

        roi_seg_prob, bbox, label, roi_cls_prob = mask_voting(
            roi_seg_prob, bbox, roi_cls_prob, size,
            self.score_thresh, self.nms_thresh,
            self.mask_merge_thresh, self.binary_thresh,
            limit=self.limit, bg_label=0)

        height = bbox[:, 2] - bbox[:, 0]
        width = bbox[:, 3] - bbox[:, 1]
        keep_indices = np.where((height > self.min_drop_size) &
                                (width > self.min_drop_size))[0]
        roi_seg_prob = roi_seg_prob[keep_indices]
        bbox = bbox[keep_indices]
        label = label[keep_indices]
        roi_cls_prob = roi_cls_prob[keep_indices]

        mask = np.zeros(
            (len(roi_seg_prob), size[0], size[1]), dtype=np.bool)
        for i, (roi_seg_pb, bb) in enumerate(zip(roi_seg_prob, bbox)):
            bb = np.round(bb).astype(np.int32)
            y_min, x_min, y_max, x_max = bb
            roi_msk_pb = resize(
                roi_seg_pb.astype(np.float32)[None],
                (y_max - y_min, x_max - x_min))
            roi_msk = (roi_msk_pb > self.binary_thresh)[0]
            mask[i, y_min:y_max, x_min:x_max] = roi_msk

        masks.append(mask)
        labels.append(label)
        scores.append(roi_cls_prob)

    return masks, labels, scores
def perform_reconstructions(model=None, train=None, test=None,
                            unseen=None, no_images=None,
                            name_suffix=None, args=None):
    train_ind = np.linspace(0, len(train) - 1, no_images, dtype=int)
    x = chainer.Variable(np.asarray(train[train_ind]))
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        x1 = model(x)
        z1 = model.get_latent(x)
    save_images(x=x.data, z=[], no_images=no_images,
                filename=os.path.join(args.out, 'train_' + name_suffix),
                args=args)
    save_images(x=x1.data, z=z1.data, no_images=no_images,
                filename=os.path.join(args.out,
                                      'train_' + name_suffix + '_rec'),
                args=args)

    # reconstruct testing examples
    test_ind = np.linspace(0, len(test) - 1, no_images, dtype=int)
    x = chainer.Variable(np.asarray(test[test_ind]))
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        x1 = model(x)
        z1 = model.get_latent(x)
    save_images(x=x.data, z=[], no_images=no_images,
                filename=os.path.join(args.out, 'test_' + name_suffix),
                args=args)
    save_images(x=x1.data, z=z1.data, no_images=no_images,
                filename=os.path.join(args.out,
                                      'test_' + name_suffix + '_rec'),
                args=args)

    # reconstruct unseen examples
    if len(unseen) != 0:
        unseen_ind = np.linspace(0, len(unseen) - 1, no_images, dtype=int)
        x = chainer.Variable(np.asarray(unseen[unseen_ind]))
        with chainer.using_config('train', False), \
                chainer.no_backprop_mode():
            x1 = model(x)
            z1 = model.get_latent(x)
        save_images(x=x.data, z=[], no_images=no_images,
                    filename=os.path.join(args.out,
                                          'unseen_' + name_suffix),
                    args=args)
        save_images(x=x1.data, z=z1.data, no_images=no_images,
                    filename=os.path.join(
                        args.out, 'unseen_' + name_suffix + '_rec'),
                    args=args)

    # draw images from randomly sampled z under a 'vanilla' normal
    # distribution
    z = chainer.Variable(
        np.random.normal(0, 1, (no_images, args.dimz)).astype(np.float32))
    x = model.decode(z)
    save_images(x=x.data, z=z.data, no_images=no_images,
                filename=os.path.join(args.out, 'sampled_' + name_suffix),
                args=args)
def update_Z_speech(self, var_propose_distribution=1e-4):
    """
    Parameters:
        var_propose_distribution: float
            the variance of the proposal distribution

    Results:
        self.Z_speech_DT: self.xp.array [ n_latent x T ]
            the latent variable of each speech
    """
    self.WHG_noise_FTM = (self.lambda_NFT[1:][..., None]
                          * self.G_NFM[1:, :, None]).sum(axis=0)
    self.UVG_FTM = ((self.U_F[:, None] * self.V_T[None])[:, :, None]
                    * self.G_NFM[0, :, None])

    if "backprop" in self.mode_update_Z:
        for it in range(self.n_Z_iteration):
            with chainer.using_config('train', False):
                self.z_optimizer_speech.update(
                    self.loss_func_Z, self.z_link_speech.z,
                    self.speech_VAE, 0)
        self.Z_speech_DT = self.z_link_speech.z.data.T
        self.power_speech_FT = self.speech_VAE.decode_cupy(
            self.Z_speech_DT)

    if "sampling" in self.mode_update_Z:
        # the acceptance rate is calculated from the likelihood
        log_var = self.xp.log(
            self.xp.ones_like(self.Z_speech_DT).astype(self.xp.float32)
            * var_propose_distribution)
        Z_speech_old_DT = self.Z_speech_DT
        power_old_FTM = self.speech_VAE.decode_cupy(
            Z_speech_old_DT)[:, :, None]
        for it in range(self.n_Z_iteration):
            Z_speech_new_DT = chf.gaussian(Z_speech_old_DT, log_var).data
            lambda_old_FTM = (power_old_FTM * self.UVG_FTM
                              + self.WHG_noise_FTM)
            power_new_FTM = self.speech_VAE.decode_cupy(
                Z_speech_new_DT)[:, :, None]
            lambda_new_FTM = (power_new_FTM * self.UVG_FTM
                              + self.WHG_noise_FTM)
            acceptance_rate = self.xp.exp(
                (self.Qx_power_FTM
                 * (1 / lambda_old_FTM - 1 / lambda_new_FTM)).sum(
                     axis=2).sum(axis=0)
                + self.xp.log(
                    (lambda_old_FTM / lambda_new_FTM).prod(
                        axis=2).prod(axis=0)))
            accept_flag = (self.xp.random.random([self.n_time])
                           < acceptance_rate)
            Z_speech_old_DT[:, accept_flag] = \
                Z_speech_new_DT[:, accept_flag]
            power_old_FTM[:, accept_flag] = power_new_FTM[:, accept_flag]
        self.Z_speech_DT = Z_speech_old_DT
        self.z_link_speech.z = chainer.Parameter(self.Z_speech_DT.T)
        self.power_speech_FT = self.speech_VAE.decode_cupy(
            self.Z_speech_DT)

    self.lambda_NFT[0] = (self.U_F[:, None] * self.V_T[None]
                          * self.power_speech_FT)
    self.Y_FTM = (self.lambda_NFT[..., None]
                  * self.G_NFM[:, :, None]).sum(axis=0)
def test_bias_invalid_argc2(self):
    func = links.Bias(self.axis, None)
    with chainer.using_config('debug', True):
        with self.assertRaises(AssertionError):
            func(self.x)
def test_mixed_dtype_communicator_gpu(param):
    model = ExampleMixedModel()
    with chainer.using_config('dtype', param.global_dtype):
        check_multi_node_mean_grad_mixed_dtype(param, model, True)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('config', help='YAML config file')
    parser.add_argument('--profile', action='store_true')
    parser.add_argument('--load_generator', type=str, default='')
    parser.add_argument('--logger_level', type=int, default=logging.INFO)
    parser.add_argument('--file_name', type=str, default='')
    args = parser.parse_args()
    print_args(args)

    # init a logger
    logging.basicConfig(level=args.logger_level)

    # load yaml config file
    with open(args.config) as f:
        config = yaml.load(f)

    # set random seed
    misc.set_random_seed(config['seed'])

    # define func to create env, target data sampler, and models
    if config['problem'] == 'photo_enhancement':
        def make_env(process_idx, test):
            assert test, "error: test should be True"
            env = PhotoEnhancementEnvDemo(
                batch_size=1,
                max_episode_steps=config['max_episode_steps'],
                imsize=config['imsize'],
                file_name=args.file_name)
            return env

        sample_env = make_env(0, True)

        gen = SpiralModel(config['imsize'], sample_env.num_parameters,
                          config['L_stages'], config['conditional'])
        dis = SpiralDiscriminator(config['imsize'],
                                  config['conditional'])

        dataset = PhotoEnhancementDataset()
    else:
        raise NotImplementedError()

    # initialize optimizers
    gen_opt = chainer.optimizers.Adam(alpha=config['lr'], beta1=0.5)
    dis_opt = chainer.optimizers.Adam(alpha=config['lr'], beta1=0.5)
    gen_opt.setup(gen)
    dis_opt.setup(dis)
    gen_opt.add_hook(chainer.optimizer.GradientClipping(40))
    dis_opt.add_hook(chainer.optimizer.GradientClipping(40))
    if config['weight_decay'] > 0:
        gen_opt.add_hook(NonbiasWeightDecay(config['weight_decay']))
        dis_opt.add_hook(NonbiasWeightDecay(config['weight_decay']))

    # load the generator's weights
    assert args.load_generator, "error: specify the weight of the model"
    if args.load_generator:
        serializers.load_npz(args.load_generator, gen)

    # init a spiral agent
    agent = SPIRAL(
        generator=gen,
        discriminator=dis,
        gen_optimizer=gen_opt,
        dis_optimizer=dis_opt,
        dataset=dataset,
        conditional=config['conditional'],
        reward_mode=config['reward_mode'],
        imsize=config['imsize'],
        max_episode_steps=config['max_episode_steps'],
        rollout_n=config['rollout_n'],
        gamma=config['gamma'],
        alpha=config['alpha'],
        beta=config['beta'],
        L_stages=config['L_stages'],
        U_update=config['U_update'],
        gp_lambda=config['gp_lambda'],
        n_save_final_obs_interval=config['n_save_final_obs_interval'],
        outdir=None,
        act_deterministically=True)

    # training mode
    max_episode_len = config['max_episode_steps'] * config['rollout_n']
    steps = config['processes'] * config['n_update'] * max_episode_len
    save_interval = (config['processes'] * config['n_save_interval']
                     * max_episode_len)
    eval_interval = (config['processes'] * config['n_eval_interval']
                     * max_episode_len)
    step_hook = SpiralStepHook(config['max_episode_steps'],
                               save_interval, None)

    env = make_env(0, True)
    with chainer.using_config('train', False):
        eval_stats = experiments.evaluator.run_evaluation_episodes(
            env=env,
            agent=agent,
            n_steps=None,
            n_episodes=1,
            max_episode_len=1)
xp = cupy
model = L.Classifier(MLP(args))
if args.opt == 'Adam':
    path_model = ('../models/Trained_2015_2016_{}/'
                  'Adam_l{}_u{}_b{}_dr{}.snapshot').format(
        args.SR_models, args.layer, args.unit, args.batchsize,
        args.dropout_rate)
    serializers.load_npz(path_model, model)
if args.gpu >= 0:
    chainer.cuda.get_device_from_id(args.gpu).use()
    model.to_gpu()

features, labels = concat_npz(args)

scores = []
with chainer.using_config('train', False):
    if args.gpu >= 0:
        for feature in features:
            feature = feature[None, ...]
            feature = to_gpu(feature)
            pred = model.predictor(
                chainer.Variable(feature.astype(xp.float32)))
            score = to_cpu(pred.data)[0][0]
            scores.append(score)
            print(score)
    else:
        for feature in features:
            feature = feature[None, ...]
            pred = model.predictor(
                chainer.Variable(feature.astype(np.float32)))
            score = pred.data[0][0]
def test_param_layout(self):
    with chainer.using_config('compute_mode', 'cudnn_fast'):
        link = self.create_link()
    assert link.W.layout == memory_layouts.CUDNN_CHANNEL_LAST_W
def evaluate(model, epoch_iterator, distance='euclidean',
             normalize=False, batch_size=10,
             return_distance_matrix=False):
    if distance not in ('cosine', 'euclidean'):
        raise ValueError("distance must be 'euclidean' or 'cosine'.")

    with chainer.no_backprop_mode():
        with chainer.using_config('train', False):
            y_data, c_data = iterate_forward(
                model, epoch_iterator, normalize=normalize)

    add_epsilon = True
    xp = cuda.get_array_module(y_data)
    num_examples = len(y_data)

    D_batches = []
    softs = []
    hards = []
    retrievals = []
    yy = xp.sum(y_data ** 2.0, axis=1)
    if distance == 'cosine':
        y_data = y_data / yy[:, None]  # L2 normalization
    for start in range(0, num_examples, batch_size):
        end = start + batch_size
        if end > num_examples:
            end = num_examples
        y_batch = y_data[start:end]
        yy_batch = yy[start:end]
        c_batch = c_data[start:end]

        # squared distances via ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b
        D_batch = yy + yy_batch[:, None] - 2.0 * xp.dot(y_batch, y_data.T)
        xp.maximum(D_batch, 0, out=D_batch)
        if add_epsilon:
            D_batch += 1e-40
        # ensure the diagonal components are zero
        xp.fill_diagonal(D_batch[:, start:end], 0)

        soft, hard, retr = compute_soft_hard_retrieval(
            D_batch, c_data, c_batch)

        softs.append(len(y_batch) * soft)
        hards.append(len(y_batch) * hard)
        retrievals.append(len(y_batch) * retr)
        if return_distance_matrix:
            D_batches.append(D_batch)

    avg_softs = xp.sum(softs, axis=0) / num_examples
    avg_hards = xp.sum(hards, axis=0) / num_examples
    avg_retrievals = xp.sum(retrievals, axis=0) / num_examples

    if return_distance_matrix:
        D = cuda.to_cpu(xp.vstack(D_batches))
    else:
        D = None
    return D, avg_softs, avg_hards, avg_retrievals
def test_call_cudnn_forward(self):
    with chainer.using_config('use_cudnn', self.use_cudnn):
        with mock.patch('cupy.cudnn.cudnn.poolingForward') as func:
            self.forward()
            self.assertEqual(func.called,
                             chainer.should_use_cudnn('>=auto'))
def grad(outputs, inputs, grad_outputs=None, grad_inputs=None,
         set_grad=False, retain_grad=False, enable_double_backprop=False,
         loss_scale=None):
    """Computes the gradient of output variables w.r.t.\\ the input variables.

    This function implements the backpropagation algorithm. While
    :meth:`Variable.backward` also implements backprop, this function
    selects the smallest paths in the computational graph needed to
    compute the gradients w.r.t. inputs. The error is backpropagated only
    through these selected paths, which may reduce the overall
    computational cost.

    This function also differs from :meth:`Variable.backward` in the way
    to return the gradients; it directly returns the gradient variables
    as a list instead of setting gradients to the
    :attr:`Variable.grad_var` attribute of the original variable. It
    means users do not need to clear the gradient w.r.t. each variable
    before computing the gradient using this function. If the
    ``set_grad`` option is set to ``True``, the computed gradient is also
    stored in the :attr:`Variable.grad_var` attribute of each variable,
    in which case any original value of :attr:`Variable.grad_var` will be
    updated even if it had already been set.

    Args:
        outputs (tuple or list of :class:`~chainer.Variable`):
            A sequence of output variables from which backprop starts.
        inputs (tuple or list of :class:`~chainer.Variable`):
            A sequence of input variables each of which this function
            computes the gradient w.r.t.
        grad_outputs (tuple or list of :class:`~chainer.Variable` or None):
            A sequence of variables that gives the initial value of each
            output gradient. If an element is set to ``None``, an array
            filled with 1 is used. If this argument itself is ``None``,
            it is treated as a sequence of ``None``\\ s.
        grad_inputs (tuple or list of :class:`~chainer.Variable` or None):
            A sequence of variables that gives the initial value of each
            input gradient. The gradients computed by the backprop
            algorithm are accumulated to them (not in-place). If an
            element is set to ``None``, the gradient is not accumulated
            to this value. If this argument itself is ``None``, it is
            treated as a sequence of ``None``\\ s.
        set_grad (bool): If it is ``True``, the
            :attr:`Variable.grad_var` attribute of each input variable is
            set to the corresponding computed gradient variable.
        retain_grad (bool): If it is ``True``, the gradients w.r.t. all
            the intermediate variables are stored in the
            :attr:`Variable.grad_var` attribute. In this case, the
            ``set_grad`` option is ignored.
        enable_double_backprop (bool): If it is ``True``, the computed
            gradients can be further backpropagated. Enabling it may
            increase the memory consumption (and possibly the
            computational time) to remember the intermediate gradient
            values for the second backpropagation.
        loss_scale (float): Loss scaling factor. Loss scaling is a useful
            technique to mitigate the vanishing gradient issue that tends
            to happen when low precision data types like float16 are used
            during training. If you set a loss scaling factor, gradients
            of loss values are to be multiplied by the factor before
            backprop starts. The factor is propagated to whole gradients
            in a computational graph along the backprop. The gradients of
            parameters are divided by the factor just before the
            parameters are to be updated.

    Returns:
        A list of gradient variables w.r.t. the inputs.

    """
    if not isinstance(outputs, (tuple, list)):
        raise TypeError(
            'outputs must be a tuple or a list, not {}.'.format(
                type(outputs)))
    if not isinstance(inputs, (tuple, list)):
        raise TypeError(
            'inputs must be a tuple or a list, not {}.'.format(
                type(inputs)))
    if not (grad_outputs is None or isinstance(grad_outputs,
                                               (tuple, list))):
        raise TypeError(
            'grad_outputs must be a tuple or a list or None, not '
            '{}.'.format(type(grad_outputs)))
    if not (grad_inputs is None or isinstance(grad_inputs,
                                              (tuple, list))):
        raise TypeError(
            'grad_inputs must be a tuple or a list or None, not '
            '{}.'.format(type(grad_inputs)))

    for v in outputs:
        # Raise error here if v is created by Function.backward.
        # In such case, we don't know exact inputs of the creator.
        v.node._check_old_style_gradient()

    # The implementation consists of three steps.

    # 1. Backward enumeration: all the nodes reachable backward from the
    #    output nodes are enumerated. The forward direction links are
    #    collected in this step. Note that the variable nodes whose
    #    requires_grad is false are ignored and their creators are not
    #    searched.
    candidate_funcs = [v.creator_node for v in outputs
                       if v.creator_node is not None]
    visited_funcs = set()
    forward_graph = collections.defaultdict(list)
    while candidate_funcs:
        func = candidate_funcs.pop()
        if func in visited_funcs:
            continue
        visited_funcs.add(func)
        for x in func.inputs:
            # Raise error here if x is created by Function.backward.
            # In such case, we don't know exact inputs of the creator.
            x._check_old_style_gradient()
            if not x.requires_grad:
                continue
            forward_graph[x].append(func)
            creator = x.creator_node
            if creator is not None and creator not in visited_funcs:
                candidate_funcs.append(creator)

    # 2. Forward enumeration: all the nodes in the subgraph reachable
    #    from the input nodes are enumerated. The extracted
    #    (sub-)subgraph is the union of all paths that backpropagation
    #    will visit.
    candidate_vars = [x.node for x in inputs]
    visited_funcs = set()
    grad_required = set()
    while candidate_vars:
        x = candidate_vars.pop()
        grad_required.add(x)
        for func in forward_graph[x]:
            if func in visited_funcs:
                continue
            visited_funcs.add(func)
            for y_ref in func.outputs:
                y = y_ref()
                if y is not None and y in forward_graph:
                    candidate_vars.append(y)

    # 3. Backpropagation: the backpropagation is executed along the
    #    (sub-)subgraph. It uses the topological order of the subgraph
    #    which is induced by the reversed order of function applications
    #    ("rank").
    grads = _backprop_utils.GradTable()

    # Initialize the gradient mapping.
    if grad_outputs is None:
        grad_outputs = (None,) * len(outputs)
    for y, gy in zip(outputs, grad_outputs):
        if gy is None:
            with cuda.get_device_from_array(y.data) as device:
                if device is cuda.DummyDevice:
                    gy_data = numpy.ones_like(y.data)
                else:
                    gy_data = cuda.cupy.ones_like(y.data)
                gy = variable.Variable(gy_data, requires_grad=False)
            if loss_scale is not None:
                gy.data *= loss_scale
        grads[y.node] = gy

    if grad_inputs is not None:
        for x, gx in zip(inputs, grad_inputs):
            if gx is not None:
                grads[x.node] = gx

    # Backprop implementation. It edits grads which will only contain the
    # gradients w.r.t. the inputs.
    with chainer.using_config('enable_backprop', enable_double_backprop):
        ret_dict = _backprop(
            outputs, inputs, grad_required, retain_grad, grads,
            loss_scale)

    # Extract the gradients w.r.t. the inputs and return them.
    ret = [ret_dict[x.node] for x in inputs]
    if set_grad:
        for x, gx in zip(inputs, ret):
            x.grad_var = gx

    return ret
def train(loss_func, optim, train_queue, max_iter, mean_interval,
          save_interval, val_queue=None, lr_decay_iter_dict={}, gpu_id=-1,
          ignore_labels=[], outdir='./'):
    chainer.global_config.train = True
    chainer.global_config.enable_backprop = True
    if gpu_id >= 0:
        loss_func.to_gpu(device=gpu_id)
    # Re-apply any decay steps that have already been passed
    # (e.g. when resuming from a snapshot).
    for key, value in lr_decay_iter_dict.items():
        if optim.t >= key:
            if isinstance(optim, optims.Adam):
                optim.eta *= value
            else:
                optim.lr *= value
    sum_loss = 0.
    while optim.t < max_iter:
        x_batch, t_batch, epoch_done = train_queue.get()
        for ignore_label in ignore_labels:
            t_batch[t_batch == ignore_label] = -1
        if gpu_id >= 0:
            x_batch = cuda.to_gpu(x_batch, device=gpu_id)
            t_batch = cuda.to_gpu(t_batch, device=gpu_id)
        loss = loss_func(x_batch, t_batch)
        loss_func.cleargrads()
        loss.backward()
        optim.update()
        sum_loss += float(loss.array)
        if epoch_done:
            optim.new_epoch()
        print(dt.now())
        print('epoch: {0:04d}, iter: {1:07d}, lr: {2:e}'.format(
            optim.epoch, optim.t, optim.lr))
        print('train/loss: {}'.format(float(loss.array)))
        if optim.t in lr_decay_iter_dict:
            if isinstance(optim, optims.Adam):
                optim.eta *= lr_decay_iter_dict[optim.t]
            else:
                optim.lr *= lr_decay_iter_dict[optim.t]
        if optim.t % mean_interval == 0:
            print('mean train/loss: {}'.format(sum_loss / mean_interval))
            sum_loss = 0.
            if val_queue is not None:
                val_loss = 0.
                val_valid_size = 0
                with using_config('train', False), \
                        using_config('enable_backprop', False):
                    while True:
                        x_batch, t_batch, epoch_done = val_queue.get()
                        for ignore_label in ignore_labels:
                            t_batch[t_batch == ignore_label] = -1
                        if len(ignore_labels) > 0:
                            valid_size = (t_batch != -1).sum()
                        else:
                            valid_size = t_batch.size
                        val_valid_size += valid_size
                        if gpu_id >= 0:
                            x_batch = cuda.to_gpu(x_batch, device=gpu_id)
                            t_batch = cuda.to_gpu(t_batch, device=gpu_id)
                        # Weight each batch loss by its number of valid
                        # targets so the epoch mean is correctly normalized.
                        loss = cuda.to_cpu(loss_func(x_batch, t_batch).array)
                        loss *= valid_size
                        val_loss += loss
                        if epoch_done:
                            break
                print('val/loss: {}'.format(val_loss / val_valid_size))
        if optim.t % save_interval == 0:
            save_dst_path = os.path.join(
                outdir, 'model_iter_{0:07d}.npz'.format(optim.t))
            S.save_npz(save_dst_path, optim.target)
            print('save ' + save_dst_path)
            save_dst_path = os.path.join(
                outdir, 'optim_iter_{0:07d}.npz'.format(optim.t))
            S.save_npz(save_dst_path, optim)
            print('save ' + save_dst_path)
        print()
    # Flush any partial interval left over when max_iter is reached.
    if optim.t % mean_interval > 0:
        print('mean train/loss: {}'.format(
            sum_loss / (optim.t % mean_interval)))
    if optim.t % save_interval > 0:
        save_dst_path = os.path.join(
            outdir, 'model_iter_{0:07d}.npz'.format(optim.t))
        S.save_npz(save_dst_path, optim.target)
        print('save ' + save_dst_path)
        save_dst_path = os.path.join(
            outdir, 'optim_iter_{0:07d}.npz'.format(optim.t))
        S.save_npz(save_dst_path, optim)
        print('save ' + save_dst_path)
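# Self-contained sketch of the decay rule used in train() above:
# lr_decay_iter_dict maps an iteration count to a multiplicative factor
# applied to the learning rate once that iteration is reached. The schedule
# below is made up for illustration.
lr = 0.01
lr_decay_iter_dict = {60000: 0.1, 80000: 0.1}
for t in (59999, 60000, 60001, 80000):
    if t in lr_decay_iter_dict:
        lr *= lr_decay_iter_dict[t]
print('final lr: {:e}'.format(lr))  # ~1e-4 after both decay steps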
def check_backward(self, x_data, y_grad, use_cudnn='always'):
    with chainer.using_config('use_cudnn', use_cudnn):
        gradient_check.check_backward(
            functions.AveragePoolingND(self.ndim, self.ksize, self.stride,
                                       self.pad),
            x_data, y_grad, **self.check_backward_options)
def check_backward(self, x_data, gy_data, use_cudnn='always'):
    with chainer.using_config('use_cudnn', use_cudnn):
        gradient_check.check_backward(
            functions.LogSoftmax(), x_data, gy_data,
            **self.check_backward_options)
def check_forward_gpu(self, use_cudnn):
    with chainer.using_config('use_cudnn', use_cudnn):
        self.check_forward(_to_gpu(self.hx), _to_gpu(self.xs),
                           _to_gpu(self.ws), _to_gpu(self.bs))
def greedy_batch_translate(encdec, eos_idx, src_data, batch_size=80, gpu=None,
                           get_attention=False, nb_steps=50,
                           reverse_src=False, reverse_tgt=False):
    with chainer.using_config("train", False), chainer.no_backprop_mode():
        if encdec.encdec_type() == "ff":
            result = encdec.greedy_batch_translate(
                src_data, mb_size=batch_size, nb_steps=nb_steps)
            if get_attention:
                dummy_attention = []
                for src, tgt in zip(src_data, result):
                    dummy_attention.append(
                        np.zeros((len(src), len(tgt)), dtype=np.float32))
                return result, dummy_attention
            else:
                return result

        nb_ex = len(src_data)
        # Integer division so that range() below receives an int
        # (plain `/` breaks under Python 3).
        nb_batch = nb_ex // batch_size + (1 if nb_ex % batch_size != 0 else 0)
        res = []
        attn_all = []
        for i in range(nb_batch):
            current_batch_raw_data = src_data[
                i * batch_size:(i + 1) * batch_size]
            if reverse_src:
                current_batch_raw_data = [
                    src_side[::-1] for src_side in current_batch_raw_data]
            src_batch, src_mask = make_batch_src(
                current_batch_raw_data, gpu=gpu)
            sample_greedy, score, attn_list = encdec(
                src_batch, nb_steps, src_mask, use_best_for_sample=True,
                keep_attn_values=get_attention)
            deb = de_batch(sample_greedy, mask=None, eos_idx=eos_idx,
                           is_variable=False)
            res += deb
            if get_attention:
                deb_attn = de_batch(
                    attn_list, mask=None, eos_idx=None, is_variable=True,
                    raw=True, reverse=reverse_tgt)
                attn_all += deb_attn
        if reverse_tgt:
            # Reverse each output sequence, keeping a trailing EOS in place.
            new_res = []
            for t in res:
                if t[-1] == eos_idx:
                    new_res.append(t[:-1][::-1] + [t[-1]])
                else:
                    new_res.append(t[::-1])
            res = new_res
        if get_attention:
            assert not reverse_tgt, "not implemented"
            return res, attn_all
        else:
            return res
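# Self-contained illustration of the reverse_tgt post-processing above:
# each output sequence is reversed, but a trailing EOS token stays in place.
eos_idx = 0
seqs = [[5, 3, 7, 0], [4, 9, 2]]  # made-up token ids; 0 is EOS
fixed = [t[:-1][::-1] + [t[-1]] if t[-1] == eos_idx else t[::-1]
         for t in seqs]
print(fixed)  # [[7, 3, 5, 0], [2, 9, 4]]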
def test_backward_gpu(self):
    with chainer.using_config('use_cudnn', 'always'):
        self.check_backward(_to_gpu(self.hx), _to_gpu(self.xs),
                            _to_gpu(self.ws), _to_gpu(self.bs),
                            _to_gpu(self.dhy), _to_gpu(self.dys))
def forward(self, inputs):
    """
    Parameters
    ----------
    inputs : ``chainer.Variable``
        Shape ``(batch_size, timesteps, 50)`` of character ids representing
        the current batch. We also accept tensors with additional optional
        dimensions: ``(batch_size, dim0, dim1, ..., dimn, timesteps, 50)``

    Returns
    -------
    Dict with keys:

    ``'elmo_representations'``: ``List[chainer.Variable]``
        A ``num_output_representations`` list of ELMo representations for the
        input sequence. Each representation is shape
        ``(batch_size, timesteps, embedding_dim)``
    ``'mask'``: ``chainer.Variable``
        Shape ``(batch_size, timesteps)`` long tensor with sequence mask.
    """
    if self.use_character_inputs:
        # reshape the input if needed
        original_shape = inputs.shape
        timesteps, num_characters = original_shape[-2:]
        if len(original_shape) > 3:
            reshaped_inputs = inputs.reshape((-1, timesteps, num_characters))
        else:
            reshaped_inputs = inputs
    else:
        # reshape the input if needed
        original_shape = inputs.shape
        timesteps = original_shape[-1]
        if len(original_shape) > 2:
            warnings.warn(
                'It is not tested to use input with shape '
                '(batch_size, dim0, ..., timesteps) to token-input Elmo.\n'
                'Input with shape (batch_size, timesteps) is recommended.')
            reshaped_inputs = inputs.reshape((-1, timesteps))
        else:
            reshaped_inputs = inputs

    # run the biLM
    # no backprop through the biLSTM, to lighten the computation
    with chainer.using_config("train", False), \
            chainer.no_backprop_mode():
        bilm_output = self._elmo_lstm.forward(reshaped_inputs)
    layer_activations = bilm_output['activations']
    mask_with_bos_eos = bilm_output['mask']

    # compute the elmo representations
    representations = []
    for i in range(len(self._scalar_mixes)):
        scalar_mix = getattr(self, 'scalar_mix_{}'.format(i))
        representation_with_bos_eos = scalar_mix.forward(
            layer_activations, mask_with_bos_eos)
        representation_without_bos_eos, mask_without_bos_eos = \
            remove_sentence_boundaries(
                representation_with_bos_eos, mask_with_bos_eos)
        representations.append(F.dropout(
            representation_without_bos_eos, ratio=self._dropout_ratio))

    if self.use_character_inputs:
        # reshape if necessary
        if len(original_shape) > 3:
            mask = mask_without_bos_eos.reshape(original_shape[:-1])
            elmo_representations = [
                representation.reshape(original_shape[:-1] + (-1,))
                for representation in representations]
        else:
            mask = mask_without_bos_eos
            elmo_representations = representations
    else:
        if len(original_shape) > 2:
            mask = mask_without_bos_eos.reshape(original_shape)
            elmo_representations = [
                representation.reshape(original_shape + (-1,))
                for representation in representations]
        else:
            mask = mask_without_bos_eos
            elmo_representations = representations

    layer_activations_without_bos_eos = [
        remove_sentence_boundaries_for_variable(
            a_layer_activation, mask_with_bos_eos)[0]
        for a_layer_activation in layer_activations]

    return {'elmo_representations': elmo_representations,
            'mask': mask,
            'elmo_layers': layer_activations_without_bos_eos}
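# The scalar_mix_{i} modules referenced above implement ELMo's learned layer
# combination: softmax-normalized per-layer scalars times a global gamma.
# A minimal NumPy sketch of that computation (parameter values are made up,
# and the optional masked layer normalization is omitted); this is an
# illustration, not the module's actual implementation:
import numpy as np

def scalar_mix(layer_activations, weights, gamma):
    # layer_activations: list of (batch, timesteps, dim) arrays, one per layer
    norm = np.exp(weights) / np.exp(weights).sum()  # softmax over layers
    mixed = sum(w * act for w, act in zip(norm, layer_activations))
    return gamma * mixed

layers = [np.random.randn(2, 7, 16).astype(np.float32) for _ in range(3)]
out = scalar_mix(layers, weights=np.zeros(3, dtype=np.float32), gamma=1.0)
assert out.shape == (2, 7, 16)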